diff --git a/.buildinfo b/.buildinfo
index f8b07ad..9a3294a 100644
--- a/.buildinfo
+++ b/.buildinfo
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 89ada319c94fcb1610b7f80d777e8b12
+config: 0ea2334c76c1e774d577e20446a79224
 tags: 645f666f9bcd5a90fca523b33c5a78b7
diff --git a/.doctrees/deployment/1_tensorrt_llm_deployment.doctree b/.doctrees/deployment/1_tensorrt_llm_deployment.doctree
index ecf83fb..5827105 100644
Binary files a/.doctrees/deployment/1_tensorrt_llm_deployment.doctree and b/.doctrees/deployment/1_tensorrt_llm_deployment.doctree differ
diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle
index b4a8fc1..eec7be3 100644
Binary files a/.doctrees/environment.pickle and b/.doctrees/environment.pickle differ
diff --git a/.doctrees/examples/0_all_examples.doctree b/.doctrees/examples/0_all_examples.doctree
index 154cbaa..5b66533 100644
Binary files a/.doctrees/examples/0_all_examples.doctree and b/.doctrees/examples/0_all_examples.doctree differ
diff --git a/.doctrees/getting_started/1_overview.doctree b/.doctrees/getting_started/1_overview.doctree
index 7be2a67..1a32aa0 100644
Binary files a/.doctrees/getting_started/1_overview.doctree and b/.doctrees/getting_started/1_overview.doctree differ
diff --git a/.doctrees/getting_started/2_installation.doctree b/.doctrees/getting_started/2_installation.doctree
index 741334c..77d5d11 100644
Binary files a/.doctrees/getting_started/2_installation.doctree and b/.doctrees/getting_started/2_installation.doctree differ
diff --git a/.doctrees/getting_started/3_quantization.doctree b/.doctrees/getting_started/3_quantization.doctree
index 0ce73de..ee2bfd1 100644
Binary files a/.doctrees/getting_started/3_quantization.doctree and b/.doctrees/getting_started/3_quantization.doctree differ
diff --git a/.doctrees/getting_started/5_distillation.doctree b/.doctrees/getting_started/5_distillation.doctree
new file mode 100644
index 0000000..9e485dd
Binary files /dev/null and b/.doctrees/getting_started/5_distillation.doctree differ
diff --git a/.doctrees/getting_started/6_sparsity.doctree b/.doctrees/getting_started/6_sparsity.doctree
index 60fb311..a9779ad 100644
Binary files a/.doctrees/getting_started/6_sparsity.doctree and b/.doctrees/getting_started/6_sparsity.doctree differ
diff --git a/.doctrees/guides/1_quantization.doctree b/.doctrees/guides/1_quantization.doctree
index 43906e9..bcc2ba5 100644
Binary files a/.doctrees/guides/1_quantization.doctree and b/.doctrees/guides/1_quantization.doctree differ
diff --git a/.doctrees/guides/4_distillation.doctree b/.doctrees/guides/4_distillation.doctree
new file mode 100644
index 0000000..4e2c5e0
Binary files /dev/null and b/.doctrees/guides/4_distillation.doctree differ
diff --git a/.doctrees/guides/5_sparsity.doctree b/.doctrees/guides/5_sparsity.doctree
index a1e8396..e2418be 100644
Binary files a/.doctrees/guides/5_sparsity.doctree and b/.doctrees/guides/5_sparsity.doctree differ
diff --git a/.doctrees/guides/_basic_quantization.doctree b/.doctrees/guides/_basic_quantization.doctree
index 82c5be6..139d290 100644
Binary files a/.doctrees/guides/_basic_quantization.doctree and b/.doctrees/guides/_basic_quantization.doctree differ
diff --git a/.doctrees/guides/_onnx_quantization.doctree b/.doctrees/guides/_onnx_quantization.doctree
index 980db0b..66299e4 100644
Binary files a/.doctrees/guides/_onnx_quantization.doctree and b/.doctrees/guides/_onnx_quantization.doctree differ
diff --git a/.doctrees/guides/_pytorch_quantization.doctree b/.doctrees/guides/_pytorch_quantization.doctree
index 44f7511..fdd597d 100644
Binary files a/.doctrees/guides/_pytorch_quantization.doctree and b/.doctrees/guides/_pytorch_quantization.doctree differ
diff --git a/.doctrees/index.doctree b/.doctrees/index.doctree
index 5b3f64d..4bfcada 100644
Binary files a/.doctrees/index.doctree and b/.doctrees/index.doctree differ
diff --git a/.doctrees/reference/0_versions.doctree b/.doctrees/reference/0_versions.doctree
index 993786f..136aec9 100644
Binary files a/.doctrees/reference/0_versions.doctree and b/.doctrees/reference/0_versions.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.deploy.doctree b/.doctrees/reference/generated/modelopt.deploy.doctree
index b460ffd..155423f 100644
Binary files a/.doctrees/reference/generated/modelopt.deploy.doctree and b/.doctrees/reference/generated/modelopt.deploy.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.deploy.llm.doctree b/.doctrees/reference/generated/modelopt.deploy.llm.doctree
index 2a1f1ad..ac37a3e 100644
Binary files a/.doctrees/reference/generated/modelopt.deploy.llm.doctree and b/.doctrees/reference/generated/modelopt.deploy.llm.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.deploy.llm.generate.doctree b/.doctrees/reference/generated/modelopt.deploy.llm.generate.doctree
index cd4ad15..143722f 100644
Binary files a/.doctrees/reference/generated/modelopt.deploy.llm.generate.doctree and b/.doctrees/reference/generated/modelopt.deploy.llm.generate.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.deploy.llm.model_config_trt.doctree b/.doctrees/reference/generated/modelopt.deploy.llm.model_config_trt.doctree
deleted file mode 100644
index 58dc3bc..0000000
Binary files a/.doctrees/reference/generated/modelopt.deploy.llm.model_config_trt.doctree and /dev/null differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.op_types.doctree b/.doctrees/reference/generated/modelopt.onnx.op_types.doctree
index 53c54ea..ad61934 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.op_types.doctree and b/.doctrees/reference/generated/modelopt.onnx.op_types.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.calib_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.calib_utils.doctree
index 39b0c32..7c3e4d4 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.calib_utils.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.calib_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.doctree
index aec17b1..8257731 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.extensions.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.extensions.doctree
new file mode 100644
index 0000000..a5d0036
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.extensions.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.fp8.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.fp8.doctree
new file mode 100644
index 0000000..b4c899d
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.fp8.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.graph_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.graph_utils.doctree
index 09e9d7b..dd46ca2 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.graph_utils.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.graph_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.int4.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.int4.doctree
index dccb0be..9c2be49 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.int4.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.int4.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.int8.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.int8.doctree
new file mode 100644
index 0000000..4d41b20
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.int8.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.ort_patching.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.ort_patching.doctree
index b3eaa47..40afe21 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.ort_patching.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.ort_patching.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.ort_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.ort_utils.doctree
index c76eb5f..9fa8ea3 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.ort_utils.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.ort_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.qdq_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.qdq_utils.doctree
index 04c744c..27d8839 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.qdq_utils.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.qdq_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.quant_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.quant_utils.doctree
index 3d2b1ca..ccd3767 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.quant_utils.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.quant_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.quantize.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.quantize.doctree
index da5a0ee..eb5de3e 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.quantization.quantize.doctree and b/.doctrees/reference/generated/modelopt.onnx.quantization.quantize.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.onnx.utils.doctree b/.doctrees/reference/generated/modelopt.onnx.utils.doctree
index 5339a16..8716609 100644
Binary files a/.doctrees/reference/generated/modelopt.onnx.utils.doctree and b/.doctrees/reference/generated/modelopt.onnx.utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.config.doctree b/.doctrees/reference/generated/modelopt.torch.distill.config.doctree
new file mode 100644
index 0000000..48b2bb9
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.config.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.distillation.doctree b/.doctrees/reference/generated/modelopt.torch.distill.distillation.doctree
new file mode 100644
index 0000000..89fe63f
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.distillation.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.distillation_model.doctree b/.doctrees/reference/generated/modelopt.torch.distill.distillation_model.doctree
new file mode 100644
index 0000000..d7c7bd8
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.distillation_model.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.doctree b/.doctrees/reference/generated/modelopt.torch.distill.doctree
new file mode 100644
index 0000000..a210068
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.loss_balancers.doctree b/.doctrees/reference/generated/modelopt.torch.distill.loss_balancers.doctree
new file mode 100644
index 0000000..057544c
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.loss_balancers.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.losses.doctree b/.doctrees/reference/generated/modelopt.torch.distill.losses.doctree
new file mode 100644
index 0000000..21c0f1a
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.losses.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.mode.doctree b/.doctrees/reference/generated/modelopt.torch.distill.mode.doctree
new file mode 100644
index 0000000..3c9a400
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.mode.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.distill.registry.doctree b/.doctrees/reference/generated/modelopt.torch.distill.registry.doctree
new file mode 100644
index 0000000..e6f561b
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.distill.registry.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.doctree b/.doctrees/reference/generated/modelopt.torch.doctree
index cd4233c..3358933 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.doctree and b/.doctrees/reference/generated/modelopt.torch.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.distribute.doctree b/.doctrees/reference/generated/modelopt.torch.export.distribute.doctree
index 97c6425..7a6222e 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.export.distribute.doctree and b/.doctrees/reference/generated/modelopt.torch.export.distribute.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.doctree b/.doctrees/reference/generated/modelopt.torch.export.doctree
index 1a4d247..78a408e 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.export.doctree and b/.doctrees/reference/generated/modelopt.torch.export.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.hf_config_map.doctree b/.doctrees/reference/generated/modelopt.torch.export.hf_config_map.doctree
new file mode 100644
index 0000000..fdc4497
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.export.hf_config_map.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.layer_utils.doctree b/.doctrees/reference/generated/modelopt.torch.export.layer_utils.doctree
index 19ca4d8..d089388 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.export.layer_utils.doctree and b/.doctrees/reference/generated/modelopt.torch.export.layer_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.model_config.doctree b/.doctrees/reference/generated/modelopt.torch.export.model_config.doctree
index d451e1e..5907cb4 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.export.model_config.doctree and b/.doctrees/reference/generated/modelopt.torch.export.model_config.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.model_config_export.doctree b/.doctrees/reference/generated/modelopt.torch.export.model_config_export.doctree
index 1040ff5..ea03b09 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.export.model_config_export.doctree and b/.doctrees/reference/generated/modelopt.torch.export.model_config_export.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.scaling_factor_utils.doctree b/.doctrees/reference/generated/modelopt.torch.export.scaling_factor_utils.doctree
index 01d94e4..058e26a 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.export.scaling_factor_utils.doctree and b/.doctrees/reference/generated/modelopt.torch.export.scaling_factor_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.tensorrt_llm_utils.doctree b/.doctrees/reference/generated/modelopt.torch.export.tensorrt_llm_utils.doctree
index 8741444..0fb5f9b 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.export.tensorrt_llm_utils.doctree and b/.doctrees/reference/generated/modelopt.torch.export.tensorrt_llm_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.export.vllm.doctree b/.doctrees/reference/generated/modelopt.torch.export.vllm.doctree
new file mode 100644
index 0000000..ece0f83
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.export.vllm.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.opt.hparam.doctree b/.doctrees/reference/generated/modelopt.torch.opt.hparam.doctree
index d727c92..2fd48bf 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.opt.hparam.doctree and b/.doctrees/reference/generated/modelopt.torch.opt.hparam.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.opt.searcher.doctree b/.doctrees/reference/generated/modelopt.torch.opt.searcher.doctree
index 7a5f751..433575f 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.opt.searcher.doctree and b/.doctrees/reference/generated/modelopt.torch.opt.searcher.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.opt.utils.doctree b/.doctrees/reference/generated/modelopt.torch.opt.utils.doctree
index 0bc4c08..378f6c9 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.opt.utils.doctree and b/.doctrees/reference/generated/modelopt.torch.opt.utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.algorithms.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.algorithms.doctree
new file mode 100644
index 0000000..79d555d
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.algorithms.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.calib.histogram.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.calib.histogram.doctree
index a7c0fb0..6c7c148 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.calib.histogram.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.calib.histogram.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.calib.max.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.calib.max.doctree
index 5f8b221..50dcfa7 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.calib.max.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.calib.max.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.config.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.config.doctree
index e7633c0..c4e24bf 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.config.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.config.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.conversion.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.conversion.doctree
index 72be1dc..8a99ff4 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.conversion.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.conversion.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.doctree
index cd84875..ccb4b00 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.extensions.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.extensions.doctree
index a673f09..f35fd1d 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.extensions.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.extensions.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.model_calib.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.model_calib.doctree
index c6ac50f..3aa188a 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.model_calib.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.model_calib.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.model_quant.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.model_quant.doctree
index 6879fb5..a42169c 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.model_quant.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.model_quant.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.doctree
index 448084b..6162689 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.doctree
index 9be4758..98a1573 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.doctree
index f8c99c2..6a35ff8 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.doctree
index 0f9f93c..9ef9894 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_rnn.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_rnn.doctree
new file mode 100644
index 0000000..29b37d9
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_rnn.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.doctree
index a452e17..892cf45 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.plugins.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.plugins.doctree
index b1b8bc7..4f1cfda 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.plugins.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.plugins.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.base_qtensor.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.base_qtensor.doctree
new file mode 100644
index 0000000..584f36d
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.base_qtensor.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.doctree
new file mode 100644
index 0000000..dc579f6
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.int4_tensor.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.int4_tensor.doctree
new file mode 100644
index 0000000..faf950d
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.int4_tensor.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.nf4_tensor.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.nf4_tensor.doctree
new file mode 100644
index 0000000..ec1b13c
Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.qtensor.nf4_tensor.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.tensor_quant.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.tensor_quant.doctree
index 0ab87a9..650d31f 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.quantization.tensor_quant.doctree and b/.doctrees/reference/generated/modelopt.torch.quantization.tensor_quant.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.utils.dataset_utils.doctree b/.doctrees/reference/generated/modelopt.torch.utils.dataset_utils.doctree
index 7063a1f..ab7abc2 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.utils.dataset_utils.doctree and b/.doctrees/reference/generated/modelopt.torch.utils.dataset_utils.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.utils.distributed.doctree b/.doctrees/reference/generated/modelopt.torch.utils.distributed.doctree
index adb542c..2f8b8f1 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.utils.distributed.doctree and b/.doctrees/reference/generated/modelopt.torch.utils.distributed.doctree differ
diff --git a/.doctrees/reference/generated/modelopt.torch.utils.network.doctree b/.doctrees/reference/generated/modelopt.torch.utils.network.doctree
index 0ec5a03..8464a86 100644
Binary files a/.doctrees/reference/generated/modelopt.torch.utils.network.doctree and b/.doctrees/reference/generated/modelopt.torch.utils.network.doctree differ
diff --git a/.doctrees/support/1_contact.doctree b/.doctrees/support/1_contact.doctree
index 5572c37..f4796cd 100644
Binary files a/.doctrees/support/1_contact.doctree and b/.doctrees/support/1_contact.doctree differ
diff --git a/.doctrees/support/2_faqs.doctree b/.doctrees/support/2_faqs.doctree
index 34b2b30..cfebc1c 100644
Binary files a/.doctrees/support/2_faqs.doctree and b/.doctrees/support/2_faqs.doctree differ
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..5442ede
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+.doctrees/environment.pickle filter=lfs diff=lfs merge=lfs -text
diff --git a/_sources/deployment/1_tensorrt_llm_deployment.rst.txt b/_sources/deployment/1_tensorrt_llm_deployment.rst.txt
index ea32741..3b04d0d 100644
--- a/_sources/deployment/1_tensorrt_llm_deployment.rst.txt
+++ b/_sources/deployment/1_tensorrt_llm_deployment.rst.txt
@@ -90,50 +90,55 @@ If the :meth:`export_tensorrt_llm_checkpoint `_
-for all ModelOpt examples.
+All examples can be accessed from the ModelOpt GitHub repository at
+`github.com/NVIDIA/TensorRT-Model-Optimizer `_.
diff --git a/_sources/getting_started/1_overview.rst.txt b/_sources/getting_started/1_overview.rst.txt
index b39fc37..558d512 100644
--- a/_sources/getting_started/1_overview.rst.txt
+++ b/_sources/getting_started/1_overview.rst.txt
@@ -7,8 +7,8 @@ Overview
 Minimizing inference costs presents a significant challenge as generative AI models continue to grow in complexity and size.
 The `NVIDIA TensorRT Model Optimizer `_ (referred to as Model Optimizer, or ModelOpt) is a library comprising
 state-of-the-art model optimization techniques including quantization and sparsity to compress model.
-It accepts a torch or ONNX model as inputs and provides Python APIs for users to easily stack different model optimization
-techniques to produce quantized checkpoint. Seamlessly integrated within the NVIDIA AI software ecosystem, the quantized
+It accepts a torch or ONNX model as input and provides Python APIs for users to easily stack different model optimization
+techniques to produce optimized & quantized checkpoints. Seamlessly integrated within the NVIDIA AI software ecosystem, the quantized
 checkpoint generated from Model Optimizer is ready for deployment in downstream inference frameworks like
 `TensorRT-LLM `_ or `TensorRT `_.
 Further integrations are planned for `NVIDIA NeMo `_ and `Megatron-LM `_
@@ -16,7 +16,7 @@ for training-in-the-loop optimization techniques. For enterprise users, the 8-bi
 `NVIDIA NIM `_.
 
 Model Optimizer is available for free for all developers on `NVIDIA PyPI `_.
-Visit `/NVIDIA/TensorRT-Model-Optimizer repository `_ for end-to-end
+Visit the `TensorRT Model Optimizer GitHub repository `_ for end-to-end
 example scripts and recipes optimized for NVIDIA GPUs.
 
 Techniques
@@ -34,8 +34,11 @@ for list of formats supported.
 Sparsity
 ^^^^^^^^
 Sparsity is a technique to further reduce the memory footprint of deep learning models and accelerate the inference.
-Model Optimizer provides Python API :meth:`mts.sparsify() ` to apply
-weight sparsity to a given model. The ``mts.sparsify()`` API supports `NVIDIA 2:4 `_
-sparsity pattern and various sparsification methods, such as NVIDIA `ASP `_
-and `SparseGPT `_. It supports both post-training sparsity and sparsity with fine-tuning.
-The latter workflow is recommended to minimize accuracy degradation.
+Model Optimizer provides the Python API :meth:`mts.sparsify() ` to
+automatically apply weight sparsity to a given model. The
+:meth:`mts.sparsify() ` API supports
+`NVIDIA 2:4 `_ sparsity pattern and various sparsification methods,
+such as `NVIDIA ASP `_ and
+`SparseGPT `_. It supports both post-training sparsity (PTS) and
+sparsity-aware training (SAT). The latter workflow is recommended to minimize accuracy
+degradation.
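+
+The two techniques can be stacked through the same APIs. A minimal sketch (the config name,
+calibration loader, and ordering here are illustrative only; see the technique guides for real
+recipes):
+
+.. code-block:: python
+
+    import modelopt.torch.quantization as mtq
+    import modelopt.torch.sparsity as mts
+
+    # Sparsify first; SparseGPT is data-driven and needs calibration data.
+    model = mts.sparsify(
+        model,
+        mode="sparsegpt",
+        config={"data_loader": calib_loader, "collect_func": lambda x: x},
+    )
+
+
+    # Then quantize with a built-in config; forward_loop feeds calibration batches.
+    def forward_loop(model):
+        for batch in calib_loader:
+            model(batch)
+
+
+    model = mtq.quantize(model, mtq.INT8_SMOOTHQUANT_CFG, forward_loop)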
diff --git a/_sources/getting_started/2_installation.rst.txt b/_sources/getting_started/2_installation.rst.txt
index f015882..35c70ec 100644
--- a/_sources/getting_started/2_installation.rst.txt
+++ b/_sources/getting_started/2_installation.rst.txt
@@ -7,17 +7,19 @@ System requirements
 Model Optimizer (``nvidia-modelopt``) currently has the following system requirements:
 
-+----------------------+-----------------------------+
-| OS                   | Linux, Windows              |
-+----------------------+-----------------------------+
-| Architecture         | x86_64, aarch64, win_amd64  |
-+----------------------+-----------------------------+
-| Python               | >=3.8,<3.12                 |
-+----------------------+-----------------------------+
-| PyTorch              | >=1.11                      |
-+----------------------+-----------------------------+
-| CUDA                 | >=11.8 (Recommended)        |
-+----------------------+-----------------------------+
++-------------------------+-----------------------------+
+| OS                      | Linux                       |
++-------------------------+-----------------------------+
+| Architecture            | x86_64                      |
++-------------------------+-----------------------------+
+| Python                  | >=3.8,<3.13                 |
++-------------------------+-----------------------------+
+| CUDA                    | >=11.8 (Recommended)        |
++-------------------------+-----------------------------+
+| PyTorch (Optional)      | >=1.11                      |
++-------------------------+-----------------------------+
+| TensorRT-LLM (Optional) | 0.11                        |
++-------------------------+-----------------------------+
 
 Install Model Optimizer
 =======================
@@ -34,11 +36,11 @@ license terms of ModelOpt and any dependencies before use.
 
    **Setting up a virtual environment**
 
   We recommend setting up a virtual environment if you don't have one already. Run the following
-   command to set up and activate a ``conda`` virtual environment named ``modelopt`` with Python 3.11:
+   command to set up and activate a ``conda`` virtual environment named ``modelopt`` with Python 3.12:
 
    .. code-block:: bash
 
-      conda create -n modelopt python=3.11 pip
+      conda create -n modelopt python=3.12 pip
 
    .. code-block:: bash
 
@@ -89,11 +91,14 @@ license terms of ModelOpt and any dependencies before use.
      * - ``transformers`` (Huggingface)
        - ``[hf]``
 
+   If you want to install only partial dependencies, please replace ``[all]`` with the desired
+   optional dependencies for the below ``pip`` installation command.
+
    **Install Model Optimizer** (``nvidia-modelopt``)
 
    .. code-block:: bash
 
-      pip install "nvidia-modelopt[all]" --no-cache-dir --extra-index-url https://pypi.nvidia.com
+      pip install "nvidia-modelopt[all]" --extra-index-url https://pypi.nvidia.com
 
 Check installation
 ==================
 
@@ -103,7 +108,7 @@ Check installation
    When you use ModelOpt's PyTorch quantization APIs for the first time, it will compile the fast
    quantization kernels using your installed torch and CUDA if available.
    This may take a few minutes but subsequent quantization calls will be much faster.
-   To invoke the compilation now and check if it is successful, run the following command:
+   To invoke the compilation and check if it is successful or pre-compile for docker builds, run the following command:
 
    .. code-block:: bash
diff --git a/_sources/getting_started/3_quantization.rst.txt b/_sources/getting_started/3_quantization.rst.txt
index 693a8b2..b128e78 100644
--- a/_sources/getting_started/3_quantization.rst.txt
+++ b/_sources/getting_started/3_quantization.rst.txt
@@ -9,8 +9,8 @@ Quantization is an effective technique to reduce the memory footprint of deep learning models and
 accelerate the inference speed.
 
 ModelOpt's :meth:`mtq.quantize() ` API enables
-users to quantize a model with advanced algorithms like SmoothQuant, AWQ etc. ModelOpt supports both
-Post Training Quantization (PTQ) and Quantization Aware Training (QAT).
+users to quantize a model with advanced algorithms like SmoothQuant, AWQ, and more. ModelOpt
+supports both Post Training Quantization (PTQ) and Quantization Aware Training (QAT).
 
 .. tip::
 
@@ -21,7 +21,7 @@ PTQ for PyTorch models
 -----------------------------
 
 :meth:`mtq.quantize ` requires the model,
-the appropriate quantization configuration and a forward loop as inputs. Here is a quick example of
+the appropriate quantization configuration, and a forward loop as inputs. Here is a quick example of
 quantizing a model with int8 SmoothQuant using
 :meth:`mtq.quantize `:
 
@@ -55,8 +55,8 @@ Deployment
 
 The quantized model is just like a regular Pytorch model and is ready for evaluation or deployment.
 Huggingface or Nemo LLM models can be exported to TensorRT-LLM using ModelOpt.
-Please see :doc:`TensorRT-LLM Deployment <../deployment/1_tensorrt_llm_deployment>` guide for more
-details.
+Please see the :doc:`TensorRT-LLM Deployment <../deployment/1_tensorrt_llm_deployment>` guide for
+more details.
 
 The model can be also exported to ONNX using
 `torch.onnx.export `_.
diff --git a/_sources/getting_started/5_distillation.rst.txt b/_sources/getting_started/5_distillation.rst.txt
new file mode 100644
index 0000000..8950cdc
--- /dev/null
+++ b/_sources/getting_started/5_distillation.rst.txt
@@ -0,0 +1,115 @@
+
+=========================
+Quick Start: Distillation
+=========================
+
+ModelOpt's :doc:`Distillation <../guides/4_distillation>` is a set of wrappers and utilities
+to easily perform Knowledge Distillation among teacher and student models.
+Given a pretrained teacher model, Distillation has the potential to train a smaller student model
+faster and/or with higher accuracy than the student model could achieve on its own.
+
+This quick-start guide shows the necessary steps to integrate Distillation into your
+training pipeline.
+
+Set up your base models
+-----------------------
+
+First obtain both a pretrained model to act as the teacher and a (usually smaller) model to serve
+as the student.
+
+.. code-block:: python
+
+    from torchvision.models import resnet50, resnet18
+
+    # Define student
+    student_model = resnet18()
+
+
+    # Define callable which returns teacher
+    def teacher_factory():
+        teacher_model = resnet50()
+        teacher_model.load_state_dict(pretrained_weights)
+        return teacher_model
+
+
+Set up the meta model
+---------------------
+
+As Knowledge Distillation involves (at least) two models, ModelOpt simplifies the integration
+process by wrapping both student and teacher into one meta model.
+
+Please see an example Distillation setup below. This example assumes the outputs
+of ``teacher_model`` and ``student_model`` are logits.
+
+.. code-block:: python
+
+    import modelopt.torch.distill as mtd
+
+    distillation_config = {
+        "teacher_model": teacher_factory,  # model initializer
+        "criterion": mtd.LogitsDistillationLoss(),  # callable receiving student and teacher outputs, in order
+        "loss_balancer": mtd.StaticLossBalancer(),  # combines multiple losses; omit if only one distillation loss used
+    }
+
+    distillation_model = mtd.convert(student_model, mode=[("kd_loss", distillation_config)])
+
+The ``teacher_model`` can be either a callable which returns an ``nn.Module`` or a tuple of ``(model_cls, args, kwargs)``.
+The ``criterion`` is the distillation loss used between student and teacher tensors.
+The ``loss_balancer`` determines how the original and distillation losses are combined (if needed).
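+
+For instance, the factory defined above could equivalently be supplied in tuple form (shown only
+to illustrate the accepted shapes; the values are the ones from the earlier snippet):
+
+.. code-block:: python
+
+    # (model_cls_or_callable, args, kwargs)
+    distillation_config["teacher_model"] = (teacher_factory, (), {})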
+
+See :doc:`Distillation <../guides/4_distillation>` for more info.
+
+
+Distill during training
+-----------------------
+
+To distill from teacher to student, simply use the meta model in the usual training loop, while
+also using the meta model's ``.compute_kd_loss()`` method to compute the distillation loss, in addition to
+the original user loss.
+
+An example of Distillation training is given below:
+
+.. code-block:: python
+    :emphasize-lines: 14
+
+    # Setup the data loaders. As example:
+    train_loader = get_train_loader()
+
+    # Define user loss function. As example:
+    loss_fn = get_user_loss_fn()
+
+    for input, labels in train_loader:
+        distillation_model.zero_grad()
+        # Forward through the wrapped models
+        out = distillation_model(input)
+        # Same loss as originally present
+        loss = loss_fn(out, labels)
+        # Combine distillation and user losses
+        loss_total = distillation_model.compute_kd_loss(student_loss=loss)
+        loss_total.backward()
+
+
+.. note::
+    `DataParallel `_ may
+    break ModelOpt's Distillation feature.
+    Note that `HuggingFace Trainer `_
+    uses DataParallel by default.
+
+
+Export trained model
+--------------------
+
+The model can easily be reverted to its original class for further use (i.e., deployment)
+without any ModelOpt modifications attached.
+
+.. code-block:: python
+
+    model = mtd.export(distillation_model)
+
+
+--------------------------------
+
+**Next steps**
+    * Learn more about :doc:`Distillation <../guides/4_distillation>`.
+    * See ModelOpt's :doc:`API documentation <../reference/1_modelopt_api>` for detailed
+      functionality and usage information.
diff --git a/_sources/getting_started/6_sparsity.rst.txt b/_sources/getting_started/6_sparsity.rst.txt
index 72881e1..668533d 100644
--- a/_sources/getting_started/6_sparsity.rst.txt
+++ b/_sources/getting_started/6_sparsity.rst.txt
@@ -6,13 +6,13 @@ Sparsity
 --------
 
 ModelOpt's :doc:`sparsity<../guides/5_sparsity>` feature is an effective technique to reduce the
-memory footprint of deep learning models and accelerate the inference speed. ModelOpt provides an
+memory footprint of deep learning models and accelerate the inference speed. ModelOpt provides the
 easy-to-use API :meth:`mts.sparsify() ` to apply
 weight sparsity to a given model.
 :meth:`mts.sparsify() ` supports
 `NVIDIA 2:4 Sparsity `_ sparsity pattern and various sparsification
-methods, such as (`NVIDIA ASP `_)
-and (`SparseGPT `_).
+methods, such as `NVIDIA ASP `_
+and `SparseGPT `_.
 
 This guide provides a quick start to apply weight sparsity to a PyTorch model using ModelOpt.
 
@@ -38,7 +38,7 @@ Here is a quick example of sparsifying a model to 2:4 sparsity pattern with Spar
     sparsity_config = {"data_loader": data_loader, "collect_func": lambda x: x}
 
     # Sparsify the model and perform calibration (PTS)
-    model = mts.sparsity(model, mode="sparsegpt", config=sparsity_config)
+    model = mts.sparsify(model, mode="sparsegpt", config=sparsity_config)
 
 .. note::
     `data_loader` is only required in case of data-driven sparsity, e.g., SparseGPT for calibration.
@@ -48,10 +48,19 @@ Here is a quick example of sparsifying a model to 2:4 sparsity pattern with Spar
     `data_loader` and `collect_func` can be substituted with a `forward_loop` that iterates the
     model through the calibration dataset.
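+
+For illustration, the ``forward_loop`` variant mentioned in the note could look like the
+following sketch (the dataloader here is a placeholder):
+
+.. code-block:: python
+
+    def forward_loop(model):
+        for batch in data_loader:
+            model(batch)
+
+
+    # Calibrate via the forward loop instead of data_loader + collect_func
+    model = mts.sparsify(model, mode="sparsegpt", config={"forward_loop": forward_loop})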
 
+Sparsity-aware Training (SAT) for PyTorch models
+------------------------------------------------
+
+After sparsifying the model, you can save a checkpoint for the sparsified model and use it for
+fine-tuning. Check out the
+`GitHub end-to-end example `_
+to learn more about SAT.
+
+
 --------------------------------
 
 **Next Steps**
     * Learn more about sparsity and advanced usage of ModelOpt sparsity in
       :doc:`Sparsity guide <../guides/5_sparsity>`.
-    * Checkout out the end-to-end examples on GitHub for PTQ and QAT
-      `here `_.
+    * Check out the `end-to-end example on GitHub `_
+      for PTS and SAT.
diff --git a/_sources/guides/1_quantization.rst.txt b/_sources/guides/1_quantization.rst.txt
index a0ab8ec..2a9bf62 100644
--- a/_sources/guides/1_quantization.rst.txt
+++ b/_sources/guides/1_quantization.rst.txt
@@ -4,10 +4,10 @@ Quantization
 ModelOpt quantization toolkit supports quantization for NVIDIA's hardware and software stack.
 Currently ModelOpt supports quantization in PyTorch and ONNX frameworks.
 
-ModelOpt is based on simulated quantization in the original precision to simulate, test and optimize
-for the best trade-off between the accuracy of the model and different low-precision formats. To
-achieve actual speedups and memory savings, the model with simulated quantization can be exported to
-deployment frameworks, like TensorRT or TensorRT-LLM. Please refer to the
+ModelOpt is based on simulated quantization in the original precision to simulate, test, and
+optimize for the best trade-off between the accuracy of the model and different low-precision
+formats. To achieve actual speedups and memory savings, the model with simulated quantization can be
+exported to deployment frameworks, like TensorRT or TensorRT-LLM. Please refer to the
 `TensorRT-Model-Optimizer GitHub repository `_ for more details and examples.
diff --git a/_sources/guides/4_distillation.rst.txt b/_sources/guides/4_distillation.rst.txt
new file mode 100644
index 0000000..a5543a4
--- /dev/null
+++ b/_sources/guides/4_distillation.rst.txt
@@ -0,0 +1,193 @@
+============
+Distillation
+============
+
+Introduction
+============
+
+ModelOpt's Distillation API (:mod:`modelopt.torch.distill `) allows you to enable a
+knowledge-distillation training pipeline with minimal script modification.
+
+Follow the steps described below to obtain a model trained with direct knowledge transferred from
+a more powerful teacher model using :mod:`modelopt.torch.distill `:
+
+#. **Convert your model via** :meth:`mtd.convert `:
+   Wrap both a teacher and student model into a larger meta-model which abstracts away the
+   interaction between the two.
+#. **Distillation training**: Seamlessly use the meta-model in place of the original model and run
+   the original script with only one additional line of code for loss calculation.
+#. **Checkpoint and re-load**: Save the model via :meth:`mto.save ` and
+   restore via :meth:`mto.restore `
+
+*To find out more about Distillation and related concepts, please refer to the below section*
+:ref:`Distillation Concepts `.
+
+.. _distillation-conversion:
+
+Convert and integrate
+=====================
+
+You can convert your model into a :class:`DistillationModel `
+using :meth:`mtd.convert() `.
+
+
+Example usage:
+
+.. code-block:: python
+
+    import modelopt.torch.distill as mtd
+    from torchvision.models import resnet50
+
+    # User-defined model (student)
+    model = resnet50()
+
+    # Configure and convert for distillation
+    distillation_config = {
+        # `teacher_model` is a model class or callable, or a tuple.
+        # If a tuple, it must be of the form (model_cls_or_callable,) or
+        # (model_cls_or_callable, args) or (model_cls_or_callable, args, kwargs).
+        "teacher_model": teacher_model,
+        "criterion": mtd.LogitsDistillationLoss(),
+        "loss_balancer": mtd.StaticLossBalancer(),
+    }
+    distillation_model = mtd.convert(model, mode=[("kd_loss", distillation_config)])
+
+    # Export model in original class form
+    model_exported = mtd.export(distillation_model)
+
+.. note::
+    The config requires a (non-lambda) Callable to return a teacher model in place of the model
+    itself. This is to avoid re-saving the teacher state dict upon saving the Distillation
+    meta model. Thus, the same callable must be available in the namespace when restoring via
+    the :meth:`mto.restore ` utility.
+
+.. note::
+    As the model is not of the same class anymore, calling ``type()`` on the model after conversion
+    will not work as expected.
+    Though ``isinstance()`` will still work, as the model dynamically becomes a subclass of the
+    original class.
+
+---
+
+.. _distillation-concepts:
+
+Distillation Concepts
+=====================
+
+Below, we will provide an overview of ModelOpt's distillation feature as well as its basic
+concepts and terminology.
+
+Overview
+--------
+
+
+.. list-table:: Glossary
+    :widths: 55 90
+    :header-rows: 0
+
+    * - `Knowledge Distillation`_
+      - The transfer of learnable feature information from a teacher model to a student.
+    * - `Student`_
+      - The model to be trained (can either start from scratch or pre-trained).
+    * - `Teacher`_
+      - The fixed, pre-trained model used as the example the student will "learn" from.
+    * - `Distillation loss`_
+      - A loss function used between the features of a student and teacher to perform Knowledge
+        Distillation, separate from the student's original task loss.
+    * - `Loss Balancer`_
+      - An implementation for a utility which determines how to combine Distillation loss(es) and
+        original student task loss into a single scalar.
+    * - `Soft-label Distillation`_
+      - The specific process of performing Knowledge Distillation between the output logits of
+        teacher and student models.
+
+
+Concepts
+--------
+
+Knowledge Distillation
+^^^^^^^^^^^^^^^^^^^^^^
+
+Distillation can be a broader term used to define any sort of information compressed among models,
+but in this case we refer to basic teacher-student Knowledge Distillation. The process creates an
+auxiliary loss (or can replace the original one) between a model which is already trained (teacher)
+and a model which is not (student), in hopes of making the student learn information (i.e. feature
+maps or logits) which the teacher has already mastered. This can serve multiple purposes:
+
+    **A.** Model-size reduction: A smaller, efficient student model (potentially a pruned teacher) reaching
+    accuracies near or exceeding that of the larger, slower teacher model. (See the
+    `Lottery Ticket Hypothesis <1_>`_ for reasoning behind this, which also applies to pruning)
+
+    **B.** An alternative to pure training: Distilling a model from an existing one (and then
+    fine-tuning) can often be faster than training it from scratch.
+
+    **C.** Module replacement: One can replace a single module within a model with a more efficient one
+    and use distillation on its original outputs to effectively re-integrate it into the whole model.
+
+Student
+^^^^^^^
+
+This is the model we wish to train and use in the end. It ideally meets the desired architectural
+and computational requirements, but is either untrained or requires a boost in accuracy.
+
+Teacher
+^^^^^^^
+
+This is the model from which learned features/information are used to create a loss for the student.
+Usually it is larger and/or slower than desired, but possesses a satisfactory accuracy.
+
+Distillation loss
+^^^^^^^^^^^^^^^^^
+
+To actually "transfer" knowledge from a teacher to student, we need to add (or replace) an
+optimization objective to the student's original loss function(s). This can be as simple as enacting
+MSE on two same-sized activation tensors between the teacher and student, with the assumption that
+the features learned by the teacher are of high-quality and should be imitated as much as possible.
+
+ModelOpt supports specifying a different loss function per layer-output pair, and includes a few
+pre-defined functions for use, though users may often need to define their own.
+Module-pairs-to-loss-function mappings are specified via the ``criterion`` key of the configuration
+dictionary - student and teacher, respectively in order - and the loss function itself should accept
+outputs in the same order as well:
+
+.. code-block:: python
+
+    # Example using pairwise-mapped criterion.
+    # Will perform the loss on the output of ``student_model.classifier`` and ``teacher_model.layers.18``
+    distillation_config = {
+        "teacher_model": teacher_model,
+        "criterion": {("classifier", "layers.18"): mtd.LogitsDistillationLoss()},
+    }
+    distillation_model = mtd.convert(student_model, mode=[("kd_loss", distillation_config)])
+
+The intermediate outputs for the losses are captured by the
+:class:`DistillationModel ` and then the loss(es) are
+invoked using :meth:`DistillationModel.compute_kd_loss() `.
+If present, the original student's non-distillation loss is passed in as an argument.
+
+Writing a custom loss function is often necessary, especially to handle outputs that need to be
+processed to obtain the logits and activations (a minimal sketch is given at the end of this
+guide).
+
+Loss Balancer
+^^^^^^^^^^^^^
+
+As Distillation losses may be applied to several pairs of layers, the losses are returned in the
+form of a dictionary which should be reduced into a scalar value for backpropagation. A Loss
+Balancer (whose interface is defined by
+:class:`DistillationLossBalancer `) serves to fill
+this purpose.
+
+If Distillation loss is only applied to a single pair of layer outputs, and no student loss is available,
+a Loss Balancer should not be provided.
+
+ModelOpt provides a simple Balancer implementation, and the aforementioned interface can be used to
+create custom ones.
+
+Soft-label Distillation
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The scenario involving distillation only on the output logits of student/teacher classification
+models is known as Soft-label Distillation. In this case, one could even omit the student's original
+classification loss altogether if the teacher's outputs are purely preferred over whatever the
+ground truth labels may be.
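+
+As a concrete illustration of a custom criterion (a minimal sketch -- the class name and
+temperature handling are not part of ModelOpt), a soft-label loss can be written as a plain
+``nn.Module`` that takes the student and teacher outputs in order:
+
+.. code-block:: python
+
+    import torch
+    import torch.nn.functional as F
+    from torch import nn
+
+
+    class TemperatureScaledKDLoss(nn.Module):
+        """Hypothetical soft-label loss with temperature scaling."""
+
+        def __init__(self, temperature: float = 2.0):
+            super().__init__()
+            self.temperature = temperature
+
+        def forward(self, out_student: torch.Tensor, out_teacher: torch.Tensor) -> torch.Tensor:
+            # Soften both distributions, then match them with KL divergence.
+            log_p_student = F.log_softmax(out_student / self.temperature, dim=-1)
+            p_teacher = F.softmax(out_teacher / self.temperature, dim=-1)
+            return F.kl_div(log_p_student, p_teacher, reduction="batchmean") * self.temperature**2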
+
+.. _1: https://arxiv.org/abs/1803.03635
diff --git a/_sources/guides/5_sparsity.rst.txt b/_sources/guides/5_sparsity.rst.txt
index 94d14eb..9d1db31 100644
--- a/_sources/guides/5_sparsity.rst.txt
+++ b/_sources/guides/5_sparsity.rst.txt
@@ -7,7 +7,7 @@ Introduction
 
 ModelOpt's Sparsity module (:mod:`modelopt.torch.sparsity `) enables
 you to sparsify the weights of your model. This can be useful for reducing the memory footprint of
-your model, and can also be used to speed up inference.
+your model and can also be used to speed up inference.
 
 Follow the steps described below to obtain a model with sparse weights using ModelOpt's Sparsity
@@ -20,7 +20,7 @@ module :mod:`modelopt.torch.sparsity`:
 #. **Checkpoint and re-load**: Save the model via :meth:`mto.save ` and
    restore via :meth:`mto.restore `
 
-*To find out more about Sparsity and related concepts, please refer to the section below*
+*To find out more about Sparsity and related concepts, please refer to the section on*
 :ref:`Sparsity Concepts `.
 
 .. _sparsity-pts:
@@ -37,7 +37,7 @@ config and a sparsity format as input and returns a sparse model. The sparsity c
 dictionary specifying the layers to sparsify and the optional dataloader for calibration in
 data-driven sparsity, e.g., SparseGPT.
 
-:meth:`mts.sparsify` supports (`NVIDIA ASP <1_>`_) and `SparseGPT <2_>`_ methods for magnitude-based
+:meth:`mts.sparsify` supports `NVIDIA ASP <1_>`_ and `SparseGPT <2_>`_ methods for magnitude-based
 and data-driven sparsity, respectively.
 
 Example usage:
diff --git a/_sources/guides/_basic_quantization.rst.txt b/_sources/guides/_basic_quantization.rst.txt
index c35041d..cb46438 100644
--- a/_sources/guides/_basic_quantization.rst.txt
+++ b/_sources/guides/_basic_quantization.rst.txt
@@ -10,7 +10,7 @@ Precision format
 ****************
 
 The precision format defines the bit-width of the quantized values. Generally, there are integer
 formats (sign bit + mantissa bits) and floating-point formats (sign bit + exponent bits + mantissa
-bits). `FP8 FORMATS FOR DEEP LEARNING `_ provides a detailed
+bits). `Fp8 Formats for Deep Learning `_ provides a detailed
 explanation of the floating-point formats.
 
 Scaling factor
diff --git a/_sources/guides/_onnx_quantization.rst.txt b/_sources/guides/_onnx_quantization.rst.txt
index ecdc15f..5a426c7 100644
--- a/_sources/guides/_onnx_quantization.rst.txt
+++ b/_sources/guides/_onnx_quantization.rst.txt
@@ -8,7 +8,7 @@ ModelOpt provides ONNX quantization that works together with `TensorRT Explicit
 #. White-box design allowing expert users to customize the quantization process.
 #. Better support for vision transformers.
 
-Currently ONNX quantization only supports INT8 quantization.
+Currently ONNX quantization supports INT4 and INT8 quantization.
 
 .. note::
 
@@ -18,7 +18,7 @@ Currently ONNX quantization only supports INT8 quantization.
 Requirements
 ============
 
-#. TensorRT >= 8.6 ( >= 9.1 preferred). Please refer to `TensorRT 9.1 download link `_.
+#. TensorRT >= 8.6 ( >= 10.0 preferred). Please refer to `TensorRT 10.0 download link `_.
 
 
 
@@ -29,11 +29,7 @@ PTQ should be done with a calibration dataset. If calibration dataset is not pro
 Prepare calibration dataset
 ---------------------------
 
-ModelOpt supports two types of calibration data format: image directory or numpy file.
-
-Image directory only works for single-input ONNX models.
-
-Numpy file works for both single-input and multi-input ONNX models. In the case of multi-input ONNX models, the numpy file should be a dictionary with keys as input names and values as numpy arrays.
+ModelOpt supports an npz/npy file as the calibration data format; the numpy file should be a dictionary with model input names as keys and numpy arrays as values.
+ModelOpt supports npz/npy file as calibration data format and that numpy file should be a dictionary with keys as model input names and values as numpy arrays. .. code-block:: python diff --git a/_sources/guides/_pytorch_quantization.rst.txt b/_sources/guides/_pytorch_quantization.rst.txt index 0ba87ca..28d6262 100644 --- a/_sources/guides/_pytorch_quantization.rst.txt +++ b/_sources/guides/_pytorch_quantization.rst.txt @@ -2,8 +2,6 @@ PyTorch Quantization ==================== -ModelOpt PyTorch quantization is refactored based on `pytorch_quantization `_. - Key advantages offered by ModelOpt's PyTorch quantization: #. Support advanced quantization formats, e.g., Block-wise Int4 and FP8. @@ -69,7 +67,7 @@ To verify that the quantizer nodes are placed correctly in the model, let's prin # Print quantization summary after successfully quantizing the model with mtq.quantize # This will show the quantizers inserted in the model and their configurations - mtq.print_quantization_summary(model) + mtq.print_quant_summary(model) After PTQ, the model can be exported to ONNX with the normal PyTorch ONNX export flow. @@ -167,24 +165,31 @@ Under the hood, ModelOpt :meth:`mtq.quantize() ` (quantizer modules) into the model layers like linear layer, conv layer etc. and patches their forward method to perform quantization. -To create :class:`TensorQuantizer` instance, you need to specify :class:`QuantDescriptor `, which -describes the quantization parameters like quantization bits, axis etc. +The quantization parameters are as described in :class:`QuantizerAttributeConfig `. +They can be set at initialization by passing :class:`QuantizerAttributeConfig ` +or later by calling :meth:`TensorQuantizer.set_from_attribute_config() `. +If the quantization parameters are not set explicitly, the quantizer will use the default values. Here is an example of creating a quantizer module: .. code-block:: python - from modelopt.torch.quantization.tensor_quant import QuantDescriptor + from modelopt.torch.quantization.config import QuantizerAttributeConfig from modelopt.torch.quantization.nn import TensorQuantizer - # Create quantizer descriptor - quant_desc = QuantDescriptor(num_bits=8, axis=(-1,), unsigned=True) - - # Create quantizer module - quantizer = TensorQuantizer(quant_desc) + # Create quantizer module with default quantization parameters + quantizer = TensorQuantizer() quant_x = quantizer(x) # Quantize input x + # Create quantizer module with custom quantization parameters + # Example setting for INT4 block-wise quantization + quantizer_custom = TensorQuantizer(QuantizerAttributeConfig(num_bits=4, block_sizes={-1: 128})) + + # Quantize input with custom quantization parameters + quant_x = quantizer_custom(x) # Quantize input x + + .. _customize_quantizer_config: Customize quantizer config @@ -276,3 +281,12 @@ Weight folding avoids repeated quantization of weights during each inferece forw .. note:: After weight folding, the model can no longer be exported to ONNX or fine-tuned with QAT. + +Migrate from pytorch_quantization +================================= + +ModelOpt PyTorch quantization is refactored from and extends upon +`pytorch_quantization `_. + +Previous users of ``pytorch_quantization`` can simply migrate to ``modelopt.torch.quantization`` by +replacing the import statements. 
diff --git a/_sources/reference/0_versions.rst.txt b/_sources/reference/0_versions.rst.txt index 58dc03f..18daecf 100644 --- a/_sources/reference/0_versions.rst.txt +++ b/_sources/reference/0_versions.rst.txt @@ -1 +1,5 @@ +========= +Changelog +========= + .. include:: ../../../CHANGELOG.rst diff --git a/_sources/reference/generated/modelopt.deploy.llm.rst.txt b/_sources/reference/generated/modelopt.deploy.llm.rst.txt index 7a09bbe..ea34ed7 100644 --- a/_sources/reference/generated/modelopt.deploy.llm.rst.txt +++ b/_sources/reference/generated/modelopt.deploy.llm.rst.txt @@ -17,9 +17,6 @@ llm modelopt.deploy.llm.generate - modelopt.deploy.llm.model_config_trt - - modelopt.deploy.llm.nemo_utils diff --git a/_sources/reference/generated/modelopt.onnx.op_types.rst.txt b/_sources/reference/generated/modelopt.onnx.op_types.rst.txt index 9325558..861cec3 100644 --- a/_sources/reference/generated/modelopt.onnx.op_types.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.op_types.rst.txt @@ -39,7 +39,6 @@ op\_types .. autosummary:: :nosignatures: - get_quantizable_op_types is_binary_op is_control_flow_op is_conversion_op diff --git a/_sources/reference/generated/modelopt.onnx.quantization.calib_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.calib_utils.rst.txt index 8c56bf3..de103a8 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.calib_utils.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.calib_utils.rst.txt @@ -42,4 +42,11 @@ calib\_utils .. Overview table of available functions in the module + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + import_scales_from_calib_cache + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.extensions.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.extensions.rst.txt new file mode 100644 index 0000000..cba39d7 --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.extensions.rst.txt @@ -0,0 +1,37 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +extensions +========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.quantization.extensions + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. 
Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.deploy.llm.model_config_trt.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.fp8.rst.txt similarity index 87% rename from _sources/reference/generated/modelopt.deploy.llm.model_config_trt.rst.txt rename to _sources/reference/generated/modelopt.onnx.quantization.fp8.rst.txt index 856ecca..b002df1 100644 --- a/_sources/reference/generated/modelopt.deploy.llm.model_config_trt.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.fp8.rst.txt @@ -1,7 +1,7 @@ .. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst -model\_config\_trt -================== +fp8 +=== .. List the submodules @@ -17,7 +17,7 @@ model\_config\_trt __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, add in ``YYY.__module__ = __name__``. -.. automodule:: modelopt.deploy.llm.model_config_trt +.. automodule:: modelopt.onnx.quantization.fp8 :members: :undoc-members: @@ -39,7 +39,6 @@ model\_config\_trt .. autosummary:: :nosignatures: - build_tensorrt_llm - build_tensorrt_llm_rank + quantize \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.graph_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.graph_utils.rst.txt index 68090db..7da4a25 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.graph_utils.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.graph_utils.rst.txt @@ -39,12 +39,18 @@ graph\_utils .. autosummary:: :nosignatures: + add_fp16_fp32_cast build_non_residual_input_map classify_partition_nodes filter_quantizable_kgen_heads + find_fp8_mha_partitions + find_mha_partitions + find_nodes_to_exclude get_fusible_backbone has_const_input has_path_type + insert_fp8_mha_casts + insert_matmul_casts is_const_input print_stat remove_partial_input_qdq diff --git a/_sources/reference/generated/modelopt.onnx.quantization.int4.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.int4.rst.txt index d02940e..5bc72b5 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.int4.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.int4.rst.txt @@ -49,9 +49,9 @@ int4 dq_tensor find_scales quant_tensor - quantize_int4 - quantize_int4_awq_clip - quantize_int4_rtn + quantize + quantize_awq_clip + quantize_rtn rtn \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.int8.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.int8.rst.txt new file mode 100644 index 0000000..41ad99e --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.int8.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +int8 +==== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. 
automodule:: modelopt.onnx.quantization.int8 + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + quantize + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.ort_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.ort_utils.rst.txt index 9ac6f96..39d1b86 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.ort_utils.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.ort_utils.rst.txt @@ -39,6 +39,8 @@ ort\_utils .. autosummary:: :nosignatures: + configure_ort create_inference_session + get_quantizable_op_types \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.qdq_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.qdq_utils.rst.txt index 4609774..36e220b 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.qdq_utils.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.qdq_utils.rst.txt @@ -48,6 +48,7 @@ qdq\_utils make_gs_quantized_weight make_gs_scale make_gs_zp + replace_scale_values use_trt_qdq_ops \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.quant_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.quant_utils.rst.txt index caf23db..72e2912 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.quant_utils.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.quant_utils.rst.txt @@ -39,6 +39,7 @@ quant\_utils .. autosummary:: :nosignatures: + pack_float32_to_4bit_cpp_based pack_float32_to_4bit_optimized \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.quantize.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.quantize.rst.txt index 6bf0871..bcaeb1e 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.quantize.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.quantize.rst.txt @@ -1,44 +1,6 @@ -.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst +modelopt.onnx.quantization.quantize +=================================== -quantize -======== +.. currentmodule:: modelopt.onnx.quantization -.. List the submodules - - - - - -.. Autodoc anything defined in the module itself - - TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED - We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported - For reimports that should be documented somewhere other than where they are defined, the re-imports - __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, - add in ``YYY.__module__ = __name__``. - -.. automodule:: modelopt.onnx.quantization.quantize - :members: - :undoc-members: - - .. Also show members without docstrings. Only members from __all__ are considered as per conf.py - .. Ideally we should add docstrings for these members. - - - .. Overview table of available classes in the module - - - - - - .. Overview table of available functions in the module - - - .. rubric:: Functions - - .. 
autosummary:: - :nosignatures: - - quantize - - \ No newline at end of file +.. autofunction:: quantize \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.rst.txt index 3af2c1a..00aa2fe 100644 --- a/_sources/reference/generated/modelopt.onnx.quantization.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.quantization.rst.txt @@ -17,6 +17,12 @@ quantization modelopt.onnx.quantization.calib_utils + modelopt.onnx.quantization.extensions + + + modelopt.onnx.quantization.fp8 + + modelopt.onnx.quantization.graph_utils @@ -26,6 +32,9 @@ quantization modelopt.onnx.quantization.int4 + modelopt.onnx.quantization.int8 + + modelopt.onnx.quantization.operators diff --git a/_sources/reference/generated/modelopt.onnx.utils.rst.txt b/_sources/reference/generated/modelopt.onnx.utils.rst.txt index cbb0252..a853c6a 100644 --- a/_sources/reference/generated/modelopt.onnx.utils.rst.txt +++ b/_sources/reference/generated/modelopt.onnx.utils.rst.txt @@ -39,7 +39,7 @@ utils .. autosummary:: :nosignatures: - duplicate_shared_linear_weights + duplicate_shared_constants find_lowest_common_ancestor gen_random_inputs get_all_input_names @@ -64,6 +64,7 @@ utils remove_weights_data save_onnx save_onnx_bytes_to_dir + udpate_domain validate_batch_size validate_onnx diff --git a/_sources/reference/generated/modelopt.torch.distill.config.rst.txt b/_sources/reference/generated/modelopt.torch.distill.config.rst.txt new file mode 100644 index 0000000..20278a4 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.config.rst.txt @@ -0,0 +1,37 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +config +====== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.distill.config + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.distill.distillation.rst.txt b/_sources/reference/generated/modelopt.torch.distill.distillation.rst.txt new file mode 100644 index 0000000..5a7d6cf --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.distillation.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +distillation +============ + +.. List the submodules + + + + + +.. 
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.distill.distillation + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + convert + export + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.distill.distillation_model.rst.txt b/_sources/reference/generated/modelopt.torch.distill.distillation_model.rst.txt new file mode 100644 index 0000000..8456a96 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.distillation_model.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +distillation\_model +=================== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.distill.distillation_model + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + DistillationModel + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.distill.loss_balancers.rst.txt b/_sources/reference/generated/modelopt.torch.distill.loss_balancers.rst.txt new file mode 100644 index 0000000..55859fb --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.loss_balancers.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +loss\_balancers +=============== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. 
automodule:: modelopt.torch.distill.loss_balancers + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + DistillationLossBalancer + StaticLossBalancer + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.distill.losses.rst.txt b/_sources/reference/generated/modelopt.torch.distill.losses.rst.txt new file mode 100644 index 0000000..1875289 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.losses.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +losses +====== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.distill.losses + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + LogitsDistillationLoss + MGDLoss + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.distill.mode.rst.txt b/_sources/reference/generated/modelopt.torch.distill.mode.rst.txt new file mode 100644 index 0000000..06a7dff --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.mode.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +mode +==== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.distill.mode + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + ExportStudentModeDescriptor + KnowledgeDistillationModeDescriptor + + + + + .. 
Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.distill.registry.rst.txt b/_sources/reference/generated/modelopt.torch.distill.registry.rst.txt new file mode 100644 index 0000000..765d2e7 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.registry.rst.txt @@ -0,0 +1,37 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +registry +======== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.distill.registry + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.distill.rst.txt b/_sources/reference/generated/modelopt.torch.distill.rst.txt new file mode 100644 index 0000000..55b95ce --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.distill.rst.txt @@ -0,0 +1,65 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +distill +======= + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.torch.distill.config + + + modelopt.torch.distill.distillation + + + modelopt.torch.distill.distillation_model + + + modelopt.torch.distill.loss_balancers + + + modelopt.torch.distill.losses + + + modelopt.torch.distill.mode + + + modelopt.torch.distill.registry + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.distill + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.distribute.rst.txt b/_sources/reference/generated/modelopt.torch.export.distribute.rst.txt index 970af8c..e929ca8 100644 --- a/_sources/reference/generated/modelopt.torch.export.distribute.rst.txt +++ b/_sources/reference/generated/modelopt.torch.export.distribute.rst.txt @@ -46,11 +46,7 @@ distribute .. 
autosummary:: :nosignatures: - barrier get_configs_parallel - get_group - get_rank get_tensors_parallel - get_world_size \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.hf_config_map.rst.txt b/_sources/reference/generated/modelopt.torch.export.hf_config_map.rst.txt new file mode 100644 index 0000000..292c226 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.export.hf_config_map.rst.txt @@ -0,0 +1,37 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +hf\_config\_map +=============== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.export.hf_config_map + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.layer_utils.rst.txt b/_sources/reference/generated/modelopt.torch.export.layer_utils.rst.txt index 9747b72..14b7ea9 100644 --- a/_sources/reference/generated/modelopt.torch.export.layer_utils.rst.txt +++ b/_sources/reference/generated/modelopt.torch.export.layer_utils.rst.txt @@ -40,19 +40,23 @@ layer\_utils :nosignatures: build_attention_config + build_conv_config build_decoder_config build_embedding_config build_layernorm_config build_linear_config + build_medusa_heads_config build_mlp_config build_moe_config build_qkv + build_recurrent_config build_stacked_experts check_model_compatibility get_activation_scaling_factor get_kv_cache_dtype get_kv_cache_scaling_factor get_prequant_scaling_factor + get_quantization_format get_scaling_factor get_transformer_layers get_weight_block_size @@ -65,5 +69,7 @@ layer\_utils is_linear is_mlp is_moe + is_quantlinear + is_recurrent \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.model_config.rst.txt b/_sources/reference/generated/modelopt.torch.export.model_config.rst.txt index d5dd2f8..aeb2ae3 100644 --- a/_sources/reference/generated/modelopt.torch.export.model_config.rst.txt +++ b/_sources/reference/generated/modelopt.torch.export.model_config.rst.txt @@ -34,15 +34,20 @@ model\_config :nosignatures: AttentionConfig + ConvConfig DecoderLayerConfig EmbeddingConfig ExpertConfig LayernormConfig + LinearActConfig LinearConfig MLPConfig MOEConfig + MedusaHeadConfig ModelConfig QKVConfig + RecurrentConfig + RgLruConfig diff --git a/_sources/reference/generated/modelopt.torch.export.model_config_export.rst.txt b/_sources/reference/generated/modelopt.torch.export.model_config_export.rst.txt index 8cdc857..7caee45 100644 --- a/_sources/reference/generated/modelopt.torch.export.model_config_export.rst.txt +++ b/_sources/reference/generated/modelopt.torch.export.model_config_export.rst.txt @@ -39,6 +39,7 @@ model\_config\_export .. 
autosummary:: :nosignatures: + export_hf_checkpoint export_tensorrt_llm_checkpoint torch_to_tensorrt_llm_checkpoint diff --git a/_sources/reference/generated/modelopt.torch.export.rst.txt b/_sources/reference/generated/modelopt.torch.export.rst.txt index 0a5dccb..409c8f8 100644 --- a/_sources/reference/generated/modelopt.torch.export.rst.txt +++ b/_sources/reference/generated/modelopt.torch.export.rst.txt @@ -17,6 +17,9 @@ export modelopt.torch.export.distribute + modelopt.torch.export.hf_config_map + + modelopt.torch.export.layer_utils @@ -41,6 +44,9 @@ export modelopt.torch.export.transformer_engine + modelopt.torch.export.vllm + + .. Autodoc anything defined in the module itself diff --git a/_sources/reference/generated/modelopt.torch.export.scaling_factor_utils.rst.txt b/_sources/reference/generated/modelopt.torch.export.scaling_factor_utils.rst.txt index c8b0cbe..4d2e9ee 100644 --- a/_sources/reference/generated/modelopt.torch.export.scaling_factor_utils.rst.txt +++ b/_sources/reference/generated/modelopt.torch.export.scaling_factor_utils.rst.txt @@ -39,6 +39,7 @@ scaling\_factor\_utils .. autosummary:: :nosignatures: + adjust_attn_amax_values get_weights_scaling_factor resmooth_and_get_scale diff --git a/_sources/reference/generated/modelopt.torch.export.tensorrt_llm_utils.rst.txt b/_sources/reference/generated/modelopt.torch.export.tensorrt_llm_utils.rst.txt index e7b943c..b68f1fe 100644 --- a/_sources/reference/generated/modelopt.torch.export.tensorrt_llm_utils.rst.txt +++ b/_sources/reference/generated/modelopt.torch.export.tensorrt_llm_utils.rst.txt @@ -41,6 +41,8 @@ tensorrt\_llm\_utils convert_to_tensorrt_llm_config is_tensorrt_llm_0_8_or_9 + prepare_enc_dec_decoder_layer + prepare_enc_dec_export_dir weights_to_npz \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.vllm.rst.txt b/_sources/reference/generated/modelopt.torch.export.vllm.rst.txt new file mode 100644 index 0000000..824a66e --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.export.vllm.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +vllm +==== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.export.vllm + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + export_to_vllm + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.opt.utils.rst.txt b/_sources/reference/generated/modelopt.torch.opt.utils.rst.txt index d815dcc..557a53d 100644 --- a/_sources/reference/generated/modelopt.torch.opt.utils.rst.txt +++ b/_sources/reference/generated/modelopt.torch.opt.utils.rst.txt @@ -39,6 +39,7 @@ utils .. 
autosummary:: :nosignatures: + get_hparam is_configurable is_dynamic named_hparams diff --git a/_sources/reference/generated/modelopt.torch.quantization.algorithms.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.algorithms.rst.txt new file mode 100644 index 0000000..8b5faf5 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.algorithms.rst.txt @@ -0,0 +1,46 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +algorithms +========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.algorithms + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + AutoQuantizeSearcher + QuantRecipe + QuantRecipeHparam + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.extensions.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.extensions.rst.txt index bab84af..3a8f7bc 100644 --- a/_sources/reference/generated/modelopt.torch.quantization.extensions.rst.txt +++ b/_sources/reference/generated/modelopt.torch.quantization.extensions.rst.txt @@ -34,4 +34,12 @@ extensions .. Overview table of available functions in the module + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + get_cuda_ext + get_cuda_ext_fp8 + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.model_quant.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.model_quant.rst.txt index 6fcdfab..c4a3c87 100644 --- a/_sources/reference/generated/modelopt.torch.quantization.model_quant.rst.txt +++ b/_sources/reference/generated/modelopt.torch.quantization.model_quant.rst.txt @@ -40,6 +40,7 @@ model\_quant :nosignatures: quantize + auto_quantize disable_quantizer enable_quantizer print_quant_summary diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_rnn.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_rnn.rst.txt new file mode 100644 index 0000000..f793965 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_rnn.rst.txt @@ -0,0 +1,58 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +quant\_rnn +========== + +.. List the submodules + + + + + +.. 
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.nn.modules.quant_rnn + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + QuantRNNBase + QuantRNNFullBase + RNNLayerForward + VFRNNForward + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + get_quantized_rnn_layer_forward + get_quantized_rnn_layer_variable_len_forward + get_quantized_rnn_layer_variable_len_reverse_forward + lstm_cell_with_proj + quantized_cell_forward + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.rst.txt index 2e2017a..653feb0 100644 --- a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.rst.txt +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.rst.txt @@ -38,6 +38,9 @@ modules modelopt.torch.quantization.nn.modules.quant_pooling + modelopt.torch.quantization.nn.modules.quant_rnn + + modelopt.torch.quantization.nn.modules.tensor_quantizer diff --git a/_sources/reference/generated/modelopt.torch.quantization.plugins.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.plugins.rst.txt index f6dad09..3abad44 100644 --- a/_sources/reference/generated/modelopt.torch.quantization.plugins.rst.txt +++ b/_sources/reference/generated/modelopt.torch.quantization.plugins.rst.txt @@ -22,6 +22,7 @@ plugins + .. Autodoc anything defined in the module itself TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED diff --git a/_sources/reference/generated/modelopt.torch.quantization.qtensor.base_qtensor.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.qtensor.base_qtensor.rst.txt new file mode 100644 index 0000000..b9f001e --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.qtensor.base_qtensor.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +base\_qtensor +============= + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.qtensor.base_qtensor + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. 
Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + BaseQuantizedTensor + QTensorWrapper + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.qtensor.int4_tensor.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.qtensor.int4_tensor.rst.txt new file mode 100644 index 0000000..48e1aee --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.qtensor.int4_tensor.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +int4\_tensor +============ + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.qtensor.int4_tensor + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + INT4QTensor + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.qtensor.nf4_tensor.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.qtensor.nf4_tensor.rst.txt new file mode 100644 index 0000000..8262774 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.qtensor.nf4_tensor.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +nf4\_tensor +=========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.qtensor.nf4_tensor + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + NF4QTensor + + + + + .. 
Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.qtensor.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.qtensor.rst.txt new file mode 100644 index 0000000..9cb69a0 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.qtensor.rst.txt @@ -0,0 +1,53 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +qtensor +======= + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.torch.quantization.qtensor.base_qtensor + + + modelopt.torch.quantization.qtensor.int4_tensor + + + modelopt.torch.quantization.qtensor.nf4_tensor + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.qtensor + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.rst.txt index f1dbf86..32b4657 100644 --- a/_sources/reference/generated/modelopt.torch.quantization.rst.txt +++ b/_sources/reference/generated/modelopt.torch.quantization.rst.txt @@ -14,6 +14,9 @@ quantization :recursive: + modelopt.torch.quantization.algorithms + + modelopt.torch.quantization.calib @@ -44,6 +47,9 @@ quantization modelopt.torch.quantization.plugins + modelopt.torch.quantization.qtensor + + modelopt.torch.quantization.quant_modules diff --git a/_sources/reference/generated/modelopt.torch.quantization.tensor_quant.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.tensor_quant.rst.txt index 71f1b1f..2602dcb 100644 --- a/_sources/reference/generated/modelopt.torch.quantization.tensor_quant.rst.txt +++ b/_sources/reference/generated/modelopt.torch.quantization.tensor_quant.rst.txt @@ -36,9 +36,7 @@ tensor\_quant FakeAffineTensorQuantFunction FakeTensorQuantFunction LegacyFakeTensorQuantFunction - QuantDescriptor ScaledE4M3Function - ScaledQuantDescriptor TensorQuantFunction diff --git a/_sources/reference/generated/modelopt.torch.rst.txt b/_sources/reference/generated/modelopt.torch.rst.txt index a00a1df..036a86d 100644 --- a/_sources/reference/generated/modelopt.torch.rst.txt +++ b/_sources/reference/generated/modelopt.torch.rst.txt @@ -14,6 +14,9 @@ torch :recursive: + modelopt.torch.distill + + modelopt.torch.export diff --git a/_sources/reference/generated/modelopt.torch.utils.distributed.rst.txt b/_sources/reference/generated/modelopt.torch.utils.distributed.rst.txt index ffca0fb..1757440 100644 --- a/_sources/reference/generated/modelopt.torch.utils.distributed.rst.txt +++ b/_sources/reference/generated/modelopt.torch.utils.distributed.rst.txt @@ -40,13 +40,15 
@@ distributed :nosignatures: backend - size - rank - is_master barrier - set_data_parallel_group - set_tensor_parallel_group get_data_parallel_group get_tensor_parallel_group + is_available + is_initialized + is_master + rank + set_data_parallel_group + set_tensor_parallel_group + size \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.utils.network.rst.txt b/_sources/reference/generated/modelopt.torch.utils.network.rst.txt index 45d6122..9fa96d7 100644 --- a/_sources/reference/generated/modelopt.torch.utils.network.rst.txt +++ b/_sources/reference/generated/modelopt.torch.utils.network.rst.txt @@ -51,6 +51,7 @@ network param_num param_num_from_forward remove_bn + run_forward_loop set_submodule standardize_model_args standardize_model_like_tuple @@ -58,6 +59,6 @@ network standardize_constructor_args unwrap_model zero_grad - run_forward_loop + create_param_grad_clear_hook \ No newline at end of file diff --git a/_sources/support/1_contact.rst.txt b/_sources/support/1_contact.rst.txt index 54e47ae..ae60244 100644 --- a/_sources/support/1_contact.rst.txt +++ b/_sources/support/1_contact.rst.txt @@ -3,5 +3,4 @@ Contact us ========== -You may raise an issue on `GitHub `_ -for any questions or issues you may have. +Contact us by submitting issues on `GitHub `_. diff --git a/_sources/support/2_faqs.rst.txt b/_sources/support/2_faqs.rst.txt index 59da8dd..bf9b9bc 100644 --- a/_sources/support/2_faqs.rst.txt +++ b/_sources/support/2_faqs.rst.txt @@ -3,6 +3,9 @@ FAQs ==== +Known Issues +============ + 1. Potential memory leak for ``FSDP`` with ``use_orig_params=True`` ------------------------------------------------------------------- diff --git a/deployment/1_tensorrt_llm_deployment.html b/deployment/1_tensorrt_llm_deployment.html index 43529e5..149ae47 100644 --- a/deployment/1_tensorrt_llm_deployment.html +++ b/deployment/1_tensorrt_llm_deployment.html @@ -4,7 +4,7 @@ - TensorRT-LLM Deployment — Model Optimizer 0.11.2 + TensorRT-LLM Deployment — Model Optimizer 0.15.0 [The remaining hunks of this HTML page diff carried only stripped markup (stylesheet and script asset links updated for the 0.15.0 build); no recoverable text.]