diff --git a/.buildinfo b/.buildinfo new file mode 100644 index 0000000..f8b07ad --- /dev/null +++ b/.buildinfo @@ -0,0 +1,4 @@ +# Sphinx build info version 1 +# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. +config: 89ada319c94fcb1610b7f80d777e8b12 +tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/.doctrees/deployment/1_tensorrt_llm_deployment.doctree b/.doctrees/deployment/1_tensorrt_llm_deployment.doctree new file mode 100644 index 0000000..ecf83fb Binary files /dev/null and b/.doctrees/deployment/1_tensorrt_llm_deployment.doctree differ diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle new file mode 100644 index 0000000..b4a8fc1 Binary files /dev/null and b/.doctrees/environment.pickle differ diff --git a/.doctrees/examples/0_all_examples.doctree b/.doctrees/examples/0_all_examples.doctree new file mode 100644 index 0000000..154cbaa Binary files /dev/null and b/.doctrees/examples/0_all_examples.doctree differ diff --git a/.doctrees/getting_started/1_overview.doctree b/.doctrees/getting_started/1_overview.doctree new file mode 100644 index 0000000..7be2a67 Binary files /dev/null and b/.doctrees/getting_started/1_overview.doctree differ diff --git a/.doctrees/getting_started/2_installation.doctree b/.doctrees/getting_started/2_installation.doctree new file mode 100644 index 0000000..741334c Binary files /dev/null and b/.doctrees/getting_started/2_installation.doctree differ diff --git a/.doctrees/getting_started/3_quantization.doctree b/.doctrees/getting_started/3_quantization.doctree new file mode 100644 index 0000000..0ce73de Binary files /dev/null and b/.doctrees/getting_started/3_quantization.doctree differ diff --git a/.doctrees/getting_started/6_sparsity.doctree b/.doctrees/getting_started/6_sparsity.doctree new file mode 100644 index 0000000..60fb311 Binary files /dev/null and b/.doctrees/getting_started/6_sparsity.doctree differ diff --git a/.doctrees/guides/1_quantization.doctree b/.doctrees/guides/1_quantization.doctree new file mode 100644 index 0000000..43906e9 Binary files /dev/null and b/.doctrees/guides/1_quantization.doctree differ diff --git a/.doctrees/guides/5_sparsity.doctree b/.doctrees/guides/5_sparsity.doctree new file mode 100644 index 0000000..a1e8396 Binary files /dev/null and b/.doctrees/guides/5_sparsity.doctree differ diff --git a/.doctrees/guides/_basic_quantization.doctree b/.doctrees/guides/_basic_quantization.doctree new file mode 100644 index 0000000..82c5be6 Binary files /dev/null and b/.doctrees/guides/_basic_quantization.doctree differ diff --git a/.doctrees/guides/_choosing_quant_methods.doctree b/.doctrees/guides/_choosing_quant_methods.doctree new file mode 100644 index 0000000..2599ce8 Binary files /dev/null and b/.doctrees/guides/_choosing_quant_methods.doctree differ diff --git a/.doctrees/guides/_onnx_quantization.doctree b/.doctrees/guides/_onnx_quantization.doctree new file mode 100644 index 0000000..980db0b Binary files /dev/null and b/.doctrees/guides/_onnx_quantization.doctree differ diff --git a/.doctrees/guides/_pytorch_quantization.doctree b/.doctrees/guides/_pytorch_quantization.doctree new file mode 100644 index 0000000..44f7511 Binary files /dev/null and b/.doctrees/guides/_pytorch_quantization.doctree differ diff --git a/.doctrees/index.doctree b/.doctrees/index.doctree new file mode 100644 index 0000000..5b3f64d Binary files /dev/null and b/.doctrees/index.doctree differ diff --git a/.doctrees/reference/0_versions.doctree 
b/.doctrees/reference/0_versions.doctree new file mode 100644 index 0000000..993786f Binary files /dev/null and b/.doctrees/reference/0_versions.doctree differ diff --git a/.doctrees/reference/1_modelopt_api.doctree b/.doctrees/reference/1_modelopt_api.doctree new file mode 100644 index 0000000..023ea9c Binary files /dev/null and b/.doctrees/reference/1_modelopt_api.doctree differ diff --git a/.doctrees/reference/generated/modelopt.deploy.doctree b/.doctrees/reference/generated/modelopt.deploy.doctree new file mode 100644 index 0000000..b460ffd Binary files /dev/null and b/.doctrees/reference/generated/modelopt.deploy.doctree differ diff --git a/.doctrees/reference/generated/modelopt.deploy.llm.doctree b/.doctrees/reference/generated/modelopt.deploy.llm.doctree new file mode 100644 index 0000000..2a1f1ad Binary files /dev/null and b/.doctrees/reference/generated/modelopt.deploy.llm.doctree differ diff --git a/.doctrees/reference/generated/modelopt.deploy.llm.generate.doctree b/.doctrees/reference/generated/modelopt.deploy.llm.generate.doctree new file mode 100644 index 0000000..cd4ad15 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.deploy.llm.generate.doctree differ diff --git a/.doctrees/reference/generated/modelopt.deploy.llm.model_config_trt.doctree b/.doctrees/reference/generated/modelopt.deploy.llm.model_config_trt.doctree new file mode 100644 index 0000000..58dc3bc Binary files /dev/null and b/.doctrees/reference/generated/modelopt.deploy.llm.model_config_trt.doctree differ diff --git a/.doctrees/reference/generated/modelopt.deploy.llm.nemo_utils.doctree b/.doctrees/reference/generated/modelopt.deploy.llm.nemo_utils.doctree new file mode 100644 index 0000000..2ba194f Binary files /dev/null and b/.doctrees/reference/generated/modelopt.deploy.llm.nemo_utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.doctree b/.doctrees/reference/generated/modelopt.onnx.doctree new file mode 100644 index 0000000..5bed462 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.op_types.doctree b/.doctrees/reference/generated/modelopt.onnx.op_types.doctree new file mode 100644 index 0000000..53c54ea Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.op_types.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.calib_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.calib_utils.doctree new file mode 100644 index 0000000..39b0c32 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.calib_utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.doctree new file mode 100644 index 0000000..aec17b1 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.graph_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.graph_utils.doctree new file mode 100644 index 0000000..09e9d7b Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.graph_utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.gs_patching.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.gs_patching.doctree new file mode 100644 index 0000000..993c359 Binary files /dev/null and 
b/.doctrees/reference/generated/modelopt.onnx.quantization.gs_patching.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.int4.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.int4.doctree new file mode 100644 index 0000000..dccb0be Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.int4.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.operators.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.operators.doctree new file mode 100644 index 0000000..11ed336 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.operators.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.ort_patching.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.ort_patching.doctree new file mode 100644 index 0000000..b3eaa47 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.ort_patching.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.ort_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.ort_utils.doctree new file mode 100644 index 0000000..c76eb5f Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.ort_utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.partitioning.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.partitioning.doctree new file mode 100644 index 0000000..a555768 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.partitioning.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.qdq_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.qdq_utils.doctree new file mode 100644 index 0000000..04c744c Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.qdq_utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.quant_utils.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.quant_utils.doctree new file mode 100644 index 0000000..3d2b1ca Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.quant_utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.quantization.quantize.doctree b/.doctrees/reference/generated/modelopt.onnx.quantization.quantize.doctree new file mode 100644 index 0000000..da5a0ee Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.quantization.quantize.doctree differ diff --git a/.doctrees/reference/generated/modelopt.onnx.utils.doctree b/.doctrees/reference/generated/modelopt.onnx.utils.doctree new file mode 100644 index 0000000..5339a16 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.onnx.utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.doctree b/.doctrees/reference/generated/modelopt.torch.doctree new file mode 100644 index 0000000..cd4233c Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.export.distribute.doctree b/.doctrees/reference/generated/modelopt.torch.export.distribute.doctree new file mode 100644 index 0000000..97c6425 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.export.distribute.doctree differ diff --git 
a/.doctrees/reference/generated/modelopt.torch.export.doctree b/.doctrees/reference/generated/modelopt.torch.export.doctree new file mode 100644 index 0000000..1a4d247 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.export.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.export.layer_utils.doctree b/.doctrees/reference/generated/modelopt.torch.export.layer_utils.doctree new file mode 100644 index 0000000..19ca4d8 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.export.layer_utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.export.model_config.doctree b/.doctrees/reference/generated/modelopt.torch.export.model_config.doctree new file mode 100644 index 0000000..d451e1e Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.export.model_config.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.export.model_config_export.doctree b/.doctrees/reference/generated/modelopt.torch.export.model_config_export.doctree new file mode 100644 index 0000000..1040ff5 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.export.model_config_export.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.export.model_config_utils.doctree b/.doctrees/reference/generated/modelopt.torch.export.model_config_utils.doctree new file mode 100644 index 0000000..b9a3104 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.export.model_config_utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.export.postprocess.doctree b/.doctrees/reference/generated/modelopt.torch.export.postprocess.doctree new file mode 100644 index 0000000..c43455d Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.export.postprocess.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.export.scaling_factor_utils.doctree b/.doctrees/reference/generated/modelopt.torch.export.scaling_factor_utils.doctree new file mode 100644 index 0000000..01d94e4 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.export.scaling_factor_utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.export.tensorrt_llm_utils.doctree b/.doctrees/reference/generated/modelopt.torch.export.tensorrt_llm_utils.doctree new file mode 100644 index 0000000..8741444 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.export.tensorrt_llm_utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.export.transformer_engine.doctree b/.doctrees/reference/generated/modelopt.torch.export.transformer_engine.doctree new file mode 100644 index 0000000..1fbc07e Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.export.transformer_engine.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.opt.config.doctree b/.doctrees/reference/generated/modelopt.torch.opt.config.doctree new file mode 100644 index 0000000..4e1aa6f Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.opt.config.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.opt.conversion.doctree b/.doctrees/reference/generated/modelopt.torch.opt.conversion.doctree new file mode 100644 index 0000000..37900d4 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.opt.conversion.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.opt.doctree 
b/.doctrees/reference/generated/modelopt.torch.opt.doctree new file mode 100644 index 0000000..8db64f8 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.opt.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.opt.dynamic.doctree b/.doctrees/reference/generated/modelopt.torch.opt.dynamic.doctree new file mode 100644 index 0000000..da820f4 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.opt.dynamic.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.opt.hparam.doctree b/.doctrees/reference/generated/modelopt.torch.opt.hparam.doctree new file mode 100644 index 0000000..d727c92 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.opt.hparam.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.opt.mode.doctree b/.doctrees/reference/generated/modelopt.torch.opt.mode.doctree new file mode 100644 index 0000000..7a992ca Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.opt.mode.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.opt.plugins.doctree b/.doctrees/reference/generated/modelopt.torch.opt.plugins.doctree new file mode 100644 index 0000000..d066d6f Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.opt.plugins.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.opt.searcher.doctree b/.doctrees/reference/generated/modelopt.torch.opt.searcher.doctree new file mode 100644 index 0000000..7a5f751 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.opt.searcher.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.opt.utils.doctree b/.doctrees/reference/generated/modelopt.torch.opt.utils.doctree new file mode 100644 index 0000000..0bc4c08 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.opt.utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.calib.calibrator.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.calib.calibrator.doctree new file mode 100644 index 0000000..3e5fa07 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.calib.calibrator.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.calib.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.calib.doctree new file mode 100644 index 0000000..56834fc Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.calib.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.calib.histogram.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.calib.histogram.doctree new file mode 100644 index 0000000..a7c0fb0 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.calib.histogram.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.calib.max.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.calib.max.doctree new file mode 100644 index 0000000..5f8b221 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.calib.max.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.config.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.config.doctree new file mode 100644 index 0000000..e7633c0 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.config.doctree differ 
diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.conversion.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.conversion.doctree new file mode 100644 index 0000000..72be1dc Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.conversion.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.doctree new file mode 100644 index 0000000..cd84875 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.extensions.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.extensions.doctree new file mode 100644 index 0000000..a673f09 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.extensions.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.mode.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.mode.doctree new file mode 100644 index 0000000..36d5a6c Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.mode.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.model_calib.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.model_calib.doctree new file mode 100644 index 0000000..c6ac50f Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.model_calib.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.model_quant.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.model_quant.doctree new file mode 100644 index 0000000..6879fb5 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.model_quant.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.doctree new file mode 100644 index 0000000..4599e31 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.functional.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.functional.doctree new file mode 100644 index 0000000..55b2ff2 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.functional.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.clip.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.clip.doctree new file mode 100644 index 0000000..22a9cc1 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.clip.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.doctree new file mode 100644 index 0000000..448084b Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_activations.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_activations.doctree new file mode 100644 index 0000000..3c84f0a Binary files /dev/null and 
b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_activations.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_batchnorm.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_batchnorm.doctree new file mode 100644 index 0000000..3d60355 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_batchnorm.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.doctree new file mode 100644 index 0000000..9be4758 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_instancenorm.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_instancenorm.doctree new file mode 100644 index 0000000..773bd22 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_instancenorm.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.doctree new file mode 100644 index 0000000..f8c99c2 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.doctree new file mode 100644 index 0000000..0f9f93c Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_pooling.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_pooling.doctree new file mode 100644 index 0000000..c550735 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.quant_pooling.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.doctree new file mode 100644 index 0000000..a452e17 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.optim.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.optim.doctree new file mode 100644 index 0000000..2b2f282 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.optim.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.plugins.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.plugins.doctree new file mode 100644 index 0000000..b1b8bc7 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.plugins.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.quant_modules.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.quant_modules.doctree new file mode 100644 index 0000000..3855964 Binary files 
/dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.quant_modules.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.tensor_quant.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.tensor_quant.doctree new file mode 100644 index 0000000..0ab87a9 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.tensor_quant.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.quantization.utils.doctree b/.doctrees/reference/generated/modelopt.torch.quantization.utils.doctree new file mode 100644 index 0000000..289ec93 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.quantization.utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.sparsity.config.doctree b/.doctrees/reference/generated/modelopt.torch.sparsity.config.doctree new file mode 100644 index 0000000..d4d72fc Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.sparsity.config.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.sparsity.doctree b/.doctrees/reference/generated/modelopt.torch.sparsity.doctree new file mode 100644 index 0000000..8fade8e Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.sparsity.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.sparsity.magnitude.doctree b/.doctrees/reference/generated/modelopt.torch.sparsity.magnitude.doctree new file mode 100644 index 0000000..3d47d1e Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.sparsity.magnitude.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.sparsity.mode.doctree b/.doctrees/reference/generated/modelopt.torch.sparsity.mode.doctree new file mode 100644 index 0000000..2711ae6 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.sparsity.mode.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.sparsity.module.doctree b/.doctrees/reference/generated/modelopt.torch.sparsity.module.doctree new file mode 100644 index 0000000..a94cf92 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.sparsity.module.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.sparsity.plugins.doctree b/.doctrees/reference/generated/modelopt.torch.sparsity.plugins.doctree new file mode 100644 index 0000000..e707755 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.sparsity.plugins.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.sparsity.searcher.doctree b/.doctrees/reference/generated/modelopt.torch.sparsity.searcher.doctree new file mode 100644 index 0000000..46dea65 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.sparsity.searcher.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.sparsity.sparsegpt.doctree b/.doctrees/reference/generated/modelopt.torch.sparsity.sparsegpt.doctree new file mode 100644 index 0000000..1fa71cf Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.sparsity.sparsegpt.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.sparsity.sparsification.doctree b/.doctrees/reference/generated/modelopt.torch.sparsity.sparsification.doctree new file mode 100644 index 0000000..9383b12 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.sparsity.sparsification.doctree differ diff --git 
a/.doctrees/reference/generated/modelopt.torch.utils.cpp_extension.doctree b/.doctrees/reference/generated/modelopt.torch.utils.cpp_extension.doctree new file mode 100644 index 0000000..f6d2119 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.utils.cpp_extension.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.utils.dataset_utils.doctree b/.doctrees/reference/generated/modelopt.torch.utils.dataset_utils.doctree new file mode 100644 index 0000000..7063a1f Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.utils.dataset_utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.utils.distributed.doctree b/.doctrees/reference/generated/modelopt.torch.utils.distributed.doctree new file mode 100644 index 0000000..adb542c Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.utils.distributed.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.utils.doctree b/.doctrees/reference/generated/modelopt.torch.utils.doctree new file mode 100644 index 0000000..26a2fb6 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.utils.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.utils.graph.doctree b/.doctrees/reference/generated/modelopt.torch.utils.graph.doctree new file mode 100644 index 0000000..127ebb8 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.utils.graph.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.utils.list.doctree b/.doctrees/reference/generated/modelopt.torch.utils.list.doctree new file mode 100644 index 0000000..bd3a473 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.utils.list.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.utils.logging.doctree b/.doctrees/reference/generated/modelopt.torch.utils.logging.doctree new file mode 100644 index 0000000..bc7b338 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.utils.logging.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.utils.network.doctree b/.doctrees/reference/generated/modelopt.torch.utils.network.doctree new file mode 100644 index 0000000..0ec5a03 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.utils.network.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.utils.perf.doctree b/.doctrees/reference/generated/modelopt.torch.utils.perf.doctree new file mode 100644 index 0000000..5c5ba70 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.utils.perf.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.utils.random.doctree b/.doctrees/reference/generated/modelopt.torch.utils.random.doctree new file mode 100644 index 0000000..c0c3094 Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.utils.random.doctree differ diff --git a/.doctrees/reference/generated/modelopt.torch.utils.tensor.doctree b/.doctrees/reference/generated/modelopt.torch.utils.tensor.doctree new file mode 100644 index 0000000..538956e Binary files /dev/null and b/.doctrees/reference/generated/modelopt.torch.utils.tensor.doctree differ diff --git a/.doctrees/support/1_contact.doctree b/.doctrees/support/1_contact.doctree new file mode 100644 index 0000000..5572c37 Binary files /dev/null and b/.doctrees/support/1_contact.doctree differ diff --git a/.doctrees/support/2_faqs.doctree b/.doctrees/support/2_faqs.doctree new file mode 100644 index 
0000000..34b2b30 Binary files /dev/null and b/.doctrees/support/2_faqs.doctree differ diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/_sources/deployment/1_tensorrt_llm_deployment.rst.txt b/_sources/deployment/1_tensorrt_llm_deployment.rst.txt new file mode 100644 index 0000000..ea32741 --- /dev/null +++ b/_sources/deployment/1_tensorrt_llm_deployment.rst.txt @@ -0,0 +1,142 @@ +========================== +TensorRT-LLM Deployment +========================== + +.. note:: + + Please read the `TensorRT-LLM checkpoint workflow `_ + before going through this section. + + +The ModelOpt toolkit supports automatic conversion of ModelOpt-exported LLMs to TensorRT-LLM checkpoints and engines for accelerated inference. + +This conversion is achieved by: + +#. Converting Huggingface, NeMo, and ModelOpt-exported checkpoints to the TensorRT-LLM checkpoint format. +#. Building a TensorRT-LLM engine from the TensorRT-LLM checkpoint. + + +Export Quantized Model +====================== + +After the model is quantized, it can be exported to the TensorRT-LLM checkpoint format, which is stored as: + +#. A single JSON file recording the model structure and metadata (config.json). +#. A group of safetensors files, each recording the local calibrated model on a single GPU rank (model weights and scaling factors per GPU). + +The export API (:meth:`export_tensorrt_llm_checkpoint `) can be used as follows: + +.. code-block:: python + + import torch + + from modelopt.torch.export import export_tensorrt_llm_checkpoint + + with torch.inference_mode(): + export_tensorrt_llm_checkpoint( + model, # The quantized model. + decoder_type, # The type of the model as str, e.g. gptj, llama or gptnext. + dtype, # The weights data type used to export the unquantized layers. + export_dir, # The directory where the exported files will be stored. + inference_tensor_parallel, # The number of GPUs used for tensor parallelism at inference time. + inference_pipeline_parallel, # The number of GPUs used for pipeline parallelism at inference time. + ) + +If the :meth:`export_tensorrt_llm_checkpoint ` call is successful, the TensorRT-LLM checkpoint will be saved. Otherwise (e.g., if the ``decoder_type`` is not supported), a torch state_dict checkpoint will be saved instead. + +.. list-table:: Model support matrix for the TensorRT-LLM checkpoint export + :header-rows: 1 + + * - Model / Quantization + - FP16 / BF16 + - FP8 + - INT8_SQ + - INT4_AWQ + * - GPT2 + - Yes + - Yes + - Yes + - No + * - GPTJ + - Yes + - Yes + - Yes + - Yes + * - LLAMA 2 + - Yes + - Yes + - Yes + - Yes + * - LLAMA 3 + - Yes + - Yes + - No + - Yes + * - Mistral + - Yes + - Yes + - Yes + - Yes + * - Mixtral 8x7B + - Yes + - Yes + - No + - Yes + * - Falcon 40B, 180B + - Yes + - Yes + - Yes + - Yes + * - Falcon 7B + - Yes + - Yes + - Yes + - No + * - Falcon RW 1B, 7B + - Yes + - Yes + - Yes + - Yes + * - MPT 7B, 30B + - Yes + - Yes + - Yes + - Yes + * - Baichuan 1, 2 + - Yes + - Yes + - Yes + - Yes + * - Qwen 7B, 14B + - Yes + - Yes + - Yes + - Yes + * - ChatGLM2, 3 6B + - Yes + - Yes + - Yes + - Yes + * - Bloom + - Yes + - Yes + - Yes + - Yes + * - Phi-1, 2, 3 + - Yes + - Yes + - Yes + - Yes + * - Nemotron 8 + - Yes + - Yes + - No + - Yes + * - Gemma 2B, 7B + - Yes + - Yes + - No + - Yes + +Convert to TensorRT-LLM +======================= + +Once the TensorRT-LLM checkpoint is available, please follow the `TensorRT-LLM build API `_ to build and deploy the quantized LLM; an illustrative build command is sketched below.
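For reference, the build step typically goes through the ``trtllm-build`` CLI that ships with TensorRT-LLM. The command below is only a sketch: the paths are placeholders and the available flags vary across TensorRT-LLM versions, so consult the build API documentation linked above for the options matching your installation.

.. code-block:: bash

    # Build a TensorRT-LLM engine from the exported checkpoint directory
    # (the export_dir passed to export_tensorrt_llm_checkpoint above).
    trtllm-build --checkpoint_dir /path/to/exported_ckpt \
                 --output_dir /path/to/engine_dir \
                 --gemm_plugin auto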
diff --git a/_sources/examples/0_all_examples.rst.txt b/_sources/examples/0_all_examples.rst.txt new file mode 100644 index 0000000..eb1ecfc --- /dev/null +++ b/_sources/examples/0_all_examples.rst.txt @@ -0,0 +1,5 @@ +All ModelOpt Examples +===================== + +Please visit the `TensorRT-Model-Optimizer GitHub repository `_ +for all ModelOpt examples. diff --git a/_sources/getting_started/1_overview.rst.txt b/_sources/getting_started/1_overview.rst.txt new file mode 100644 index 0000000..b39fc37 --- /dev/null +++ b/_sources/getting_started/1_overview.rst.txt @@ -0,0 +1,41 @@ +Overview +######## + +**NVIDIA TensorRT Model Optimizer** +=================================== + +Minimizing inference costs presents a significant challenge as generative AI models continue to grow in complexity and size. +The `NVIDIA TensorRT Model Optimizer `_ (referred to as Model Optimizer, or ModelOpt) +is a library comprising state-of-the-art model optimization techniques including quantization and sparsity to compress models. +It accepts a torch or ONNX model as input and provides Python APIs for users to easily stack different model optimization +techniques to produce a quantized checkpoint. Seamlessly integrated within the NVIDIA AI software ecosystem, the quantized +checkpoint generated from Model Optimizer is ready for deployment in downstream inference frameworks like +`TensorRT-LLM `_ or `TensorRT `_. +Further integrations are planned for `NVIDIA NeMo `_ and `Megatron-LM `_ +for training-in-the-loop optimization techniques. For enterprise users, 8-bit quantization with Stable Diffusion is also available on +`NVIDIA NIM `_. + +Model Optimizer is available for free to all developers on `NVIDIA PyPI `_. +Visit the `NVIDIA/TensorRT-Model-Optimizer repository `_ for end-to-end +example scripts and recipes optimized for NVIDIA GPUs. + +Techniques +---------- + +Quantization +^^^^^^^^^^^^ +Quantization is an effective model optimization technique for large models. Quantization with Model Optimizer can compress +model size by 2x-4x, speeding up inference while preserving model quality. Model Optimizer enables highly performant +quantization formats including FP8, INT8, and INT4, and supports advanced algorithms such as SmoothQuant, AWQ, and +Double Quantization with easy-to-use Python APIs. Both post-training quantization (PTQ) and quantization-aware training (QAT) +are supported. Visit the :meth:`Quantization Format page ` +for the list of supported formats. + +Sparsity +^^^^^^^^ +Sparsity is a technique to further reduce the memory footprint of deep learning models and accelerate inference. +Model Optimizer provides the Python API :meth:`mts.sparsify() ` to apply +weight sparsity to a given model. The ``mts.sparsify()`` API supports the `NVIDIA 2:4 `_ +sparsity pattern and various sparsification methods, such as NVIDIA `ASP `_ +and `SparseGPT `_. It supports both post-training sparsity and sparsity with fine-tuning. +The latter workflow is recommended to minimize accuracy degradation.
diff --git a/_sources/getting_started/2_installation.rst.txt b/_sources/getting_started/2_installation.rst.txt new file mode 100644 index 0000000..f015882 --- /dev/null +++ b/_sources/getting_started/2_installation.rst.txt @@ -0,0 +1,110 @@ +============ +Installation +============ + +System requirements +=================== + +Model Optimizer (``nvidia-modelopt``) currently has the following system requirements: + ++----------------------+-----------------------------+ +| OS | Linux, Windows | ++----------------------+-----------------------------+ +| Architecture | x86_64, aarch64, win_amd64 | ++----------------------+-----------------------------+ +| Python | >=3.8,<3.12 | ++----------------------+-----------------------------+ +| PyTorch | >=1.11 | ++----------------------+-----------------------------+ +| CUDA | >=11.8 (Recommended) | ++----------------------+-----------------------------+ + +Install Model Optimizer +======================= + +ModelOpt, including its dependencies, can be installed via ``pip``. Please review the +license terms of ModelOpt and any dependencies before use. + +.. tab:: Quick install + + .. code-block:: bash + + pip install "nvidia-modelopt[all]" --no-cache-dir --extra-index-url https://pypi.nvidia.com + +.. tab:: Detailed instructions + + **Setting up a virtual environment** + + We recommend setting up a virtual environment if you don't have one already. Run the following + command to set up and activate a ``conda`` virtual environment named ``modelopt`` with Python 3.11: + + .. code-block:: bash + + conda create -n modelopt python=3.11 pip + + .. code-block:: bash + + conda activate modelopt + + (Optional) **Install desired PyTorch version** + + + By default, the latest PyTorch version (``torch>=1.11``) available on ``pip`` will + be installed. If you want to install a specific PyTorch version for a specific CUDA version, please first + `follow the instructions to install your desired PyTorch version `_. + For example, to install the latest ``torch>=1.11`` with CUDA 11.8, run: + + .. code-block:: bash + + pip install torch --extra-index-url https://download.pytorch.org/whl/cu118 + + **Identify correct partial dependencies** + + Note that when installing ``nvidia-modelopt`` without optional dependencies, only the bare-bones + requirements are installed, and none of the modules will work without the appropriate optional + dependencies or the ``[all]`` option. Below is a list of optional dependencies that + need to be installed to correctly use the corresponding modules: + + .. list-table:: + :widths: 30 30 
 :header-rows: 1 + + * - Module + - Optional dependencies + * - ``modelopt.deploy`` + - ``[deploy]`` + * - ``modelopt.onnx`` + - ``[onnx]`` + * - ``modelopt.torch`` + - ``[torch]`` + * - ``modelopt.torch._deploy`` + - ``[torch, deploy]`` + + Additionally, we support the following third-party plugins: + + .. list-table:: + :widths: 30 30 + :header-rows: 1 + + * - Third-party package + - Optional dependencies + * - ``transformers`` (Huggingface) + - ``[hf]`` + +**Install Model Optimizer** (``nvidia-modelopt``) + +.. code-block:: bash + + pip install "nvidia-modelopt[all]" --no-cache-dir --extra-index-url https://pypi.nvidia.com + +Check installation +================== + +.. tip:: + + When you use ModelOpt's PyTorch quantization APIs for the first time, it will compile the fast quantization kernels + using your installed torch and CUDA if available. + This may take a few minutes but subsequent quantization calls will be much faster. + To invoke the compilation now and check if it is successful, run the following command: + +
.. code-block:: bash + + python -c "import modelopt.torch.quantization.extensions as ext; print(ext.cuda_ext); print(ext.cuda_ext_fp8)" diff --git a/_sources/getting_started/3_quantization.rst.txt b/_sources/getting_started/3_quantization.rst.txt new file mode 100644 index 0000000..693a8b2 --- /dev/null +++ b/_sources/getting_started/3_quantization.rst.txt @@ -0,0 +1,70 @@ +========================= +Quick Start: Quantization +========================= + +Quantization +------------ + +Quantization is an effective technique to reduce the memory footprint of deep learning models and to +accelerate inference. + +ModelOpt's :meth:`mtq.quantize() ` API enables +users to quantize a model with advanced algorithms such as SmoothQuant and AWQ. ModelOpt supports both +post-training quantization (PTQ) and quantization-aware training (QAT). + +.. tip:: + + Please refer to :any:`quantization-formats` for details on the ModelOpt-supported quantization + formats and their use cases. + +PTQ for PyTorch models +----------------------------- + +:meth:`mtq.quantize ` requires the model, +the appropriate quantization configuration, and a forward loop as inputs. Here is a quick example of +quantizing a model with INT8 SmoothQuant using +:meth:`mtq.quantize `: + +.. code-block:: python + + import modelopt.torch.quantization as mtq + + # Set up the model + model = get_model() + + # The quantization algorithm requires calibration data. Below we show a rough example of how to + # set up a calibration data loader with the desired calib_size + data_loader = get_dataloader(num_samples=calib_size) + + + # Define the forward_loop function with the model as input. The data loader should be wrapped + # inside the function. + def forward_loop(model): + for batch in data_loader: + model(batch) + + + # Quantize the model and perform calibration (PTQ) + model = mtq.quantize(model, mtq.INT8_SMOOTHQUANT_CFG, forward_loop) + +Refer to :any:`quantization-configs` for the quantization configurations available from ModelOpt. + +Deployment +---------------- + +The quantized model is just like a regular PyTorch model and is ready for evaluation or deployment. + +Huggingface or NeMo LLM models can be exported to TensorRT-LLM using ModelOpt. +Please see the :doc:`TensorRT-LLM Deployment <../deployment/1_tensorrt_llm_deployment>` guide for more +details. + +The model can also be exported to ONNX using +`torch.onnx.export `_. + +-------------------------------- + +**Next Steps** + * Learn more about quantization and advanced usage of Model Optimizer quantization in the + :doc:`Quantization guide <../guides/1_quantization>`. + * Check out the end-to-end examples on GitHub for PTQ and QAT + `here `_. diff --git a/_sources/getting_started/6_sparsity.rst.txt b/_sources/getting_started/6_sparsity.rst.txt new file mode 100644 index 0000000..72881e1 --- /dev/null +++ b/_sources/getting_started/6_sparsity.rst.txt @@ -0,0 +1,57 @@ +===================== +Quick Start: Sparsity +===================== + +Sparsity +-------- + +ModelOpt's :doc:`sparsity<../guides/5_sparsity>` feature is an effective technique to reduce the +memory footprint of deep learning models and accelerate inference. ModelOpt provides an +easy-to-use API :meth:`mts.sparsify() ` to apply +weight sparsity to a given model. +:meth:`mts.sparsify() ` supports the +`NVIDIA 2:4 Sparsity `_ pattern and various sparsification +methods, such as `NVIDIA ASP `_ +and `SparseGPT `_.
+ +This guide provides a quick start for applying weight sparsity to a PyTorch model using ModelOpt. + +Post-Training Sparsification (PTS) for PyTorch models +----------------------------------------------------- + +:meth:`mts.sparsify() ` requires the model, +the appropriate sparsity configuration, and a forward loop as inputs. +Here is a quick example of sparsifying a model to the 2:4 sparsity pattern with the SparseGPT method using +:meth:`mts.sparsify() `. + +.. code-block:: python + + import modelopt.torch.sparsity as mts + + # Set up the model + model = get_model() + + # Set up the data loaders. An example usage: + data_loader = get_train_dataloader(num_samples=calib_size) + + # Define the sparsity configuration + sparsity_config = {"data_loader": data_loader, "collect_func": lambda x: x} + + # Sparsify the model and perform calibration (PTS) + model = mts.sparsify(model, mode="sparsegpt", config=sparsity_config) + +.. note:: + `data_loader` is only required for data-driven sparsity, e.g., SparseGPT, where it is used for calibration. + `sparse_magnitude` does not require a `data_loader` as it is purely based on the weights of the model. + +.. note:: + `data_loader` and `collect_func` can be substituted with a `forward_loop` that iterates the model through the + calibration dataset. + +-------------------------------- + +**Next Steps** + * Learn more about sparsity and advanced usage of ModelOpt sparsity in the + :doc:`Sparsity guide <../guides/5_sparsity>`. + * Check out the end-to-end examples on GitHub + `here `_. diff --git a/_sources/guides/1_quantization.rst.txt b/_sources/guides/1_quantization.rst.txt new file mode 100644 index 0000000..a0ab8ec --- /dev/null +++ b/_sources/guides/1_quantization.rst.txt @@ -0,0 +1,22 @@ +Quantization +############ + +The ModelOpt quantization toolkit supports quantization for NVIDIA's hardware and software stack. +Currently ModelOpt supports quantization in the PyTorch and ONNX frameworks. + +ModelOpt is based on simulated quantization in the original precision to simulate, test, and optimize +for the best trade-off between the accuracy of the model and different low-precision formats. To +achieve actual speedups and memory savings, the model with simulated quantization can be exported to +deployment frameworks like TensorRT or TensorRT-LLM. Please refer to the +`TensorRT-Model-Optimizer GitHub repository `_ +for more details and examples. + +Below, you can find the documentation for the quantization toolkit in ModelOpt: + +.. toctree:: + :maxdepth: 1 + + ./_basic_quantization.rst + ./_choosing_quant_methods.rst + ./_pytorch_quantization.rst + ./_onnx_quantization.rst diff --git a/_sources/guides/5_sparsity.rst.txt b/_sources/guides/5_sparsity.rst.txt new file mode 100644 index 0000000..94d14eb --- /dev/null +++ b/_sources/guides/5_sparsity.rst.txt @@ -0,0 +1,161 @@ +============ +Sparsity +============ + +Introduction +============ + +ModelOpt's Sparsity module (:mod:`modelopt.torch.sparsity `) enables +you to sparsify the weights of your model. This can be useful for reducing the memory footprint of +your model, and can also speed up inference. + + +Follow the steps described below to obtain a model with sparse weights using ModelOpt's Sparsity +module :mod:`modelopt.torch.sparsity`: + +#. **Training**: You can either train your model using the existing training pipeline or load a + pre-trained checkpoint for your model. +#. **Sparsification**: Sparsify the model using the provided + :meth:`mts.sparsify ` API. +#.
**Checkpoint and re-load**: Save the model via :meth:`mto.save ` + and restore via :meth:`mto.restore `. + +*To find out more about sparsity and related concepts, please refer to the* :ref:`Sparsity Concepts ` *section below.* + +.. _sparsity-pts: + +Post-Training Sparsification +============================ + +Post-training sparsification is the process of converting a dense model to a sparse model without +retraining. The simplest way to sparsify a model is to use +the :meth:`mts.sparsify ` API. + +The :meth:`mts.sparsify ` API takes a sparsity +config and a sparsity format as input and returns a sparse model. The sparsity config is a +dictionary specifying the layers to sparsify and the optional dataloader for +calibration in data-driven sparsity, e.g., SparseGPT. + +:meth:`mts.sparsify` supports the `NVIDIA ASP <1_>`_ and `SparseGPT <2_>`_ methods for magnitude-based +and data-driven sparsity, respectively. + +Example usage: + +.. code-block:: python + + import torch + from transformers import AutoModelForCausalLM + import modelopt.torch.sparsity as mts + + # User-defined model + model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6b") + + # Configure and convert for sparsity + sparsity_config = { + # data_loader is required for sparsity calibration + "data_loader": calib_dataloader, + "collect_func": lambda x: x, + } + sparse_model = mts.sparsify( + model, + "sparsegpt", # or "sparse_magnitude" + config=sparsity_config, + ) + +.. note:: + `data_loader` is only required in case of data-driven sparsity, e.g., for calibration in + ``sparsegpt``. `sparse_magnitude` does not require a `data_loader` as it uses a magnitude-based + method for thresholding. + + +Save and restore the sparse model +--------------------------------- + +To store the sparse model for future usage, call +:meth:`mto.save() `: + +.. code-block:: python + + import modelopt.torch.opt as mto + + mto.save(sparse_model, "modelopt_sparse_model.pth") + +.. note:: + :meth:`mto.save() ` will save the model state_dict, + along with the sparse masks and metadata to correctly re-create the sparse model later. + +To restore the saved sparse model you can use +:meth:`mto.restore() `: + +.. code-block:: python + + import modelopt.torch.opt as mto + + # Re-initialize the original, unmodified model + model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6b") + + # Restore the sparse model and metadata. + sparse_model = mto.restore(model, "modelopt_sparse_model.pth") + +.. note:: + :meth:`mto.restore() ` will restore the model state_dict, + along with the sparse masks and metadata of each sparse module. The plain PyTorch module will be + converted to a sparse module. The sparsity mask will be automatically enforced when the model + weight is accessed. + +.. note:: + :meth:`mts.export() ` will export the sparse + model to a plain PyTorch model. The sparse masks will be applied to model weights and all the + sparse metadata will be removed. After exporting, sparsity will no longer be enforced during + subsequent fine-tuning. If you want to continue fine-tuning, do not export the model. + + +.. _sparsity-concepts: + +Sparsity Concepts +================= + +Below, we provide an overview of ModelOpt's sparsity feature as well as its basic +concepts and terminology. + + +Structured and Unstructured Sparsity +------------------------------------ + +Weight sparsity is a model optimization technique where a fraction of the weights in a model are set +to zero. Model sparsity can be broadly categorized as structured and unstructured sparsity.
+Unstructured sparsity refers to the case where the zero weights are randomly distributed across the +weight matrix. Unstructured sparsity is more flexible but can lead to poor utilization on +highly-parallelized hardware architectures like GPUs. Structured sparsity, on the other hand, is +more efficient in terms of memory access and can be exploited to achieve higher math throughput. +Structured sparsity can usually be achieved by enforcing a specific sparsity pattern on the weights. + + +N:M Sparsity +------------ +N:M sparsity refers to a special type of fine-grained structured pattern, where in each block of M +contiguous elements, at most N are nonzero. Due to its regularity, N:M sparsity can be efficiently +implemented on GPU architectures and provides the following benefits: + + * **Reduced memory bandwidth requirement:** The N:M sparsity pattern has a smaller memory bandwidth + requirement than both dense weights and weights with an unstructured sparsity pattern. + + * **Higher math throughput:** Sparse Tensor Cores deliver higher math throughput for + matrix-multiply operations when the first argument is a compressed N:M sparse matrix. + For example, the 2:4 sparsity pattern allows for 2x higher math throughput on sparse Tensor Cores. + +On current NVIDIA architectures (Ampere or later), `2:4 Sparsity <3_>`_, where in each block of four +contiguous elements two are nonzero, is supported for accelerated inference on sparse Tensor Cores. + +Sparsification algorithm +------------------------ + +There are many ways to achieve weight sparsity. A commonly used approach is magnitude-based sparsity, +where in each block of M elements, the N largest elements are retained and the rest are set to +zero. Magnitude-based sparsity is simple and easy to implement, but may not retain the accuracy of +the original model as well. Other methods such as data-driven sparsity, e.g., Optimal Brain Surgeon, +usually deliver better accuracy. ModelOpt supports both magnitude-based (`NVIDIA ASP <1_>`_) and +data-driven sparsity (`SparseGPT <2_>`_). + +.. _1: https://github.com/NVIDIA/apex/tree/master/apex/contrib/sparsity +.. _2: https://arxiv.org/abs/2301.00774 +.. _3: https://arxiv.org/abs/2104.08378 diff --git a/_sources/guides/_basic_quantization.rst.txt b/_sources/guides/_basic_quantization.rst.txt new file mode 100644 index 0000000..c35041d --- /dev/null +++ b/_sources/guides/_basic_quantization.rst.txt @@ -0,0 +1,63 @@ +Basic Concepts +============== + +A quantization format consists of the precision format, the block format, and the calibration +algorithm. +The detailed list of available quantization formats can be found in :any:`quantization-formats`. +Below we provide an overview of the important topics: + +Precision format +**************** +The precision format defines the bit-width of the quantized values. Generally, there are integer +formats (sign bit + mantissa bits) and floating-point formats (sign bit + exponent bits + mantissa +bits). `FP8 Formats for Deep Learning `_ provides a detailed +explanation of the floating-point formats. + +Scaling factor +************** +The scaling factor is a floating-point value used to scale and unscale the values before and +after the quantized operation, respectively. It maps the range of the original values to the range +of the quantized values, is shared across the quantized values in the same block, and is calculated +during the calibration process. A minimal numeric sketch of this is shown below.
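To make the relationship between the precision format and the scaling factor concrete, here is a minimal, self-contained NumPy sketch of symmetric per-tensor INT8 quantization with max calibration. It is illustrative only, not a ModelOpt API, and the tensor values are arbitrary:

.. code-block:: python

    import numpy as np

    x = np.random.randn(4, 8).astype(np.float32)  # toy high-precision tensor

    # Max calibration: derive one scaling factor for the whole tensor
    # (per-tensor block format) from its global maximum magnitude.
    amax = np.abs(x).max()
    scale = amax / 127.0  # 127 = largest magnitude representable in signed INT8

    # Quantize: scale, round to the nearest integer, and clip to the INT8 range.
    x_q = np.clip(np.round(x / scale), -127, 127).astype(np.int8)

    # Dequantize (unscale): the difference from x is the quantization error.
    x_dq = x_q.astype(np.float32) * scale
    print("max abs error:", np.abs(x - x_dq).max())  # bounded by ~scale / 2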
+ +Block format +************ +The block format defines the way the tensor is divided into blocks for sharing the scaling factors. +The most common block format is per-tensor quantization, where the whole tensor is quantized as a +single block with one global scaling factor. Other block formats include per-channel quantization, +where each channel is quantized separately, and the fine-grained per-block quantization, where the +tensor is divided into fixed-size blocks along the channel dimension. For low-bit quantization (e.g. +4-bit), per-block quantization is typically needed to preserve accuracy. + +Weights and activations may use different precision and block formats. For example, in GPTQ and AWQ, +the weights are quantized to 4-bit while activations stay in high precision. Weight-only quantization +is helpful for bandwidth-constrained scenarios, while weight and activation quantization can reduce +both bandwidth and computation cost. + +Calibration algorithm +********************* + +The calibration algorithm calculates scaling factors and potentially adjusts weights to maximize +post-quantization accuracy. The simplest calibration algorithm is "max calibration", in which the +scaling factor is calculated from the global maximum of the tensor and the weights are left unchanged, +simply rounded to the nearest quantized value. Examples of more advanced calibration algorithms are +`Entropy Calibration `_, +`SmoothQuant `_, and `AWQ `_. + +Quantization-aware training (QAT) +********************************* +QAT can be viewed as regular PTQ followed by fine-tuning during which the original, unquantized +weights are updated to minimize the loss. Compared to regular fine-tuning, we must model the effect +of quantization on the forward and backward passes. Commonly used QAT techniques like the +`Straight-Through Estimator (STE) `_ or STE with clipping keep the +scaling factors fixed and tune the weights during training to minimize the loss. ModelOpt implements +STE with clipping for QAT. + + +More Readings +************* + +* Math behind quantization: `Integer Quantization `_ + +* Explicit quantization graph representation with QDQ node: + `work-with-qat-networks `_ diff --git a/_sources/guides/_choosing_quant_methods.rst.txt b/_sources/guides/_choosing_quant_methods.rst.txt new file mode 100644 index 0000000..cb6514f --- /dev/null +++ b/_sources/guides/_choosing_quant_methods.rst.txt @@ -0,0 +1,54 @@ +======================================================= +Best practices to choose the right quantization methods +======================================================= + +A quantization method comprises three primary components: + +1. Weight precision format +2. Activation precision format +3. Calibration algorithms + +Typically, in the context of small-batch inference scenarios (batch size ≤ 4), the inference is often 'memory-bound'. In memory-bound inference, the throughput is limited by the weight loading time from GPU memory to GPU cache, i.e., inference is memory-bandwidth limited. +In this regime of operation, weight-only quantization methods such as INT4 AWQ or INT4-FP8 AWQ give a superior performance improvement. + +Conversely, for large-batch inference scenarios, such as serving scenarios (batch size ≥ 16), both memory bandwidth and computation density become crucial factors. +Consequently, it's recommended to opt for a quantization method that has both weight & activation quantization as well as lower-precision computation kernels.
+Based on specific use cases, users might have different tolerances for accuracy degradation and calibration time. The table below summarizes the tradeoffs* to consider when choosing a quantization method.
+
+-----------------------+-------------+-------------+-------------+--------------------------------------------------------------------------------------------------------------------------+
+| Quantization Methods  | Performance | Performance | Accuracy    | Details                                                                                                                  |
+|                       | small-batch | large-batch | degradation |                                                                                                                          |
+=======================+=============+=============+=============+==========================================================================================================================+
+| FP8                   | Medium      | Medium      | Very Low    | * FP8 per-tensor weight & activation quantization with min-max calibration.                                             |
+|                       |             |             |             | * Compresses FP16/BF16 model to 50% of original size.                                                                   |
+|                       |             |             |             | * Calibration time: minutes**.                                                                                          |
+|                       |             |             |             | * Deploy via TensorRT, TensorRT-LLM. Supported GPUs: Ada, Hopper and later.                                             |
+-----------------------+-------------+-------------+-------------+--------------------------------------------------------------------------------------------------------------------------+
+| INT8 SmoothQuant      | Medium      | Medium      | Medium      | * 8-bit integer quantization with a variant of `SmoothQuant `_ calibration.                                             |
+|                       |             |             |             | * Per-channel weight quantization, per-tensor activation quantization.                                                  |
+|                       |             |             |             | * Compresses FP16/BF16 model to 50% of original size.                                                                   |
+|                       |             |             |             | * Calibration time: minutes**.                                                                                          |
+|                       |             |             |             | * Deploy using TensorRT, TensorRT-LLM. Supported on most GPUs.                                                          |
+-----------------------+-------------+-------------+-------------+--------------------------------------------------------------------------------------------------------------------------+
+| INT4 Weights only AWQ | High        | Low         | Low         | * 4-bit integer group-wise/block-wise weight only quantization with `AWQ `_ calibration.                                |
+| (W4A16)               |             |             |             | * Compresses FP16/BF16 model to 25% of original size.                                                                   |
+|                       |             |             |             | * Calibration time: tens of minutes**.                                                                                  |
+|                       |             |             |             | * Deploy via TensorRT-LLM. Supported GPUs: Ampere and later.                                                            |
+-----------------------+-------------+-------------+-------------+--------------------------------------------------------------------------------------------------------------------------+
+| INT4-FP8 AWQ (W4A8)   | High        | Medium      | Low         | * 4-bit integer group-wise/block-wise weight quantization, FP8 per-tensor activation quantization & `AWQ `_ calibration. |
+|                       |             |             |             | * Compresses FP16/BF16 model to 25% of original size.                                                                   |
+|                       |             |             |             | * Calibration time: tens of minutes**.                                                                                  |
+|                       |             |             |             | * Deploy via TensorRT-LLM. Supported GPUs: Ada, Hopper and later.                                                       |
+-----------------------+-------------+-------------+-------------+--------------------------------------------------------------------------------------------------------------------------+
+
+| * The performance and impact are measured on 10+ popular LLMs. We'll follow up with more data points.
+| ** Calibration time depends on the actual model size.
+
| ++-----------------------+-------------+-------------+-------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +| * The performance and impact are measured on 10+ popular LLMs. We'll follow up with more data points. +| ** Calibration time is subject to the actual model size. + +Please see how to apply these quantization methods below: + * :doc:`Quantizing pytorch models <../guides/_pytorch_quantization>` + * :doc:`Quantizing ONNX models <../guides/_onnx_quantization>` diff --git a/_sources/guides/_onnx_quantization.rst.txt b/_sources/guides/_onnx_quantization.rst.txt new file mode 100644 index 0000000..ecdc15f --- /dev/null +++ b/_sources/guides/_onnx_quantization.rst.txt @@ -0,0 +1,105 @@ +======================== +ONNX Quantization (Beta) +======================== + +ModelOpt provides ONNX quantization that works together with `TensorRT Explicit Quantization (EQ) `_. The key advantages offered by ModelOpt's ONNX quantization: + +#. Easy to use for non-expert users. +#. White-box design allowing expert users to customize the quantization process. +#. Better support for vision transformers. + +Currently ONNX quantization only supports INT8 quantization. + +.. note:: + + ModelOpt ONNX quantization generates new ONNX models with QDQ nodes following TensorRT rules. + For real speedup, the generated ONNX should be compiled into TensorRT engine. + +Requirements +============ + +#. TensorRT >= 8.6 ( >= 9.1 preferred). Please refer to `TensorRT 9.1 download link `_. + + + +Apply Post Training Quantization (PTQ) +====================================== + +PTQ should be done with a calibration dataset. If calibration dataset is not provided, ModelOpt will use random scales for the QDQ nodes. + +Prepare calibration dataset +--------------------------- +ModelOpt supports two types of calibration data format: image directory or numpy file. + +Image directory only works for single-input ONNX models. + +Numpy file works for both single-input and multi-input ONNX models. In the case of multi-input ONNX models, the numpy file should be a dictionary with keys as input names and values as numpy arrays. + +.. code-block:: python + + # Example numpy file for single-input ONNX + calib_data = np.random.randn(batch_size, channels, h, w) + np.save("calib_data.npy", calib_data) + + # Example numpy file for single/multi-input ONNX + # Dict key should match the input names of ONNX + calib_data = { + "input_name": np.random.randn(*shape), + "input_name2": np.random.randn(*shape2), + } + np.savez("calib_data.npz", calib_data) + + + +Call PTQ function +----------------- +.. code-block:: python + + import modelopt.onnx.quantization as moq + + calibration_data = np.load(calibration_data_path) + + moq.quantize( + onnx_path=onnx_path, + calibration_data=calibration_data, + output_path="quant.onnx", + quantize_mode="int8", + ) + +Alternatively, you can call PTQ function in command line: + +.. code-block:: bash + + python -m modelopt.onnx.quantization \ + --calibration_data_path /calibration/data/in/npz/npy/format \ + --output_path /path/to/the/quantized/onnx/output \ + --quantize_mode int8 + + +By default, after running the calibraton, the quantization tool will insert the QDQ nodes by following TensorRT friendly QDQ insertion algorithm. Users can change the default quantization behavior by tweaking the API params like op_types_to_quantize, op_types_to_exclude etc. 
+
+
+Deploy Quantized ONNX Model
+===========================
+
+
+``trtexec`` is a command-line tool provided by TensorRT. Typically, it's within the ``/usr/src/tensorrt/bin/`` directory. Below is a simple command to compile the quantized ONNX model generated by the previous step into a TensorRT engine file.
+
+.. code-block:: bash
+
+    trtexec --onnx=quant.onnx --saveEngine=quant.engine --best
+
+Compare the performance
+=======================
+
+The following command builds an engine from the original ONNX model using FP16 precision. After building, compare the reported "Latency" and "Throughput" fields with those of the quantized engine.
+
+
+.. code-block:: bash
+
+    trtexec --onnx=original.onnx --saveEngine=fp16.engine --fp16
+
+
+.. note::
+
+    If you replace the ``--fp16`` flag with the ``--best`` flag, this command will create an INT8 engine with TensorRT's implicit quantization.
diff --git a/_sources/guides/_pytorch_quantization.rst.txt b/_sources/guides/_pytorch_quantization.rst.txt
new file mode 100644
index 0000000..0ba87ca
--- /dev/null
+++ b/_sources/guides/_pytorch_quantization.rst.txt
@@ -0,0 +1,278 @@
+====================
+PyTorch Quantization
+====================
+
+ModelOpt PyTorch quantization is a refactored version of `pytorch_quantization `_.
+
+Key advantages offered by ModelOpt's PyTorch quantization:
+
+#. Support for advanced quantization formats, e.g., block-wise INT4 and FP8.
+#. Native support for LLMs in Hugging Face and NeMo.
+#. Advanced quantization algorithms, e.g., SmoothQuant, AWQ.
+#. Deployment support for ONNX and NVIDIA TensorRT.
+
+.. note::
+
+    ModelOpt quantization is fake quantization, which means it only simulates the low-precision computation in PyTorch.
+    Real speedup and memory savings are achieved by exporting the model to deployment frameworks.
+
+.. tip::
+
+    This guide covers the usage of ModelOpt quantization. For details on the quantization formats and recommended use cases,
+    please refer to :any:`quantization-formats`.
+
+Apply Post Training Quantization (PTQ)
+======================================
+
+PTQ can be achieved with simple calibration on a small set of training or evaluation data (typically 128-512 samples) after converting a regular PyTorch model to a quantized model.
+The simplest way to quantize a model using ModelOpt is to use :meth:`mtq.quantize() `.
+
+:meth:`mtq.quantize` takes a model, a quantization config, and a forward loop callable as input. The quantization config specifies the layers to quantize and their quantization formats, as well as the algorithm to use for calibration. Please
+refer to :any:`quantization-configs` for the list of quantization configs supported by default. You may also define your own quantization config as
+described in :ref:`customizing quantizer config `.
+
+ModelOpt supports algorithms such as AWQ, SmoothQuant, or max for calibration. Please refer to :meth:`mtq.calibrate `
+for more details.
+
+The forward loop is used to pass data through the model in order to collect statistics for calibration.
+It should wrap around the calibration dataloader and the model.
+
+Here is an example of performing PTQ using ModelOpt:
+
+.. code-block:: python
+
+    import modelopt.torch.quantization as mtq
+
+    # Setup the model
+    model = get_model()
+
+    # Select quantization config
+    config = mtq.INT8_SMOOTHQUANT_CFG
+
+    # Quantization needs calibration data. Set up a calibration data loader
+    # An example of creating a calibration data loader looks like the following:
+    data_loader = get_dataloader(num_samples=calib_size)
+
+
+    # Define forward_loop. Please wrap the data loader in the forward_loop
+    def forward_loop(model):
+        for batch in data_loader:
+            model(batch)
+
+
+    # Quantize the model and perform calibration (PTQ)
+    model = mtq.quantize(model, config, forward_loop)
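+
+For a more concrete end-to-end variant of the same flow, here is a hypothetical sketch using a
+small Hugging Face causal LM ("gpt2" is just a placeholder checkpoint, and the calibration texts
+would normally come from a representative dataset):
+
+.. code-block:: python
+
+    import torch
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+
+    import modelopt.torch.quantization as mtq
+
+    model = AutoModelForCausalLM.from_pretrained("gpt2")
+    tokenizer = AutoTokenizer.from_pretrained("gpt2")
+
+    calib_texts = ["Example calibration sentence."] * 128  # placeholder data
+
+
+    # Forward loop that feeds the calibration texts through the model
+    def forward_loop(model):
+        for text in calib_texts:
+            inputs = tokenizer(text, return_tensors="pt")
+            with torch.no_grad():
+                model(**inputs)
+
+
+    model = mtq.quantize(model, mtq.INT8_DEFAULT_CFG, forward_loop)
+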
+To verify that the quantizer nodes are placed correctly in the model, let's print the quantized model summary as shown below:
+
+.. code-block:: python
+
+    # Print quantization summary after successfully quantizing the model with mtq.quantize
+    # This will show the quantizers inserted in the model and their configurations
+    mtq.print_quantization_summary(model)
+
+
+After PTQ, the model can be exported to ONNX with the normal PyTorch ONNX export flow.
+
+.. code-block:: python
+
+    torch.onnx.export(model, sample_input, onnx_file)
+
+ModelOpt also supports direct export of Hugging Face or NeMo LLM models to TensorRT-LLM for deployment.
+Please see :doc:`TensorRT-LLM Deployment <../deployment/1_tensorrt_llm_deployment>` for more details.
+
+Quantization-aware Training (QAT)
+=================================
+
+QAT is the technique of fine-tuning a quantized model to recover the model quality degradation caused by quantization.
+While QAT requires much more compute than PTQ, it is highly effective in recovering model quality.
+
+A model quantized using :meth:`mtq.quantize() ` can be directly fine-tuned with QAT.
+Typically during QAT, the quantizer states are frozen and the model weights are fine-tuned.
+
+Here is an example of performing QAT:
+
+.. code-block:: python
+
+    import modelopt.torch.quantization as mtq
+
+    # Select quantization config
+    config = mtq.INT8_DEFAULT_CFG
+
+
+    # Define forward loop for calibration
+    def forward_loop(model):
+        for data in calib_set:
+            model(data)
+
+
+    # QAT after replacement of regular modules with quantized modules
+    model = mtq.quantize(model, config, forward_loop)
+
+    # Fine-tune with the original training pipeline
+    # Adjust learning rate and training duration
+    train(model, train_loader, optimizer, scheduler, ...)
+
+.. tip::
+
+    We recommend QAT for 10% of the original training epochs. For LLMs, we find that QAT fine-tuning for even
+    less than 1% of the original pre-training duration is often sufficient to recover the model quality.
+
+Storing and loading quantized model
+===================================
+
+The model weights and quantizer states need to be saved for future use or to resume training.
+The quantizer states of the model should be saved and loaded separately from the model weights.
+
+:meth:`mto.modelopt_state() ` provides the quantizer states of the model.
+The quantizer states can be saved with `torch.save `_. For example:
+
+.. code-block:: python
+
+    import modelopt.torch.opt as mto
+
+    # Save quantizer states
+    torch.save(mto.modelopt_state(model), "modelopt_state.pt")
+
+    # Save model weights using torch.save or a custom check-pointing function
+    # trainer.save_model("model.pt")
+    torch.save(model.state_dict(), "model.pt")
+
+To restore a quantized model, first restore the quantizer states using
+:meth:`mto.restore_from_modelopt_state `.
+After the quantizer states are restored, load the model weights. For example:
+
+.. code-block:: python
+
+    import modelopt.torch.opt as mto
+
+    # Initialize the un-quantized model
+    model = ...
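+
+    # NOTE: the un-quantized model must have the same architecture as the model
+    # that was saved; restoring the ModelOpt state re-inserts the quantizers
+    # before the quantized weights are loaded below.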
+
+    # Load quantizer states
+    model = mto.restore_from_modelopt_state(model, torch.load("modelopt_state.pt"))
+
+    # Load model weights using torch.load or a custom check-pointing function
+    # model.from_pretrained("model.pt")
+    model.load_state_dict(torch.load("model.pt"))
+
+
+Advanced Topics
+===============
+
+TensorQuantizer
+---------------
+
+Under the hood, ModelOpt :meth:`mtq.quantize() ` inserts
+:class:`TensorQuantizer `
+(quantizer) modules into model layers such as linear and conv layers and patches their forward methods to perform quantization.
+
+To create a :class:`TensorQuantizer` instance, you need to specify a :class:`QuantDescriptor `, which
+describes quantization parameters such as the number of quantization bits and the axis.
+
+Here is an example of creating a quantizer module:
+
+.. code-block:: python
+
+    import torch
+
+    from modelopt.torch.quantization.tensor_quant import QuantDescriptor
+    from modelopt.torch.quantization.nn import TensorQuantizer
+
+    # Create quantizer descriptor
+    quant_desc = QuantDescriptor(num_bits=8, axis=(-1,), unsigned=True)
+
+    # Create quantizer module
+    quantizer = TensorQuantizer(quant_desc)
+
+    x = torch.rand(4, 8)  # example input tensor
+    quant_x = quantizer(x)  # Quantize input x
+
+.. _customize_quantizer_config:
+
+Customize quantizer config
+--------------------------
+
+ModelOpt inserts input quantizers, weight quantizers, and output quantizers into common layers, but disables the output quantizer by default.
+Expert users who want to customize the default quantizer configuration can update the ``config`` dictionary provided to ``mtq.quantize`` using wildcard or filter-function matching.
+
+Here is an example of specifying a custom quantizer configuration to ``mtq.quantize``:
+
+.. code-block:: python
+
+    # Select quantization config
+    config = mtq.INT8_DEFAULT_CFG.copy()
+    config["quant_cfg"]["*.bmm.output_quantizer"] = {
+        "enable": True
+    }  # Enable output quantizer for bmm layer
+
+    # Perform PTQ/QAT
+    model = mtq.quantize(model, config, forward_loop)
+
+
+.. _custom_quantized_module:
+
+Custom quantized module and quantizer placement
+-----------------------------------------------
+
+``modelopt.torch.quantization`` has a default set of quantized modules (see :mod:`modelopt.torch.quantization.nn.modules ` for a detailed list) and quantizer placement rules (input, output and weight quantizers). However, there might be cases where you want to define a custom quantized module and/or customize the quantizer placement.
+
+ModelOpt provides a way to define custom quantized modules and register them with the quantization framework. This allows you to:
+
+#. Handle unsupported modules, e.g., a subclassed Linear layer that requires quantization.
+#. Customize the quantizer placement, e.g., placing the quantizer in special places like the KV Cache of an Attention layer.
+
+Here is an example of defining a custom quantized LayerNorm module:
+
+.. code-block:: python
+
+    import torch.nn as nn
+    import torch.nn.functional as F
+
+    from modelopt.torch.quantization.nn import TensorQuantizer
+
+
+    class QuantLayerNorm(nn.LayerNorm):
+        def __init__(self, normalized_shape):
+            super().__init__(normalized_shape)
+            self._setup()
+
+        def _setup(self):
+            # Method to setup the quantizers
+            self.input_quantizer = TensorQuantizer()
+            self.weight_quantizer = TensorQuantizer()
+
+        def forward(self, input):
+            # You can customize the quantizer placement anywhere in the forward method
+            input = self.input_quantizer(input)
+            weight = self.weight_quantizer(self.weight)
+            return F.layer_norm(input, self.normalized_shape, weight, self.bias, self.eps)
+
+After defining the custom quantized module, you need to register it so that the ``mtq.quantize`` API will automatically replace the original module with the quantized version.
+Note that the custom ``QuantLayerNorm`` must have a ``_setup`` method which instantiates the quantizer attributes that are called in the forward method.
+Here is the code to register the custom quantized module:
+
+.. code-block:: python
+
+    import modelopt.torch.quantization as mtq
+
+    # Register the custom quantized module
+    mtq.register(original_cls=nn.LayerNorm, quantized_cls=QuantLayerNorm)
+
+    # Perform PTQ
+    # nn.LayerNorm modules in the model will be replaced with the QuantLayerNorm module
+    model = mtq.quantize(model, config, forward_loop)
+
+The quantization config might need to be customized if you define a custom quantized module. Please see
+:ref:`customizing quantizer config ` for more details.
+
+Fast evaluation
+---------------
+
+Weight folding avoids repeated quantization of the weights during each inference forward pass and speeds up evaluation. This can be done with the following code:
+
+.. code-block:: python
+
+    # Fold quantizer together with weight tensor
+    mtq.fold_weight(quantized_model)
+
+    # Run model evaluation
+    user_evaluate_func(quantized_model)
+
+.. note::
+
+    After weight folding, the model can no longer be exported to ONNX or fine-tuned with QAT.
diff --git a/_sources/index.rst.txt b/_sources/index.rst.txt
new file mode 100644
index 0000000..bc42a89
--- /dev/null
+++ b/_sources/index.rst.txt
@@ -0,0 +1,50 @@
+.. Model Optimizer documentation master file, created by
+   sphinx-quickstart on Mon Feb 7 22:13:52 2022.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root ``toctree`` directive.
+
+Welcome to Model Optimizer (ModelOpt) documentation!
+####################################################
+
+.. toctree::
+   :glob:
+   :maxdepth: 1
+   :caption: Getting Started
+
+   getting_started/[0-9]*
+
+.. toctree::
+   :glob:
+   :maxdepth: 1
+   :caption: Optimization Guides
+
+   guides/[0-9]*
+
+.. toctree::
+   :glob:
+   :maxdepth: 1
+   :caption: Deployment
+
+   deployment/[0-9]*
+
+.. toctree::
+   :glob:
+   :maxdepth: 1
+   :caption: Examples
+
+   examples/[0-9]*
+
+
+.. toctree::
+   :glob:
+   :maxdepth: 1
+   :caption: Reference
+
+   reference/[0-9]*
+
+.. toctree::
+   :glob:
+   :maxdepth: 1
+   :caption: Support
+
+   support/[0-9]*
diff --git a/_sources/reference/0_versions.rst.txt b/_sources/reference/0_versions.rst.txt
new file mode 100644
index 0000000..58dc03f
--- /dev/null
+++ b/_sources/reference/0_versions.rst.txt
@@ -0,0 +1 @@
+.. include:: ../../../CHANGELOG.rst
diff --git a/_sources/reference/1_modelopt_api.rst.txt b/_sources/reference/1_modelopt_api.rst.txt
new file mode 100644
index 0000000..a19639a
--- /dev/null
+++ b/_sources/reference/1_modelopt_api.rst.txt
@@ -0,0 +1,16 @@
+============
+modelopt API
+============
+
+..
Explicitly enumerating all submodules so everything is top-level under the API Reference heading +.. TODO: add future submodules here as well! + +.. currentmodule:: modelopt + +.. autosummary:: + :toctree: generated + :recursive: + + deploy + onnx + torch diff --git a/_sources/reference/generated/modelopt.deploy.llm.generate.rst.txt b/_sources/reference/generated/modelopt.deploy.llm.generate.rst.txt new file mode 100644 index 0000000..7dd1d19 --- /dev/null +++ b/_sources/reference/generated/modelopt.deploy.llm.generate.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +generate +======== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.deploy.llm.generate + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + LLM + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.deploy.llm.model_config_trt.rst.txt b/_sources/reference/generated/modelopt.deploy.llm.model_config_trt.rst.txt new file mode 100644 index 0000000..856ecca --- /dev/null +++ b/_sources/reference/generated/modelopt.deploy.llm.model_config_trt.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +model\_config\_trt +================== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.deploy.llm.model_config_trt + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + build_tensorrt_llm + build_tensorrt_llm_rank + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.deploy.llm.nemo_utils.rst.txt b/_sources/reference/generated/modelopt.deploy.llm.nemo_utils.rst.txt new file mode 100644 index 0000000..99ba240 --- /dev/null +++ b/_sources/reference/generated/modelopt.deploy.llm.nemo_utils.rst.txt @@ -0,0 +1,52 @@ +.. 
From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +nemo\_utils +=========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.deploy.llm.nemo_utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + CustomSentencePieceTokenizer + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + get_nemo_tokenizer + get_tokenzier + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.deploy.llm.rst.txt b/_sources/reference/generated/modelopt.deploy.llm.rst.txt new file mode 100644 index 0000000..7a09bbe --- /dev/null +++ b/_sources/reference/generated/modelopt.deploy.llm.rst.txt @@ -0,0 +1,53 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +llm +=== + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.deploy.llm.generate + + + modelopt.deploy.llm.model_config_trt + + + modelopt.deploy.llm.nemo_utils + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.deploy.llm + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.deploy.rst.txt b/_sources/reference/generated/modelopt.deploy.rst.txt new file mode 100644 index 0000000..e3ae0fe --- /dev/null +++ b/_sources/reference/generated/modelopt.deploy.rst.txt @@ -0,0 +1,47 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +deploy +====== + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.deploy.llm + + + + +.. 
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.deploy + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.op_types.rst.txt b/_sources/reference/generated/modelopt.onnx.op_types.rst.txt new file mode 100644 index 0000000..9325558 --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.op_types.rst.txt @@ -0,0 +1,64 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +op\_types +========= + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.op_types + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + get_quantizable_op_types + is_binary_op + is_control_flow_op + is_conversion_op + is_copy_op + is_default_quantizable_op_by_ort + is_fusible_reduction_op + is_generator_op + is_irregular_mem_access_op + is_linear_op + is_modifier_op + is_multiclass_op + is_non_reshape_copy_op + is_normalization_op + is_pointwise_or_elementwise_op + is_pooling_or_window_op + is_recurrent_op + is_selection_op + is_sequence_op + is_shape_op + is_unary_op + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.calib_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.calib_utils.rst.txt new file mode 100644 index 0000000..8c56bf3 --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.calib_utils.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +calib\_utils +============ + +.. List the submodules + + + + + +.. 
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.quantization.calib_utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + CalibrationDataProvider + RandomDataProvider + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.graph_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.graph_utils.rst.txt new file mode 100644 index 0000000..68090db --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.graph_utils.rst.txt @@ -0,0 +1,52 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +graph\_utils +============ + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.quantization.graph_utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + build_non_residual_input_map + classify_partition_nodes + filter_quantizable_kgen_heads + get_fusible_backbone + has_const_input + has_path_type + is_const_input + print_stat + remove_partial_input_qdq + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.gs_patching.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.gs_patching.rst.txt new file mode 100644 index 0000000..a9eb344 --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.gs_patching.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +gs\_patching +============ + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. 
in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.quantization.gs_patching + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + patch_gs_modules + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.int4.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.int4.rst.txt new file mode 100644 index 0000000..d02940e --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.int4.rst.txt @@ -0,0 +1,57 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +int4 +==== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.quantization.int4 + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + AWQClipHelper + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + dq_tensor + find_scales + quant_tensor + quantize_int4 + quantize_int4_awq_clip + quantize_int4_rtn + rtn + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.operators.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.operators.rst.txt new file mode 100644 index 0000000..a0651cc --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.operators.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +operators +========= + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.quantization.operators + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. 
autosummary:: + :nosignatures: + + QDQConvTranspose + QDQNormalization + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.ort_patching.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.ort_patching.rst.txt new file mode 100644 index 0000000..6ba1ffb --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.ort_patching.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +ort\_patching +============= + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.quantization.ort_patching + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + patch_ort_modules + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.ort_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.ort_utils.rst.txt new file mode 100644 index 0000000..9ac6f96 --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.ort_utils.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +ort\_utils +========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.quantization.ort_utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + create_inference_session + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.partitioning.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.partitioning.rst.txt new file mode 100644 index 0000000..3d5e398 --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.partitioning.rst.txt @@ -0,0 +1,50 @@ +.. 
From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +partitioning +============ + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.quantization.partitioning + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + find_fusible_partitions + find_hardcoded_patterns + find_layer_norm_partitions + find_mha_partitions + find_non_quantizable_partitions_from_patterns + find_quantizable_nodes + get_skiped_output_layers + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.qdq_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.qdq_utils.rst.txt new file mode 100644 index 0000000..4609774 --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.qdq_utils.rst.txt @@ -0,0 +1,53 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +qdq\_utils +========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.quantization.qdq_utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + insert_dq_nodes + insert_qdq_nodes + make_gs_dequantize_node + make_gs_dequantize_output + make_gs_quantize_node + make_gs_quantize_output + make_gs_quantized_weight + make_gs_scale + make_gs_zp + use_trt_qdq_ops + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.quant_utils.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.quant_utils.rst.txt new file mode 100644 index 0000000..caf23db --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.quant_utils.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +quant\_utils +============ + +.. List the submodules + + + + + +.. 
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.quantization.quant_utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + pack_float32_to_4bit_optimized + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.quantize.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.quantize.rst.txt new file mode 100644 index 0000000..6bf0871 --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.quantize.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +quantize +======== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.quantization.quantize + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + quantize + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.quantization.rst.txt b/_sources/reference/generated/modelopt.onnx.quantization.rst.txt new file mode 100644 index 0000000..3af2c1a --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.quantization.rst.txt @@ -0,0 +1,77 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +quantization +============ + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.onnx.quantization.calib_utils + + + modelopt.onnx.quantization.graph_utils + + + modelopt.onnx.quantization.gs_patching + + + modelopt.onnx.quantization.int4 + + + modelopt.onnx.quantization.operators + + + modelopt.onnx.quantization.ort_patching + + + modelopt.onnx.quantization.ort_utils + + + modelopt.onnx.quantization.partitioning + + + modelopt.onnx.quantization.qdq_utils + + + modelopt.onnx.quantization.quant_utils + + + modelopt.onnx.quantization.quantize + + + + +.. 
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.quantization + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.rst.txt b/_sources/reference/generated/modelopt.onnx.rst.txt new file mode 100644 index 0000000..76f1284 --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.rst.txt @@ -0,0 +1,53 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +onnx +==== + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.onnx.op_types + + + modelopt.onnx.quantization + + + modelopt.onnx.utils + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.onnx.utils.rst.txt b/_sources/reference/generated/modelopt.onnx.utils.rst.txt new file mode 100644 index 0000000..cbb0252 --- /dev/null +++ b/_sources/reference/generated/modelopt.onnx.utils.rst.txt @@ -0,0 +1,70 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +utils +===== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.onnx.utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. 
Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + duplicate_shared_linear_weights + find_lowest_common_ancestor + gen_random_inputs + get_all_input_names + get_batch_size + get_batch_size_from_bytes + get_child_nodes + get_input_names + get_input_names_from_bytes + get_input_shapes + get_input_shapes_from_bytes + get_node_names + get_node_names_from_bytes + get_output_names + get_output_names_from_bytes + get_output_shapes + get_parent_nodes + get_variable_inputs + is_valid_onnx_model + name_onnx_nodes + randomize_weights + randomize_weights_onnx_bytes + remove_weights_data + save_onnx + save_onnx_bytes_to_dir + validate_batch_size + validate_onnx + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.distribute.rst.txt b/_sources/reference/generated/modelopt.torch.export.distribute.rst.txt new file mode 100644 index 0000000..970af8c --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.export.distribute.rst.txt @@ -0,0 +1,56 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +distribute +========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.export.distribute + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + NFSWorkspace + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + barrier + get_configs_parallel + get_group + get_rank + get_tensors_parallel + get_world_size + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.layer_utils.rst.txt b/_sources/reference/generated/modelopt.torch.export.layer_utils.rst.txt new file mode 100644 index 0000000..9747b72 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.export.layer_utils.rst.txt @@ -0,0 +1,69 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +layer\_utils +============ + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.export.layer_utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. 
Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + build_attention_config + build_decoder_config + build_embedding_config + build_layernorm_config + build_linear_config + build_mlp_config + build_moe_config + build_qkv + build_stacked_experts + check_model_compatibility + get_activation_scaling_factor + get_kv_cache_dtype + get_kv_cache_scaling_factor + get_prequant_scaling_factor + get_scaling_factor + get_transformer_layers + get_weight_block_size + get_weight_scaling_factor + get_weight_scaling_factor_2 + is_attention + is_decoder_list + is_embedding + is_layernorm + is_linear + is_mlp + is_moe + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.model_config.rst.txt b/_sources/reference/generated/modelopt.torch.export.model_config.rst.txt new file mode 100644 index 0000000..d5dd2f8 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.export.model_config.rst.txt @@ -0,0 +1,53 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +model\_config +============= + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.export.model_config + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + AttentionConfig + DecoderLayerConfig + EmbeddingConfig + ExpertConfig + LayernormConfig + LinearConfig + MLPConfig + MOEConfig + ModelConfig + QKVConfig + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.model_config_export.rst.txt b/_sources/reference/generated/modelopt.torch.export.model_config_export.rst.txt new file mode 100644 index 0000000..8cdc857 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.export.model_config_export.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +model\_config\_export +===================== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.export.model_config_export + :members: + :undoc-members: + + .. 
Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + export_tensorrt_llm_checkpoint + torch_to_tensorrt_llm_checkpoint + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.model_config_utils.rst.txt b/_sources/reference/generated/modelopt.torch.export.model_config_utils.rst.txt new file mode 100644 index 0000000..7d852c0 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.export.model_config_utils.rst.txt @@ -0,0 +1,54 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +model\_config\_utils +==================== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.export.model_config_utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + from_quantized_weight + merge_fc1_gate + merge_qkv + model_config_from_dict + model_config_to_dict + naive_quantization + pack_linear_weights + pad_weights + restore_model_config + split_config_and_weights + to_quantized_weight + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.postprocess.rst.txt b/_sources/reference/generated/modelopt.torch.export.postprocess.rst.txt new file mode 100644 index 0000000..f3a8895 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.export.postprocess.rst.txt @@ -0,0 +1,47 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +postprocess +=========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.export.postprocess + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. 
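``export_tensorrt_llm_checkpoint`` listed above is the high-level export entry point. A sketch of typical usage; the keyword arguments shown follow the project's examples but should be treated as assumptions for any given release:

.. code-block:: python

    import torch
    from transformers import AutoModelForCausalLM

    from modelopt.torch.export import export_tensorrt_llm_checkpoint

    model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")

    # Writes a TensorRT-LLM checkpoint directory for the given decoder family.
    export_tensorrt_llm_checkpoint(
        model,
        decoder_type="llama",
        dtype=torch.float16,
        export_dir="/tmp/trtllm_ckpt",
        inference_tensor_parallel=1,
    )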
autosummary:: + :nosignatures: + + check_weight_shape_valid + pad_embedding_lm_head + postprocess_model_config + postprocess_tensors + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.rst.txt b/_sources/reference/generated/modelopt.torch.export.rst.txt new file mode 100644 index 0000000..0a5dccb --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.export.rst.txt @@ -0,0 +1,71 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +export +====== + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.torch.export.distribute + + + modelopt.torch.export.layer_utils + + + modelopt.torch.export.model_config + + + modelopt.torch.export.model_config_export + + + modelopt.torch.export.model_config_utils + + + modelopt.torch.export.postprocess + + + modelopt.torch.export.scaling_factor_utils + + + modelopt.torch.export.tensorrt_llm_utils + + + modelopt.torch.export.transformer_engine + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.export + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.scaling_factor_utils.rst.txt b/_sources/reference/generated/modelopt.torch.export.scaling_factor_utils.rst.txt new file mode 100644 index 0000000..c8b0cbe --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.export.scaling_factor_utils.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +scaling\_factor\_utils +====================== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.export.scaling_factor_utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. 
autosummary:: + :nosignatures: + + get_weights_scaling_factor + resmooth_and_get_scale + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.tensorrt_llm_utils.rst.txt b/_sources/reference/generated/modelopt.torch.export.tensorrt_llm_utils.rst.txt new file mode 100644 index 0000000..e7b943c --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.export.tensorrt_llm_utils.rst.txt @@ -0,0 +1,46 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +tensorrt\_llm\_utils +==================== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.export.tensorrt_llm_utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + convert_to_tensorrt_llm_config + is_tensorrt_llm_0_8_or_9 + weights_to_npz + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.export.transformer_engine.rst.txt b/_sources/reference/generated/modelopt.torch.export.transformer_engine.rst.txt new file mode 100644 index 0000000..07ff472 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.export.transformer_engine.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +transformer\_engine +=================== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.export.transformer_engine + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + convert_to_transformer_engine + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.opt.config.rst.txt b/_sources/reference/generated/modelopt.torch.opt.config.rst.txt new file mode 100644 index 0000000..4cbb002 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.opt.config.rst.txt @@ -0,0 +1,45 @@ +.. 
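``convert_to_transformer_engine`` listed above presumably swaps supported submodules for their Transformer Engine equivalents. A low-certainty sketch (the in-place conversion and the bare ``model`` argument are assumptions):

.. code-block:: python

    import torch.nn as nn

    from modelopt.torch.export.transformer_engine import convert_to_transformer_engine

    model = nn.Sequential(nn.Linear(1024, 1024), nn.GELU(), nn.Linear(1024, 1024))

    # Assumed: replaces supported layers with transformer-engine modules in
    # place; requires the transformer-engine package to be installed.
    convert_to_transformer_engine(model)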
From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +config +====== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.opt.config + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + ModeloptField + get_kwargs_for_create_model_with_rules + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.opt.conversion.rst.txt b/_sources/reference/generated/modelopt.torch.opt.conversion.rst.txt new file mode 100644 index 0000000..4dee394 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.opt.conversion.rst.txt @@ -0,0 +1,55 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +conversion +========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.opt.conversion + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + ModeloptStateManager + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + apply_mode + modelopt_state + save + restore_from_modelopt_state + restore + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.opt.dynamic.rst.txt b/_sources/reference/generated/modelopt.torch.opt.dynamic.rst.txt new file mode 100644 index 0000000..a5d8790 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.opt.dynamic.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +dynamic +======= + +.. List the submodules + + + + + +.. 
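The conversion functions listed above (``modelopt_state``, ``save``, ``restore``, ...) compose into the usual checkpoint round trip. A sketch; re-exporting them under the ``modelopt.torch.opt`` namespace is how the project's examples do it, but treat that alias as an assumption:

.. code-block:: python

    import torch.nn as nn

    import modelopt.torch.opt as mto

    model = nn.Linear(16, 16)  # stand-in; normally a model with modelopt modes applied

    mto.save(model, "modelopt_model.pth")  # weights plus the modelopt state
    state = mto.modelopt_state(model)      # just the modelopt state, for custom checkpointing

    fresh = nn.Linear(16, 16)
    fresh = mto.restore(fresh, "modelopt_model.pth")  # re-applies modes, then loads weights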
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.opt.dynamic + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + DynamicModule + DynamicSpace + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.opt.hparam.rst.txt b/_sources/reference/generated/modelopt.torch.opt.hparam.rst.txt new file mode 100644 index 0000000..10d9222 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.opt.hparam.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +hparam +====== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.opt.hparam + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + Hparam + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.opt.mode.rst.txt b/_sources/reference/generated/modelopt.torch.opt.mode.rst.txt new file mode 100644 index 0000000..1c8ac9e --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.opt.mode.rst.txt @@ -0,0 +1,56 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +mode +==== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.opt.mode + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. 
Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + ABC + Any + BaseSearcher + TypeVar + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + abstractmethod + get_mode_config + val2list + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.opt.plugins.rst.txt b/_sources/reference/generated/modelopt.torch.opt.plugins.rst.txt new file mode 100644 index 0000000..d00e2f4 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.opt.plugins.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +plugins +======= + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.opt.plugins + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.opt.rst.txt b/_sources/reference/generated/modelopt.torch.opt.rst.txt new file mode 100644 index 0000000..5f7edc7 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.opt.rst.txt @@ -0,0 +1,68 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +opt +=== + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.torch.opt.config + + + modelopt.torch.opt.conversion + + + modelopt.torch.opt.dynamic + + + modelopt.torch.opt.hparam + + + modelopt.torch.opt.mode + + + modelopt.torch.opt.plugins + + + modelopt.torch.opt.searcher + + + modelopt.torch.opt.utils + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.opt + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. 
Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.opt.searcher.rst.txt b/_sources/reference/generated/modelopt.torch.opt.searcher.rst.txt new file mode 100644 index 0000000..918bd86 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.opt.searcher.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +searcher +======== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.opt.searcher + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + BaseSearcher + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.opt.utils.rst.txt b/_sources/reference/generated/modelopt.torch.opt.utils.rst.txt new file mode 100644 index 0000000..d815dcc --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.opt.utils.rst.txt @@ -0,0 +1,47 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +utils +===== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.opt.utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + is_configurable + is_dynamic + named_hparams + search_space_size + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.calib.calibrator.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.calib.calibrator.rst.txt new file mode 100644 index 0000000..a6592d6 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.calib.calibrator.rst.txt @@ -0,0 +1,37 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +calibrator +========== + +.. List the submodules + + + + + +.. 
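The introspection helpers listed above (``is_configurable``, ``search_space_size``, ...) presumably all take the root module. A hedged sketch:

.. code-block:: python

    import torch.nn as nn

    from modelopt.torch.opt.utils import is_configurable, named_hparams, search_space_size

    model = nn.Linear(8, 8)

    # Assumed semantics: a plain module exposes no searchable hyperparameters
    # until a modelopt mode has been applied.
    if is_configurable(model):
        print(search_space_size(model))    # assumed: number of candidate configs
        print(dict(named_hparams(model)))  # assumed: (name, Hparam) pairs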
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.calib.calibrator + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.calib.histogram.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.calib.histogram.rst.txt new file mode 100644 index 0000000..874d5c1 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.calib.histogram.rst.txt @@ -0,0 +1,51 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +histogram +========= + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.calib.histogram + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + HistogramCalibrator + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + calibrate_weights + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.calib.max.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.calib.max.rst.txt new file mode 100644 index 0000000..4163097 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.calib.max.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +max +=== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.calib.max + :members: + :undoc-members: + + .. 
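``HistogramCalibrator`` listed above follows the collect/compute pattern inherited from pytorch-quantization; the constructor arguments and ``compute_amax`` options below are assumptions on that basis:

.. code-block:: python

    import torch

    from modelopt.torch.quantization.calib import HistogramCalibrator

    calib = HistogramCalibrator(num_bits=8, axis=None, unsigned=False)

    for _ in range(8):
        calib.collect(torch.randn(32, 128))  # accumulate activation statistics

    # Assumed: derive the clipping range from the histogram, e.g. by percentile.
    amax = calib.compute_amax("percentile", percentile=99.9)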
Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + MaxCalibrator + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.calib.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.calib.rst.txt new file mode 100644 index 0000000..044269c --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.calib.rst.txt @@ -0,0 +1,53 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +calib +===== + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.torch.quantization.calib.calibrator + + + modelopt.torch.quantization.calib.histogram + + + modelopt.torch.quantization.calib.max + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.calib + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.config.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.config.rst.txt new file mode 100644 index 0000000..5640c02 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.config.rst.txt @@ -0,0 +1,37 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +config +====== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.config + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. 
Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.conversion.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.conversion.rst.txt new file mode 100644 index 0000000..54eff34 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.conversion.rst.txt @@ -0,0 +1,48 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +conversion +========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.conversion + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + replace_quant_module + set_quantizer_by_cfg + set_quantizer_attribute + register + unregister + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.extensions.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.extensions.rst.txt new file mode 100644 index 0000000..bab84af --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.extensions.rst.txt @@ -0,0 +1,37 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +extensions +========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.extensions + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.mode.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.mode.rst.txt new file mode 100644 index 0000000..9b06131 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.mode.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +mode +==== + +.. List the submodules + + + + + +.. 
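``set_quantizer_by_cfg`` and ``set_quantizer_attribute`` listed above suggest wildcard-pattern control over individual quantizers. A sketch; the pattern syntax, the ``enable`` attribute, and ``mtq.INT8_DEFAULT_CFG`` follow the project's examples but are assumptions here:

.. code-block:: python

    import torch
    import torch.nn as nn

    import modelopt.torch.quantization as mtq
    from modelopt.torch.quantization.conversion import set_quantizer_by_cfg

    model = nn.Sequential(nn.Linear(16, 16), nn.ReLU(), nn.Linear(16, 4))
    model = mtq.quantize(model, mtq.INT8_DEFAULT_CFG, lambda m: m(torch.randn(8, 16)))

    # Assumed: wildcard keys match quantizer submodule names; the mapped dict
    # sets attributes on every match (here, disabling input quantizers).
    set_quantizer_by_cfg(model, {"*input_quantizer": {"enable": False}})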
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.mode + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + QuantizeExportModeDescriptor + QuantizeModeDescriptor + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.model_calib.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.model_calib.rst.txt new file mode 100644 index 0000000..294b433 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.model_calib.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +model\_calib +============ + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.model_calib + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + calibrate + postprocess_amax + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.model_quant.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.model_quant.rst.txt new file mode 100644 index 0000000..6fcdfab --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.model_quant.rst.txt @@ -0,0 +1,48 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +model\_quant +============ + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. 
automodule:: modelopt.torch.quantization.model_quant + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + quantize + disable_quantizer + enable_quantizer + print_quant_summary + fold_weight + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.functional.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.functional.rst.txt new file mode 100644 index 0000000..8296196 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.functional.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +functional +========== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.nn.functional + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + ClipFunction + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.clip.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.clip.rst.txt new file mode 100644 index 0000000..80df63e --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.clip.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +clip +==== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.nn.modules.clip + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + Clip + + + + + .. 
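The ``model_quant`` functions listed above (``quantize``, ``disable_quantizer``, ``print_quant_summary``, ...) form the core post-training quantization loop. A sketch of the usual flow; the ``mtq`` alias and the ``INT8_DEFAULT_CFG`` config name follow the project's examples:

.. code-block:: python

    import torch
    import torch.nn as nn

    import modelopt.torch.quantization as mtq

    model = nn.Sequential(nn.Linear(32, 32), nn.ReLU(), nn.Linear(32, 8))

    def forward_loop(m):
        # Calibration: run representative data through the model.
        for _ in range(16):
            m(torch.randn(4, 32))

    # Inserts quantizers, calibrates them, and returns the quantized model.
    model = mtq.quantize(model, mtq.INT8_DEFAULT_CFG, forward_loop)

    mtq.print_quant_summary(model)       # per-quantizer overview
    mtq.disable_quantizer(model, "*0*")  # assumed: wildcard over quantizer names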
Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_activations.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_activations.rst.txt new file mode 100644 index 0000000..fb1012f --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_activations.rst.txt @@ -0,0 +1,37 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +quant\_activations +================== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.nn.modules.quant_activations + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_batchnorm.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_batchnorm.rst.txt new file mode 100644 index 0000000..01eb6e8 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_batchnorm.rst.txt @@ -0,0 +1,37 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +quant\_batchnorm +================ + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.nn.modules.quant_batchnorm + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.rst.txt new file mode 100644 index 0000000..d0f5f40 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.rst.txt @@ -0,0 +1,55 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +quant\_conv +=========== + +.. 
List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.nn.modules.quant_conv + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + Conv2d + QuantConv2d + Conv3d + QuantConv3d + Conv1d + QuantConv1d + ConvTranspose1d + ConvTranspose2d + ConvTranspose3d + QuantConvTranspose1d + QuantConvTranspose2d + QuantConvTranspose3d + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_instancenorm.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_instancenorm.rst.txt new file mode 100644 index 0000000..3db29f7 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_instancenorm.rst.txt @@ -0,0 +1,46 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +quant\_instancenorm +=================== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.nn.modules.quant_instancenorm + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + QuantInstanceNorm1d + QuantInstanceNorm2d + QuantInstanceNorm3d + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.rst.txt new file mode 100644 index 0000000..ebac01c --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +quant\_linear +============= + +.. List the submodules + + + + + +.. 
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.nn.modules.quant_linear + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + Linear + QuantLinear + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.rst.txt new file mode 100644 index 0000000..0af57bd --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +quant\_module +============= + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.nn.modules.quant_module + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + QuantInputBase + QuantLinearConvBase + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_pooling.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_pooling.rst.txt new file mode 100644 index 0000000..8855b23 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.quant_pooling.rst.txt @@ -0,0 +1,61 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +quant\_pooling +============== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. 
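``QuantLinear`` listed above is presumably a drop-in replacement for ``nn.Linear`` with ``TensorQuantizer`` submodules attached to the input and weight. A hedged sketch (the ``input_quantizer`` / ``weight_quantizer`` attribute names mirror the pytorch-quantization lineage and are assumptions):

.. code-block:: python

    from modelopt.torch.quantization.nn import QuantLinear

    # Assumed to mirror nn.Linear's constructor; a forward pass would typically
    # require calibrated amax values first, so only construction is shown.
    layer = QuantLinear(128, 64)
    print(layer.input_quantizer)
    print(layer.weight_quantizer)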
in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.nn.modules.quant_pooling + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + MaxPool1d + QuantMaxPool1d + MaxPool2d + QuantMaxPool2d + MaxPool3d + QuantMaxPool3d + AvgPool1d + QuantAvgPool1d + AvgPool2d + QuantAvgPool2d + AvgPool3d + QuantAvgPool3d + AdaptiveAvgPool1d + QuantAdaptiveAvgPool1d + AdaptiveAvgPool2d + QuantAdaptiveAvgPool2d + AdaptiveAvgPool3d + QuantAdaptiveAvgPool3d + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.rst.txt new file mode 100644 index 0000000..2e2017a --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.rst.txt @@ -0,0 +1,71 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +modules +======= + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.torch.quantization.nn.modules.clip + + + modelopt.torch.quantization.nn.modules.quant_activations + + + modelopt.torch.quantization.nn.modules.quant_batchnorm + + + modelopt.torch.quantization.nn.modules.quant_conv + + + modelopt.torch.quantization.nn.modules.quant_instancenorm + + + modelopt.torch.quantization.nn.modules.quant_linear + + + modelopt.torch.quantization.nn.modules.quant_module + + + modelopt.torch.quantization.nn.modules.quant_pooling + + + modelopt.torch.quantization.nn.modules.tensor_quantizer + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.nn.modules + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.rst.txt new file mode 100644 index 0000000..1ea451d --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +tensor\_quantizer +================= + +.. List the submodules + + + + + +.. 
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.nn.modules.tensor_quantizer + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + TensorQuantizer + SequentialQuantizer + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.nn.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.nn.rst.txt new file mode 100644 index 0000000..83cbcb3 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.nn.rst.txt @@ -0,0 +1,50 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +nn +== + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.torch.quantization.nn.functional + + + modelopt.torch.quantization.nn.modules + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.nn + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.optim.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.optim.rst.txt new file mode 100644 index 0000000..34ef996 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.optim.rst.txt @@ -0,0 +1,47 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +optim +===== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. 
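``TensorQuantizer`` listed above can also be used standalone. A sketch of the calibrate-then-quantize toggle flow familiar from pytorch-quantization; the default constructor behavior and the toggle method names are assumptions:

.. code-block:: python

    import torch

    from modelopt.torch.quantization.nn import TensorQuantizer

    tq = TensorQuantizer()  # assumed default: 8-bit fake quantization, max calibrator

    tq.disable_quant()
    tq.enable_calib()
    tq(torch.randn(4, 16))  # observe statistics only
    tq.load_calib_amax()    # assumed: pull amax from the attached calibrator

    tq.disable_calib()
    tq.enable_quant()
    y = tq(torch.randn(4, 16))  # fake-quantized output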
automodule:: modelopt.torch.quantization.optim + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + freeze_parameters + group_parameters + match_parameters + quant_weight_inplace + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.plugins.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.plugins.rst.txt new file mode 100644 index 0000000..f6dad09 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.plugins.rst.txt @@ -0,0 +1,50 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +plugins +======= + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + + + + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.plugins + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.quant_modules.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.quant_modules.rst.txt new file mode 100644 index 0000000..95a2d90 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.quant_modules.rst.txt @@ -0,0 +1,46 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +quant\_modules +============== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.quant_modules + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. 
autosummary:: + :nosignatures: + + deactivate + enable_onnx_export + initialize + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.rst.txt new file mode 100644 index 0000000..f1dbf86 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.rst.txt @@ -0,0 +1,83 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +quantization +============ + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.torch.quantization.calib + + + modelopt.torch.quantization.config + + + modelopt.torch.quantization.conversion + + + modelopt.torch.quantization.extensions + + + modelopt.torch.quantization.mode + + + modelopt.torch.quantization.model_calib + + + modelopt.torch.quantization.model_quant + + + modelopt.torch.quantization.nn + + + modelopt.torch.quantization.optim + + + modelopt.torch.quantization.plugins + + + modelopt.torch.quantization.quant_modules + + + modelopt.torch.quantization.tensor_quant + + + modelopt.torch.quantization.utils + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.tensor_quant.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.tensor_quant.rst.txt new file mode 100644 index 0000000..71f1b1f --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.tensor_quant.rst.txt @@ -0,0 +1,57 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +tensor\_quant +============= + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.tensor_quant + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. 
autosummary:: + :nosignatures: + + FakeAffineTensorQuantFunction + FakeTensorQuantFunction + LegacyFakeTensorQuantFunction + QuantDescriptor + ScaledE4M3Function + ScaledQuantDescriptor + TensorQuantFunction + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + scaled_e4m3_abstract + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.quantization.utils.rst.txt b/_sources/reference/generated/modelopt.torch.quantization.utils.rst.txt new file mode 100644 index 0000000..edc4556 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.quantization.utils.rst.txt @@ -0,0 +1,51 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +utils +===== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.quantization.utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + reduce_amax + is_quantized + is_quantized_layer_with_weight + is_quantized_column_parallel_linear + is_quantized_row_parallel_linear + replace_function + export_torch_mode + is_torch_library_supported + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.rst.txt b/_sources/reference/generated/modelopt.torch.rst.txt new file mode 100644 index 0000000..a00a1df --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.rst.txt @@ -0,0 +1,59 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +torch +===== + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.torch.export + + + modelopt.torch.opt + + + modelopt.torch.quantization + + + modelopt.torch.sparsity + + + modelopt.torch.utils + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. 
Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.sparsity.config.rst.txt b/_sources/reference/generated/modelopt.torch.sparsity.config.rst.txt new file mode 100644 index 0000000..1f9411f --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.sparsity.config.rst.txt @@ -0,0 +1,37 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +config +====== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.sparsity.config + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.sparsity.magnitude.rst.txt b/_sources/reference/generated/modelopt.torch.sparsity.magnitude.rst.txt new file mode 100644 index 0000000..02eaec6 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.sparsity.magnitude.rst.txt @@ -0,0 +1,57 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +magnitude +========= + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.sparsity.magnitude + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + MagnitudeSearcher + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + compute_valid_1d_patterns + create_asp_mask + fill + get_nmprune_info + m4n2_1d + mn_1d_best + reshape_1d + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.sparsity.mode.rst.txt b/_sources/reference/generated/modelopt.torch.sparsity.mode.rst.txt new file mode 100644 index 0000000..105aa54 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.sparsity.mode.rst.txt @@ -0,0 +1,57 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +mode +==== + +.. List the submodules + + + + + +.. 
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.sparsity.mode + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + ExportSparseModeDescriptor + SparseGPTModeDescriptor + SparseMagnitudeModeDescriptor + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + convert_sparse_model + export_sparse + restore_export_sparse + restore_sparse_model + update_sparse_metadata + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.sparsity.module.rst.txt b/_sources/reference/generated/modelopt.torch.sparsity.module.rst.txt new file mode 100644 index 0000000..86f97c2 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.sparsity.module.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +module +====== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.sparsity.module + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + SparseModule + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.sparsity.plugins.rst.txt b/_sources/reference/generated/modelopt.torch.sparsity.plugins.rst.txt new file mode 100644 index 0000000..d77afa8 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.sparsity.plugins.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +plugins +======= + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + + + +.. 
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.sparsity.plugins + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.sparsity.rst.txt b/_sources/reference/generated/modelopt.torch.sparsity.rst.txt new file mode 100644 index 0000000..89f1042 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.sparsity.rst.txt @@ -0,0 +1,68 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +sparsity +======== + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.torch.sparsity.config + + + modelopt.torch.sparsity.magnitude + + + modelopt.torch.sparsity.mode + + + modelopt.torch.sparsity.module + + + modelopt.torch.sparsity.plugins + + + modelopt.torch.sparsity.searcher + + + modelopt.torch.sparsity.sparsegpt + + + modelopt.torch.sparsity.sparsification + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.sparsity + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.sparsity.searcher.rst.txt b/_sources/reference/generated/modelopt.torch.sparsity.searcher.rst.txt new file mode 100644 index 0000000..9bc6273 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.sparsity.searcher.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +searcher +======== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. 
+ +.. automodule:: modelopt.torch.sparsity.searcher + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + BaseSparseSearcher + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.sparsity.sparsegpt.rst.txt b/_sources/reference/generated/modelopt.torch.sparsity.sparsegpt.rst.txt new file mode 100644 index 0000000..bd55377 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.sparsity.sparsegpt.rst.txt @@ -0,0 +1,53 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +sparsegpt +========= + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.sparsity.sparsegpt + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + SparseGPTSearcher + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + create_sgpt_mask + invert + prepare + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.sparsity.sparsification.rst.txt b/_sources/reference/generated/modelopt.torch.sparsity.sparsification.rst.txt new file mode 100644 index 0000000..a2f26ab --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.sparsity.sparsification.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +sparsification +============== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.sparsity.sparsification + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. 
autosummary:: + :nosignatures: + + sparsify + export + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.utils.cpp_extension.rst.txt b/_sources/reference/generated/modelopt.torch.utils.cpp_extension.rst.txt new file mode 100644 index 0000000..8099ceb --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.utils.cpp_extension.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +cpp\_extension +============== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.utils.cpp_extension + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + load_cpp_extension + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.utils.dataset_utils.rst.txt b/_sources/reference/generated/modelopt.torch.utils.dataset_utils.rst.txt new file mode 100644 index 0000000..d62dc37 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.utils.dataset_utils.rst.txt @@ -0,0 +1,45 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +dataset\_utils +============== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.utils.dataset_utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + create_forward_loop + get_dataset_dataloader + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.utils.distributed.rst.txt b/_sources/reference/generated/modelopt.torch.utils.distributed.rst.txt new file mode 100644 index 0000000..ffca0fb --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.utils.distributed.rst.txt @@ -0,0 +1,52 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +distributed +=========== + +.. List the submodules + + + + + +.. 
Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.utils.distributed + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + backend + size + rank + is_master + barrier + set_data_parallel_group + set_tensor_parallel_group + get_data_parallel_group + get_tensor_parallel_group + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.utils.graph.rst.txt b/_sources/reference/generated/modelopt.torch.utils.graph.rst.txt new file mode 100644 index 0000000..6db088a --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.utils.graph.rst.txt @@ -0,0 +1,44 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +graph +===== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.utils.graph + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + match + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.utils.list.rst.txt b/_sources/reference/generated/modelopt.torch.utils.list.rst.txt new file mode 100644 index 0000000..521937f --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.utils.list.rst.txt @@ -0,0 +1,47 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +list +==== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.utils.list + :members: + :undoc-members: + + .. 
Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + list_closest_to_median + val2list + val2tuple + stats + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.utils.logging.rst.txt b/_sources/reference/generated/modelopt.torch.utils.logging.rst.txt new file mode 100644 index 0000000..6bac471 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.utils.logging.rst.txt @@ -0,0 +1,46 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +logging +======= + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.utils.logging + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + num2hrb + no_stdout + print_rank_0 + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.utils.network.rst.txt b/_sources/reference/generated/modelopt.torch.utils.network.rst.txt new file mode 100644 index 0000000..45d6122 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.utils.network.rst.txt @@ -0,0 +1,63 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +network +======= + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.utils.network + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. 
autosummary:: + :nosignatures: + + compare_dict + get_model_attributes + get_module_device + get_same_padding + init_model_from_model_like + is_channels_last + is_parallel + make_divisible + model_to + param_num + param_num_from_forward + remove_bn + set_submodule + standardize_model_args + standardize_model_like_tuple + standardize_named_model_args + standardize_constructor_args + unwrap_model + zero_grad + run_forward_loop + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.utils.perf.rst.txt b/_sources/reference/generated/modelopt.torch.utils.perf.rst.txt new file mode 100644 index 0000000..7b709d2 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.utils.perf.rst.txt @@ -0,0 +1,53 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +perf +==== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.utils.perf + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + .. rubric:: Classes + + .. autosummary:: + :nosignatures: + + Timer + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. autosummary:: + :nosignatures: + + clear_cuda_cache + get_cuda_memory_stats + report_memory + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.utils.random.rst.txt b/_sources/reference/generated/modelopt.torch.utils.random.rst.txt new file mode 100644 index 0000000..0b1e717 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.utils.random.rst.txt @@ -0,0 +1,49 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +random +====== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.utils.random + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. 
autosummary:: + :nosignatures: + + centroid + choice + original + random + sample + shuffle + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.utils.rst.txt b/_sources/reference/generated/modelopt.torch.utils.rst.txt new file mode 100644 index 0000000..af81075 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.utils.rst.txt @@ -0,0 +1,74 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +utils +===== + +.. List the submodules + + + +.. rubric:: Modules + +.. autosummary:: + :toctree: + :recursive: + + + modelopt.torch.utils.cpp_extension + + + modelopt.torch.utils.dataset_utils + + + modelopt.torch.utils.distributed + + + modelopt.torch.utils.graph + + + modelopt.torch.utils.list + + + modelopt.torch.utils.logging + + + modelopt.torch.utils.network + + + modelopt.torch.utils.perf + + + modelopt.torch.utils.random + + + modelopt.torch.utils.tensor + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.utils + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + \ No newline at end of file diff --git a/_sources/reference/generated/modelopt.torch.utils.tensor.rst.txt b/_sources/reference/generated/modelopt.torch.utils.tensor.rst.txt new file mode 100644 index 0000000..794b4f1 --- /dev/null +++ b/_sources/reference/generated/modelopt.torch.utils.tensor.rst.txt @@ -0,0 +1,47 @@ +.. From https://github.com/sphinx-doc/sphinx/blob/5.x/sphinx/ext/autosummary/templates/autosummary/module.rst + +tensor +====== + +.. List the submodules + + + + + +.. Autodoc anything defined in the module itself + + TODO: WE DON'T USE THIS OPTION RIGHT NOW BUT WE CAN REACTIVATE IF WANTED + We use :ignore-module-all: so sphinx does not document the same module twice, even if it is reimported + For reimports that should be documented somewhere other than where they are defined, the re-imports + __module__ should be manually overridden -- i.e. in the ``__init__.py`` which contains ``from xxx import YYY``, + add in ``YYY.__module__ = __name__``. + +.. automodule:: modelopt.torch.utils.tensor + :members: + :undoc-members: + + .. Also show members without docstrings. Only members from __all__ are considered as per conf.py + .. Ideally we should add docstrings for these members. + + + .. Overview table of available classes in the module + + + + + + .. Overview table of available functions in the module + + + .. rubric:: Functions + + .. 
autosummary:: + :nosignatures: + + torch_to + torch_detach + torch_to_numpy + numpy_to_torch + + \ No newline at end of file diff --git a/_sources/support/1_contact.rst.txt b/_sources/support/1_contact.rst.txt new file mode 100644 index 0000000..54e47ae --- /dev/null +++ b/_sources/support/1_contact.rst.txt @@ -0,0 +1,7 @@ + +========== +Contact us +========== + +You may raise an issue on `GitHub `_ +for any questions or issues you may have. diff --git a/_sources/support/2_faqs.rst.txt b/_sources/support/2_faqs.rst.txt new file mode 100644 index 0000000..59da8dd --- /dev/null +++ b/_sources/support/2_faqs.rst.txt @@ -0,0 +1,11 @@ + +==== +FAQs +==== + +1. Potential memory leak for ``FSDP`` with ``use_orig_params=True`` +------------------------------------------------------------------- + +When ``FSDP`` with ``use_orig_params=True`` is used in conjunction with modelopt-converted models, +there is a potential memory leak during training. Please use +``use_orig_params=False`` to avoid this issue; a minimal sketch of this wrapping appears below. diff --git a/_static/_sphinx_javascript_frameworks_compat.js b/_static/_sphinx_javascript_frameworks_compat.js new file mode 100644 index 0000000..8141580 --- /dev/null +++ b/_static/_sphinx_javascript_frameworks_compat.js @@ -0,0 +1,123 @@ +/* Compatibility shim for jQuery and underscore.js. + * + * Copyright Sphinx contributors + * Released under the two clause BSD licence + */ + +/** + * small helper function to urldecode strings + * + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL + */ +jQuery.urldecode = function(x) { + if (!x) { + return x + } + return decodeURIComponent(x.replace(/\+/g, ' ')); +}; + +/** + * small helper function to urlencode strings + */ +jQuery.urlencode = encodeURIComponent; + +/** + * This function returns the parsed url parameters of the + * current request. Multiple values per key are supported, + * it will always return arrays of strings for the value parts. + */ +jQuery.getQueryParameters = function(s) { + if (typeof s === 'undefined') + s = document.location.search; + var parts = s.substr(s.indexOf('?') + 1).split('&'); + var result = {}; + for (var i = 0; i < parts.length; i++) { + var tmp = parts[i].split('=', 2); + var key = jQuery.urldecode(tmp[0]); + var value = jQuery.urldecode(tmp[1]); + if (key in result) + result[key].push(value); + else + result[key] = [value]; + } + return result; +}; + +/** + * highlight a given string on a jquery object by wrapping it in + * span elements with the given class name.
+ */ +jQuery.fn.highlightText = function(text, className) { + function highlight(node, addItems) { + if (node.nodeType === 3) { + var val = node.nodeValue; + var pos = val.toLowerCase().indexOf(text); + if (pos >= 0 && + !jQuery(node.parentNode).hasClass(className) && + !jQuery(node.parentNode).hasClass("nohighlight")) { + var span; + var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.className = className; + } + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + node.parentNode.insertBefore(span, node.parentNode.insertBefore( + document.createTextNode(val.substr(pos + text.length)), + node.nextSibling)); + node.nodeValue = val.substr(0, pos); + if (isInSVG) { + var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect"); + var bbox = node.parentElement.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute('class', className); + addItems.push({ + "parent": node.parentNode, + "target": rect}); + } + } + } + else if (!jQuery(node).is("button, select, textarea")) { + jQuery.each(node.childNodes, function() { + highlight(this, addItems); + }); + } + } + var addItems = []; + var result = this.each(function() { + highlight(this, addItems); + }); + for (var i = 0; i < addItems.length; ++i) { + jQuery(addItems[i].parent).before(addItems[i].target); + } + return result; +}; + +/* + * backward compatibility for jQuery.browser + * This will be supported until firefox bug is fixed. + */ +if (!jQuery.browser) { + jQuery.uaMatch = function(ua) { + ua = ua.toLowerCase(); + + var match = /(chrome)[ \/]([\w.]+)/.exec(ua) || + /(webkit)[ \/]([\w.]+)/.exec(ua) || + /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) || + /(msie) ([\w.]+)/.exec(ua) || + ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) || + []; + + return { + browser: match[ 1 ] || "", + version: match[ 2 ] || "0" + }; + }; + jQuery.browser = {}; + jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true; +} diff --git a/_static/autodoc_pydantic.css b/_static/autodoc_pydantic.css new file mode 100644 index 0000000..994a3e5 --- /dev/null +++ b/_static/autodoc_pydantic.css @@ -0,0 +1,11 @@ +.autodoc_pydantic_validator_arrow { + padding-left: 8px; + } + +.autodoc_pydantic_collapsable_json { + cursor: pointer; + } + +.autodoc_pydantic_collapsable_erd { + cursor: pointer; + } \ No newline at end of file diff --git a/_static/basic.css b/_static/basic.css new file mode 100644 index 0000000..30fee9d --- /dev/null +++ b/_static/basic.css @@ -0,0 +1,925 @@ +/* + * basic.css + * ~~~~~~~~~ + * + * Sphinx stylesheet -- basic theme. + * + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. 
+ * + */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +div.section::after { + display: block; + content: ''; + clear: left; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox form.search { + overflow: hidden; +} + +div.sphinxsidebar #searchbox input[type="text"] { + float: left; + width: 80%; + padding: 0.25em; + box-sizing: border-box; +} + +div.sphinxsidebar #searchbox input[type="submit"] { + float: left; + width: 20%; + border-left: none; + padding: 0.25em; + box-sizing: border-box; +} + + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin: 10px 0 0 20px; + padding: 0; +} + +ul.search li { + padding: 5px 0 5px 20px; + background-image: url(file.png); + background-repeat: no-repeat; + background-position: 0 7px; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li p.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} + +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + +table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + 
+div.body { + min-width: 360px; + max-width: 800px; +} + +div.body p, div.body dd, div.body li, div.body blockquote { + -moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +a:visited { + color: #551A8B; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, +dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, figure.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, figure.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, figure.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +img.align-default, figure.align-default, .figure.align-default { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-default { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + +div.sidebar, +aside.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px; + background-color: #ffe; + width: 40%; + float: right; + clear: right; + overflow-x: auto; +} + +p.sidebar-title { + font-weight: bold; +} + +nav.contents, +aside.topic, +div.admonition, div.topic, blockquote { + clear: left; +} + +/* -- topics ---------------------------------------------------------------- */ + +nav.contents, +aside.topic, +div.topic { + border: 1px solid #ccc; + padding: 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 25px; +} + +/* -- content of sidebars/topics/admonitions -------------------------------- */ + +div.sidebar > :last-child, +aside.sidebar > :last-child, +nav.contents > :last-child, +aside.topic > :last-child, +div.topic > :last-child, +div.admonition > :last-child { + margin-bottom: 0; +} + +div.sidebar::after, +aside.sidebar::after, +nav.contents::after, +aside.topic::after, +div.topic::after, +div.admonition::after, +blockquote::after { + display: block; + content: ''; + clear: both; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + margin-top: 10px; + margin-bottom: 10px; + border: 0; + border-collapse: collapse; +} + +table.align-center { + margin-left: auto; + margin-right: auto; +} + +table.align-default { + margin-left: auto; + margin-right: auto; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th { + padding: 1px 
8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +th { + text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; +} + +th > :first-child, +td > :first-child { + margin-top: 0px; +} + +th > :last-child, +td > :last-child { + margin-bottom: 0px; +} + +/* -- figures --------------------------------------------------------------- */ + +div.figure, figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption, figcaption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number, +figcaption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text, +figcaption span.caption-text { +} + +/* -- field list styles ----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} + +.field-list p { + margin: 0; +} + +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +/* -- hlist styles ---------------------------------------------------------- */ + +table.hlist { + margin: 1em 0; +} + +table.hlist td { + vertical-align: top; +} + +/* -- object description styles --------------------------------------------- */ + +.sig { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; +} + +.sig-name, code.descname { + background-color: transparent; + font-weight: bold; +} + +.sig-name { + font-size: 1.1em; +} + +code.descname { + font-size: 1.2em; +} + +.sig-prename, code.descclassname { + background-color: transparent; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.sig-param.n { + font-style: italic; +} + +/* C++ specific styling */ + +.sig-inline.c-texpr, +.sig-inline.cpp-texpr { + font-family: unset; +} + +.sig.c .k, .sig.c .kt, +.sig.cpp .k, .sig.cpp .kt { + color: #0033B3; +} + +.sig.c .m, +.sig.cpp .m { + color: #1750EB; +} + +.sig.c .s, .sig.c .sc, +.sig.cpp .s, .sig.cpp .sc { + color: #067D17; +} + + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +:not(li) > ol > li:first-child > :first-child, +:not(li) > ul > li:first-child > :first-child { + margin-top: 0px; +} + +:not(li) > ol > li:last-child > :last-child, +:not(li) > ul > li:last-child > :last-child { + margin-bottom: 0px; +} + +ol.simple ol p, +ol.simple ul p, +ul.simple ol p, +ul.simple ul p { + margin-top: 0; +} + +ol.simple > li:not(:first-child) > p, +ul.simple > li:not(:first-child) > p { + margin-top: 0; +} + +ol.simple p, +ul.simple p { + margin-bottom: 0; +} + +aside.footnote > span, +div.citation > span { + float: left; +} +aside.footnote > span:last-of-type, +div.citation > span:last-of-type { + padding-right: 0.5em; +} +aside.footnote > p { + margin-left: 2em; +} +div.citation > p { + margin-left: 4em; +} +aside.footnote > p:last-of-type, +div.citation > p:last-of-type { + margin-bottom: 0em; +} +aside.footnote > p:last-of-type:after, +div.citation > p:last-of-type:after { + content: ""; + clear: both; +} + +dl.field-list { + display: grid; + grid-template-columns: fit-content(30%) auto; +} + 
+dl.field-list > dt { + font-weight: bold; + word-break: break-word; + padding-left: 0.5em; + padding-right: 5px; +} + +dl.field-list > dd { + padding-left: 0.5em; + margin-top: 0em; + margin-left: 0em; + margin-bottom: 0em; +} + +dl { + margin-bottom: 15px; +} + +dd > :first-child { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +.sig dd { + margin-top: 0px; + margin-bottom: 0px; +} + +.sig dl { + margin-top: 0px; + margin-bottom: 0px; +} + +dl > dd:last-child, +dl > dd:last-child > :last-child { + margin-bottom: 0; +} + +dt:target, span.highlighted { + background-color: #fbe54e; +} + +rect.highlighted { + fill: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +.classifier:before { + font-style: normal; + margin: 0 0.5em; + content: ":"; + display: inline-block; +} + +abbr, acronym { + border-bottom: dotted 1px; + cursor: help; +} + +.translated { + background-color: rgba(207, 255, 207, 0.2) +} + +.untranslated { + background-color: rgba(255, 207, 207, 0.2) +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +pre, div[class*="highlight-"] { + clear: both; +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; + white-space: nowrap; +} + +div[class*="highlight-"] { + margin: 1em 0; +} + +td.linenos pre { + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + display: block; +} + +table.highlighttable tbody { + display: block; +} + +table.highlighttable tr { + display: flex; +} + +table.highlighttable td { + margin: 0; + padding: 0; +} + +table.highlighttable td.linenos { + padding-right: 0.5em; +} + +table.highlighttable td.code { + flex: 1; + overflow: hidden; +} + +.highlight .hll { + display: block; +} + +div.highlight pre, +table.highlighttable pre { + margin: 0; +} + +div.code-block-caption + div { + margin-top: 0; +} + +div.code-block-caption { + margin-top: 1em; + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +table.highlighttable td.linenos, +span.linenos, +div.highlight span.gp { /* gp: Generic.Prompt */ + user-select: none; + -webkit-user-select: text; /* Safari fallback only */ + -webkit-user-select: none; /* Chrome/Safari */ + -moz-user-select: none; /* Firefox */ + -ms-user-select: none; /* IE10+ */ +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + margin: 1em 0; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + +.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} 
+ +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display ---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: absolute; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at end of file diff --git a/_static/check-solid.svg b/_static/check-solid.svg new file mode 100644 index 0000000..92fad4b --- /dev/null +++ b/_static/check-solid.svg @@ -0,0 +1,4 @@ + + + + diff --git a/_static/clipboard.min.js b/_static/clipboard.min.js new file mode 100644 index 0000000..54b3c46 --- /dev/null +++ b/_static/clipboard.min.js @@ -0,0 +1,7 @@ +/*! + * clipboard.js v2.0.8 + * https://clipboardjs.com/ + * + * Licensed MIT © Zeno Rocha + */ +!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return o}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),c=n.n(e);function a(t){try{return document.execCommand(t)}catch(t){return}}var f=function(t){t=c()(t);return a("cut"),t};var l=function(t){var e,n,o,r=1 + + + + diff --git a/_static/copybutton.css b/_static/copybutton.css new file mode 100644 index 0000000..f1916ec --- /dev/null +++ b/_static/copybutton.css @@ -0,0 +1,94 @@ +/* Copy buttons */ +button.copybtn { + position: absolute; + display: flex; + top: .3em; + right: .3em; + width: 1.7em; + height: 1.7em; + opacity: 0; + transition: opacity 0.3s, border .3s, background-color .3s; + user-select: none; + padding: 0; + border: none; + outline: none; + border-radius: 0.4em; + /* The colors that GitHub uses */ + border: #1b1f2426 1px solid; + background-color: #f6f8fa; + color: #57606a; +} + +button.copybtn.success { + border-color: #22863a; + color: #22863a; +} + +button.copybtn svg { + stroke: currentColor; + width: 1.5em; + height: 1.5em; + padding: 0.1em; +} + +div.highlight { + position: relative; +} + +/* Show the copybutton */ +.highlight:hover button.copybtn, button.copybtn.success { + opacity: 1; +} + +.highlight button.copybtn:hover { + background-color: rgb(235, 235, 235); +} + +.highlight button.copybtn:active { + background-color: rgb(187, 187, 187); +} + +/** + * A minimal CSS-only tooltip copied from: + * https://codepen.io/mildrenben/pen/rVBrpK + * + * To use, write HTML like the following: + * + *
+ * <p class="o-tooltip--left" data-tooltip="Hey">Short</p>
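+ *
+ * (Editor's note) The tooltip text comes from the data-tooltip attribute,
+ * which the :after rule below reads via content: attr(data-tooltip).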
+ */ + .o-tooltip--left { + position: relative; + } + + .o-tooltip--left:after { + opacity: 0; + visibility: hidden; + position: absolute; + content: attr(data-tooltip); + padding: .2em; + font-size: .8em; + left: -.2em; + background: grey; + color: white; + white-space: nowrap; + z-index: 2; + border-radius: 2px; + transform: translateX(-102%) translateY(0); + transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1); +} + +.o-tooltip--left:hover:after { + display: block; + opacity: 1; + visibility: visible; + transform: translateX(-100%) translateY(0); + transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1); + transition-delay: .5s; +} + +/* By default the copy button shouldn't show up when printing a page */ +@media print { + button.copybtn { + display: none; + } +} diff --git a/_static/copybutton.js b/_static/copybutton.js new file mode 100644 index 0000000..9b665e3 --- /dev/null +++ b/_static/copybutton.js @@ -0,0 +1,248 @@ +// Localization support +const messages = { + 'en': { + 'copy': 'Copy', + 'copy_to_clipboard': 'Copy to clipboard', + 'copy_success': 'Copied!', + 'copy_failure': 'Failed to copy', + }, + 'es' : { + 'copy': 'Copiar', + 'copy_to_clipboard': 'Copiar al portapapeles', + 'copy_success': '¡Copiado!', + 'copy_failure': 'Error al copiar', + }, + 'de' : { + 'copy': 'Kopieren', + 'copy_to_clipboard': 'In die Zwischenablage kopieren', + 'copy_success': 'Kopiert!', + 'copy_failure': 'Fehler beim Kopieren', + }, + 'fr' : { + 'copy': 'Copier', + 'copy_to_clipboard': 'Copier dans le presse-papier', + 'copy_success': 'Copié !', + 'copy_failure': 'Échec de la copie', + }, + 'ru': { + 'copy': 'Скопировать', + 'copy_to_clipboard': 'Скопировать в буфер', + 'copy_success': 'Скопировано!', + 'copy_failure': 'Не удалось скопировать', + }, + 'zh-CN': { + 'copy': '复制', + 'copy_to_clipboard': '复制到剪贴板', + 'copy_success': '复制成功!', + 'copy_failure': '复制失败', + }, + 'it' : { + 'copy': 'Copiare', + 'copy_to_clipboard': 'Copiato negli appunti', + 'copy_success': 'Copiato!', + 'copy_failure': 'Errore durante la copia', + } +} + +let locale = 'en' +if( document.documentElement.lang !== undefined + && messages[document.documentElement.lang] !== undefined ) { + locale = document.documentElement.lang +} + +let doc_url_root = DOCUMENTATION_OPTIONS.URL_ROOT; +if (doc_url_root == '#') { + doc_url_root = ''; +} + +/** + * SVG files for our copy buttons + */ +let iconCheck = ` + ${messages[locale]['copy_success']} + + +` + +// If the user specified their own SVG use that, otherwise use the default +let iconCopy = ``; +if (!iconCopy) { + iconCopy = ` + ${messages[locale]['copy_to_clipboard']} + + + +` +} + +/** + * Set up copy/paste for code blocks + */ + +const runWhenDOMLoaded = cb => { + if (document.readyState != 'loading') { + cb() + } else if (document.addEventListener) { + document.addEventListener('DOMContentLoaded', cb) + } else { + document.attachEvent('onreadystatechange', function() { + if (document.readyState == 'complete') cb() + }) + } +} + +const codeCellId = index => `codecell${index}` + +// Clears selected text since ClipboardJS will select the text when copying +const clearSelection = () => { + if (window.getSelection) { + window.getSelection().removeAllRanges() + } else if (document.selection) { + document.selection.empty() + } +} + +// Changes tooltip text for a moment, then changes it back +// We want the timeout of our `success` class to be a bit shorter than the +// tooltip and 
icon change, so that we can hide the icon before changing back. +var timeoutIcon = 2000; +var timeoutSuccessClass = 1500; + +const temporarilyChangeTooltip = (el, oldText, newText) => { + el.setAttribute('data-tooltip', newText) + el.classList.add('success') + // Remove success a little bit sooner than we change the tooltip + // So that we can use CSS to hide the copybutton first + setTimeout(() => el.classList.remove('success'), timeoutSuccessClass) + setTimeout(() => el.setAttribute('data-tooltip', oldText), timeoutIcon) +} + +// Changes the copy button icon for two seconds, then changes it back +const temporarilyChangeIcon = (el) => { + el.innerHTML = iconCheck; + setTimeout(() => {el.innerHTML = iconCopy}, timeoutIcon) +} + +const addCopyButtonToCodeCells = () => { + // If ClipboardJS hasn't loaded, wait a bit and try again. This + // happens because we load ClipboardJS asynchronously. + if (window.ClipboardJS === undefined) { + setTimeout(addCopyButtonToCodeCells, 250) + return + } + + // Add copybuttons to all of our code cells + const COPYBUTTON_SELECTOR = 'div.highlight-python pre, div.highlight-ipython3 pre, div.highlight-bash pre, div.highlight-shell pre'; + const codeCells = document.querySelectorAll(COPYBUTTON_SELECTOR) + codeCells.forEach((codeCell, index) => { + const id = codeCellId(index) + codeCell.setAttribute('id', id) + + const clipboardButton = id => + `` + codeCell.insertAdjacentHTML('afterend', clipboardButton(id)) + }) + +function escapeRegExp(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string +} + +/** + * Removes excluded text from a Node. + * + * @param {Node} target Node to filter. + * @param {string} exclude CSS selector of nodes to exclude. + * @returns {DOMString} Text from `target` with text removed. + */ +function filterText(target, exclude) { + const clone = target.cloneNode(true); // clone as to not modify the live DOM + if (exclude) { + // remove excluded nodes + clone.querySelectorAll(exclude).forEach(node => node.remove()); + } + return clone.innerText; +} + +// Callback when a copy button is clicked. Will be passed the node that was clicked +// should then grab the text and replace pieces of text that shouldn't be used in output +function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true, copyEmptyLines = true, lineContinuationChar = "", hereDocDelim = "") { + var regexp; + var match; + + // Do we check for line continuation characters and "HERE-documents"? 
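+ // (Editor's note, illustrative only) Assuming a configured prompt such as
+ // copybuttonPromptText = '>>> ', this function strips prompts and drops
+ // plain output lines, e.g.:
+ //   formatCopyText('>>> print(1)\n1', '>>> ')   // -> 'print(1)'
+ // since onlyCopyPromptLines and removePrompts default to true.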
+ var useLineCont = !!lineContinuationChar + var useHereDoc = !!hereDocDelim + + // create regexp to capture prompt and remaining line + if (isRegexp) { + regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)') + } else { + regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)') + } + + const outputLines = []; + var promptFound = false; + var gotLineCont = false; + var gotHereDoc = false; + const lineGotPrompt = []; + for (const line of textContent.split('\n')) { + match = line.match(regexp) + if (match || gotLineCont || gotHereDoc) { + promptFound = regexp.test(line) + lineGotPrompt.push(promptFound) + if (removePrompts && promptFound) { + outputLines.push(match[2]) + } else { + outputLines.push(line) + } + gotLineCont = line.endsWith(lineContinuationChar) & useLineCont + if (line.includes(hereDocDelim) & useHereDoc) + gotHereDoc = !gotHereDoc + } else if (!onlyCopyPromptLines) { + outputLines.push(line) + } else if (copyEmptyLines && line.trim() === '') { + outputLines.push(line) + } + } + + // If no lines with the prompt were found then just use original lines + if (lineGotPrompt.some(v => v === true)) { + textContent = outputLines.join('\n'); + } + + // Remove a trailing newline to avoid auto-running when pasting + if (textContent.endsWith("\n")) { + textContent = textContent.slice(0, -1) + } + return textContent +} + + +var copyTargetText = (trigger) => { + var target = document.querySelector(trigger.attributes['data-clipboard-target'].value); + + // get filtered text + let exclude = '.linenos'; + + let text = filterText(target, exclude); + return formatCopyText(text, '', false, true, true, true, '', '') +} + + // Initialize with a callback so we can modify the text before copy + const clipboard = new ClipboardJS('.copybtn', {text: copyTargetText}) + + // Update UI with error/success messages + clipboard.on('success', event => { + clearSelection() + temporarilyChangeTooltip(event.trigger, messages[locale]['copy'], messages[locale]['copy_success']) + temporarilyChangeIcon(event.trigger) + }) + + clipboard.on('error', event => { + temporarilyChangeTooltip(event.trigger, messages[locale]['copy'], messages[locale]['copy_failure']) + }) +} + +runWhenDOMLoaded(addCopyButtonToCodeCells) \ No newline at end of file diff --git a/_static/copybutton_funcs.js b/_static/copybutton_funcs.js new file mode 100644 index 0000000..dbe1aaa --- /dev/null +++ b/_static/copybutton_funcs.js @@ -0,0 +1,73 @@ +function escapeRegExp(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string +} + +/** + * Removes excluded text from a Node. + * + * @param {Node} target Node to filter. + * @param {string} exclude CSS selector of nodes to exclude. + * @returns {DOMString} Text from `target` with text removed. + */ +export function filterText(target, exclude) { + const clone = target.cloneNode(true); // clone as to not modify the live DOM + if (exclude) { + // remove excluded nodes + clone.querySelectorAll(exclude).forEach(node => node.remove()); + } + return clone.innerText; +} + +// Callback when a copy button is clicked. Will be passed the node that was clicked +// should then grab the text and replace pieces of text that shouldn't be used in output +export function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true, copyEmptyLines = true, lineContinuationChar = "", hereDocDelim = "") { + var regexp; + var match; + + // Do we check for line continuation characters and "HERE-documents"? 
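+ // (Editor's note) The gotLineCont/gotHereDoc assignments below use bitwise
+ // `&` where logical `&&` would be idiomatic; both operands are booleans, so
+ // the 0/1 result is truthy-equivalent and behavior is unchanged.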
+ var useLineCont = !!lineContinuationChar + var useHereDoc = !!hereDocDelim + + // create regexp to capture prompt and remaining line + if (isRegexp) { + regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)') + } else { + regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)') + } + + const outputLines = []; + var promptFound = false; + var gotLineCont = false; + var gotHereDoc = false; + const lineGotPrompt = []; + for (const line of textContent.split('\n')) { + match = line.match(regexp) + if (match || gotLineCont || gotHereDoc) { + promptFound = regexp.test(line) + lineGotPrompt.push(promptFound) + if (removePrompts && promptFound) { + outputLines.push(match[2]) + } else { + outputLines.push(line) + } + gotLineCont = line.endsWith(lineContinuationChar) & useLineCont + if (line.includes(hereDocDelim) & useHereDoc) + gotHereDoc = !gotHereDoc + } else if (!onlyCopyPromptLines) { + outputLines.push(line) + } else if (copyEmptyLines && line.trim() === '') { + outputLines.push(line) + } + } + + // If no lines with the prompt were found then just use original lines + if (lineGotPrompt.some(v => v === true)) { + textContent = outputLines.join('\n'); + } + + // Remove a trailing newline to avoid auto-running when pasting + if (textContent.endsWith("\n")) { + textContent = textContent.slice(0, -1) + } + return textContent +} diff --git a/_static/css/badge_only.css b/_static/css/badge_only.css new file mode 100644 index 0000000..c718cee --- /dev/null +++ b/_static/css/badge_only.css @@ -0,0 +1 @@ +.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions 
.rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} \ No newline at end of file diff --git a/_static/css/fonts/Roboto-Slab-Bold.woff b/_static/css/fonts/Roboto-Slab-Bold.woff new file mode 100644 index 0000000..6cb6000 Binary files /dev/null and b/_static/css/fonts/Roboto-Slab-Bold.woff differ diff --git a/_static/css/fonts/Roboto-Slab-Bold.woff2 b/_static/css/fonts/Roboto-Slab-Bold.woff2 new file mode 100644 index 0000000..7059e23 Binary files /dev/null and b/_static/css/fonts/Roboto-Slab-Bold.woff2 differ diff --git a/_static/css/fonts/Roboto-Slab-Regular.woff b/_static/css/fonts/Roboto-Slab-Regular.woff new file mode 100644 index 0000000..f815f63 Binary files /dev/null and b/_static/css/fonts/Roboto-Slab-Regular.woff differ diff --git a/_static/css/fonts/Roboto-Slab-Regular.woff2 b/_static/css/fonts/Roboto-Slab-Regular.woff2 new file mode 100644 index 0000000..f2c76e5 Binary files /dev/null and b/_static/css/fonts/Roboto-Slab-Regular.woff2 differ diff --git a/_static/css/fonts/fontawesome-webfont.eot b/_static/css/fonts/fontawesome-webfont.eot new file mode 100644 index 0000000..e9f60ca Binary files /dev/null and b/_static/css/fonts/fontawesome-webfont.eot differ diff --git a/_static/css/fonts/fontawesome-webfont.svg b/_static/css/fonts/fontawesome-webfont.svg new file mode 100644 index 0000000..855c845 --- /dev/null +++ b/_static/css/fonts/fontawesome-webfont.svg @@ -0,0 +1,2671 @@ + + + + +Created by FontForge 20120731 at Mon Oct 24 17:37:40 2016 + By ,,, +Copyright Dave Gandy 2016. All rights reserved. 
+ [fontawesome-webfont.svg: the 2,671-line SVG body (mostly <glyph> outline definitions) was stripped during extraction; only the FontForge/copyright header above survives] diff --git a/_static/css/fonts/fontawesome-webfont.ttf b/_static/css/fonts/fontawesome-webfont.ttf new file mode 100644 index 0000000..35acda2 Binary files /dev/null and b/_static/css/fonts/fontawesome-webfont.ttf differ diff --git a/_static/css/fonts/fontawesome-webfont.woff b/_static/css/fonts/fontawesome-webfont.woff new file mode 100644 index 0000000..400014a Binary files /dev/null and b/_static/css/fonts/fontawesome-webfont.woff differ diff --git a/_static/css/fonts/fontawesome-webfont.woff2 b/_static/css/fonts/fontawesome-webfont.woff2 new file mode 100644 index 0000000..4d13fc6 Binary files /dev/null and b/_static/css/fonts/fontawesome-webfont.woff2 differ diff --git a/_static/css/fonts/lato-bold-italic.woff b/_static/css/fonts/lato-bold-italic.woff new file mode 100644 index 0000000..88ad05b Binary files /dev/null and b/_static/css/fonts/lato-bold-italic.woff differ diff --git a/_static/css/fonts/lato-bold-italic.woff2 b/_static/css/fonts/lato-bold-italic.woff2 new file mode 100644 index 0000000..c4e3d80 Binary files /dev/null and b/_static/css/fonts/lato-bold-italic.woff2 differ diff --git a/_static/css/fonts/lato-bold.woff b/_static/css/fonts/lato-bold.woff new file mode 100644 index 0000000..c6dff51 Binary files /dev/null and b/_static/css/fonts/lato-bold.woff differ diff --git a/_static/css/fonts/lato-bold.woff2 b/_static/css/fonts/lato-bold.woff2 new file mode 100644 index 0000000..bb19504 Binary files /dev/null and b/_static/css/fonts/lato-bold.woff2 differ diff --git a/_static/css/fonts/lato-normal-italic.woff b/_static/css/fonts/lato-normal-italic.woff new file mode 100644 index 0000000..76114bc Binary files /dev/null and b/_static/css/fonts/lato-normal-italic.woff differ diff --git a/_static/css/fonts/lato-normal-italic.woff2 b/_static/css/fonts/lato-normal-italic.woff2 new file mode 100644 index 0000000..3404f37 Binary files /dev/null and b/_static/css/fonts/lato-normal-italic.woff2 differ diff --git a/_static/css/fonts/lato-normal.woff b/_static/css/fonts/lato-normal.woff new file mode 100644 index 0000000..ae1307f Binary files /dev/null and
b/_static/css/fonts/lato-normal.woff differ diff --git a/_static/css/fonts/lato-normal.woff2 b/_static/css/fonts/lato-normal.woff2 new file mode 100644 index 0000000..3bf9843 Binary files /dev/null and b/_static/css/fonts/lato-normal.woff2 differ diff --git a/_static/css/theme.css b/_static/css/theme.css new file mode 100644 index 0000000..19a446a --- /dev/null +++ b/_static/css/theme.css @@ -0,0 +1,4 @@ +html{box-sizing:border-box}*,:after,:before{box-sizing:inherit}article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block}audio,canvas,video{display:inline-block;*display:inline;*zoom:1}[hidden],audio:not([controls]){display:none}*{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%}body{margin:0}a:active,a:hover{outline:0}abbr[title]{border-bottom:1px dotted}b,strong{font-weight:700}blockquote{margin:0}dfn{font-style:italic}ins{background:#ff9;text-decoration:none}ins,mark{color:#000}mark{background:#ff0;font-style:italic;font-weight:700}.rst-content code,.rst-content tt,code,kbd,pre,samp{font-family:monospace,serif;_font-family:courier new,monospace;font-size:1em}pre{white-space:pre}q{quotes:none}q:after,q:before{content:"";content:none}small{font-size:85%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sup{top:-.5em}sub{bottom:-.25em}dl,ol,ul{margin:0;padding:0;list-style:none;list-style-image:none}li{list-style:none}dd{margin:0}img{border:0;-ms-interpolation-mode:bicubic;vertical-align:middle;max-width:100%}svg:not(:root){overflow:hidden}figure,form{margin:0}label{cursor:pointer}button,input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}button,input{line-height:normal}button,input[type=button],input[type=reset],input[type=submit]{cursor:pointer;-webkit-appearance:button;*overflow:visible}button[disabled],input[disabled]{cursor:default}input[type=search]{-webkit-appearance:textfield;-moz-box-sizing:content-box;-webkit-box-sizing:content-box;box-sizing:content-box}textarea{resize:vertical}table{border-collapse:collapse;border-spacing:0}td{vertical-align:top}.chromeframe{margin:.2em 0;background:#ccc;color:#000;padding:.2em 0}.ir{display:block;border:0;text-indent:-999em;overflow:hidden;background-color:transparent;background-repeat:no-repeat;text-align:left;direction:ltr;*line-height:0}.ir br{display:none}.hidden{display:none!important;visibility:hidden}.visuallyhidden{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.visuallyhidden.focusable:active,.visuallyhidden.focusable:focus{clip:auto;height:auto;margin:0;overflow:visible;position:static;width:auto}.invisible{visibility:hidden}.relative{position:relative}big,small{font-size:100%}@media print{body,html,section{background:none!important}*{box-shadow:none!important;text-shadow:none!important;filter:none!important;-ms-filter:none!important}a,a:visited{text-decoration:underline}.ir a:after,a[href^="#"]:after,a[href^="javascript:"]:after{content:""}blockquote,pre{page-break-inside:avoid}thead{display:table-header-group}img,tr{page-break-inside:avoid}img{max-width:100%!important}@page{margin:.5cm}.rst-content .toctree-wrapper>p.caption,h2,h3,p{orphans:3;widows:3}.rst-content .toctree-wrapper>p.caption,h2,h3{page-break-after:avoid}}.btn,.fa:before,.icon:before,.rst-content .admonition,.rst-content .admonition-title:before,.rst-content .admonition-todo,.rst-content 
.attention,.rst-content .caution,.rst-content .code-block-caption .headerlink:before,.rst-content .danger,.rst-content .eqno .headerlink:before,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning,.rst-content code.download span:first-child:before,.rst-content dl dt .headerlink:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 .headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content p.caption .headerlink:before,.rst-content p .headerlink:before,.rst-content table>caption .headerlink:before,.rst-content tt.download span:first-child:before,.wy-alert,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before,.wy-menu-vertical li button.toctree-expand:before,input[type=color],input[type=date],input[type=datetime-local],input[type=datetime],input[type=email],input[type=month],input[type=number],input[type=password],input[type=search],input[type=tel],input[type=text],input[type=time],input[type=url],input[type=week],select,textarea{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}/*! + * Font Awesome 4.7.0 by @davegandy - http://fontawesome.io - @fontawesome + * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License) + */@font-face{font-family:FontAwesome;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713);src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix&v=4.7.0) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#fontawesomeregular) format("svg");font-weight:400;font-style:normal}.fa,.icon,.rst-content .admonition-title,.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content code.download span:first-child,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink,.rst-content tt.download span:first-child,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li button.toctree-expand{display:inline-block;font:normal normal normal 14px/1 
FontAwesome;font-size:inherit;text-rendering:auto;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.fa-lg{font-size:1.33333em;line-height:.75em;vertical-align:-15%}.fa-2x{font-size:2em}.fa-3x{font-size:3em}.fa-4x{font-size:4em}.fa-5x{font-size:5em}.fa-fw{width:1.28571em;text-align:center}.fa-ul{padding-left:0;margin-left:2.14286em;list-style-type:none}.fa-ul>li{position:relative}.fa-li{position:absolute;left:-2.14286em;width:2.14286em;top:.14286em;text-align:center}.fa-li.fa-lg{left:-1.85714em}.fa-border{padding:.2em .25em .15em;border:.08em solid #eee;border-radius:.1em}.fa-pull-left{float:left}.fa-pull-right{float:right}.fa-pull-left.icon,.fa.fa-pull-left,.rst-content .code-block-caption .fa-pull-left.headerlink,.rst-content .eqno .fa-pull-left.headerlink,.rst-content .fa-pull-left.admonition-title,.rst-content code.download span.fa-pull-left:first-child,.rst-content dl dt .fa-pull-left.headerlink,.rst-content h1 .fa-pull-left.headerlink,.rst-content h2 .fa-pull-left.headerlink,.rst-content h3 .fa-pull-left.headerlink,.rst-content h4 .fa-pull-left.headerlink,.rst-content h5 .fa-pull-left.headerlink,.rst-content h6 .fa-pull-left.headerlink,.rst-content p .fa-pull-left.headerlink,.rst-content table>caption .fa-pull-left.headerlink,.rst-content tt.download span.fa-pull-left:first-child,.wy-menu-vertical li.current>a button.fa-pull-left.toctree-expand,.wy-menu-vertical li.on a button.fa-pull-left.toctree-expand,.wy-menu-vertical li button.fa-pull-left.toctree-expand{margin-right:.3em}.fa-pull-right.icon,.fa.fa-pull-right,.rst-content .code-block-caption .fa-pull-right.headerlink,.rst-content .eqno .fa-pull-right.headerlink,.rst-content .fa-pull-right.admonition-title,.rst-content code.download span.fa-pull-right:first-child,.rst-content dl dt .fa-pull-right.headerlink,.rst-content h1 .fa-pull-right.headerlink,.rst-content h2 .fa-pull-right.headerlink,.rst-content h3 .fa-pull-right.headerlink,.rst-content h4 .fa-pull-right.headerlink,.rst-content h5 .fa-pull-right.headerlink,.rst-content h6 .fa-pull-right.headerlink,.rst-content p .fa-pull-right.headerlink,.rst-content table>caption .fa-pull-right.headerlink,.rst-content tt.download span.fa-pull-right:first-child,.wy-menu-vertical li.current>a button.fa-pull-right.toctree-expand,.wy-menu-vertical li.on a button.fa-pull-right.toctree-expand,.wy-menu-vertical li button.fa-pull-right.toctree-expand{margin-left:.3em}.pull-right{float:right}.pull-left{float:left}.fa.pull-left,.pull-left.icon,.rst-content .code-block-caption .pull-left.headerlink,.rst-content .eqno .pull-left.headerlink,.rst-content .pull-left.admonition-title,.rst-content code.download span.pull-left:first-child,.rst-content dl dt .pull-left.headerlink,.rst-content h1 .pull-left.headerlink,.rst-content h2 .pull-left.headerlink,.rst-content h3 .pull-left.headerlink,.rst-content h4 .pull-left.headerlink,.rst-content h5 .pull-left.headerlink,.rst-content h6 .pull-left.headerlink,.rst-content p .pull-left.headerlink,.rst-content table>caption .pull-left.headerlink,.rst-content tt.download span.pull-left:first-child,.wy-menu-vertical li.current>a button.pull-left.toctree-expand,.wy-menu-vertical li.on a button.pull-left.toctree-expand,.wy-menu-vertical li button.pull-left.toctree-expand{margin-right:.3em}.fa.pull-right,.pull-right.icon,.rst-content .code-block-caption .pull-right.headerlink,.rst-content .eqno .pull-right.headerlink,.rst-content .pull-right.admonition-title,.rst-content code.download span.pull-right:first-child,.rst-content dl dt 
.pull-right.headerlink,.rst-content h1 .pull-right.headerlink,.rst-content h2 .pull-right.headerlink,.rst-content h3 .pull-right.headerlink,.rst-content h4 .pull-right.headerlink,.rst-content h5 .pull-right.headerlink,.rst-content h6 .pull-right.headerlink,.rst-content p .pull-right.headerlink,.rst-content table>caption .pull-right.headerlink,.rst-content tt.download span.pull-right:first-child,.wy-menu-vertical li.current>a button.pull-right.toctree-expand,.wy-menu-vertical li.on a button.pull-right.toctree-expand,.wy-menu-vertical li button.pull-right.toctree-expand{margin-left:.3em}.fa-spin{-webkit-animation:fa-spin 2s linear infinite;animation:fa-spin 2s linear infinite}.fa-pulse{-webkit-animation:fa-spin 1s steps(8) infinite;animation:fa-spin 1s steps(8) infinite}@-webkit-keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.fa-rotate-90{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=1)";-webkit-transform:rotate(90deg);-ms-transform:rotate(90deg);transform:rotate(90deg)}.fa-rotate-180{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2)";-webkit-transform:rotate(180deg);-ms-transform:rotate(180deg);transform:rotate(180deg)}.fa-rotate-270{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=3)";-webkit-transform:rotate(270deg);-ms-transform:rotate(270deg);transform:rotate(270deg)}.fa-flip-horizontal{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1)";-webkit-transform:scaleX(-1);-ms-transform:scaleX(-1);transform:scaleX(-1)}.fa-flip-vertical{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1)";-webkit-transform:scaleY(-1);-ms-transform:scaleY(-1);transform:scaleY(-1)}:root .fa-flip-horizontal,:root .fa-flip-vertical,:root .fa-rotate-90,:root .fa-rotate-180,:root .fa-rotate-270{filter:none}.fa-stack{position:relative;display:inline-block;width:2em;height:2em;line-height:2em;vertical-align:middle}.fa-stack-1x,.fa-stack-2x{position:absolute;left:0;width:100%;text-align:center}.fa-stack-1x{line-height:inherit}.fa-stack-2x{font-size:2em}.fa-inverse{color:#fff}.fa-glass:before{content:""}.fa-music:before{content:""}.fa-search:before,.icon-search:before{content:""}.fa-envelope-o:before{content:""}.fa-heart:before{content:""}.fa-star:before{content:""}.fa-star-o:before{content:""}.fa-user:before{content:""}.fa-film:before{content:""}.fa-th-large:before{content:""}.fa-th:before{content:""}.fa-th-list:before{content:""}.fa-check:before{content:""}.fa-close:before,.fa-remove:before,.fa-times:before{content:""}.fa-search-plus:before{content:""}.fa-search-minus:before{content:""}.fa-power-off:before{content:""}.fa-signal:before{content:""}.fa-cog:before,.fa-gear:before{content:""}.fa-trash-o:before{content:""}.fa-home:before,.icon-home:before{content:""}.fa-file-o:before{content:""}.fa-clock-o:before{content:""}.fa-road:before{content:""}.fa-download:before,.rst-content code.download span:first-child:before,.rst-content tt.download 
span:first-child:before{content:""}.fa-arrow-circle-o-down:before{content:""}.fa-arrow-circle-o-up:before{content:""}.fa-inbox:before{content:""}.fa-play-circle-o:before{content:""}.fa-repeat:before,.fa-rotate-right:before{content:""}.fa-refresh:before{content:""}.fa-list-alt:before{content:""}.fa-lock:before{content:""}.fa-flag:before{content:""}.fa-headphones:before{content:""}.fa-volume-off:before{content:""}.fa-volume-down:before{content:""}.fa-volume-up:before{content:""}.fa-qrcode:before{content:""}.fa-barcode:before{content:""}.fa-tag:before{content:""}.fa-tags:before{content:""}.fa-book:before,.icon-book:before{content:""}.fa-bookmark:before{content:""}.fa-print:before{content:""}.fa-camera:before{content:""}.fa-font:before{content:""}.fa-bold:before{content:""}.fa-italic:before{content:""}.fa-text-height:before{content:""}.fa-text-width:before{content:""}.fa-align-left:before{content:""}.fa-align-center:before{content:""}.fa-align-right:before{content:""}.fa-align-justify:before{content:""}.fa-list:before{content:""}.fa-dedent:before,.fa-outdent:before{content:""}.fa-indent:before{content:""}.fa-video-camera:before{content:""}.fa-image:before,.fa-photo:before,.fa-picture-o:before{content:""}.fa-pencil:before{content:""}.fa-map-marker:before{content:""}.fa-adjust:before{content:""}.fa-tint:before{content:""}.fa-edit:before,.fa-pencil-square-o:before{content:""}.fa-share-square-o:before{content:""}.fa-check-square-o:before{content:""}.fa-arrows:before{content:""}.fa-step-backward:before{content:""}.fa-fast-backward:before{content:""}.fa-backward:before{content:""}.fa-play:before{content:""}.fa-pause:before{content:""}.fa-stop:before{content:""}.fa-forward:before{content:""}.fa-fast-forward:before{content:""}.fa-step-forward:before{content:""}.fa-eject:before{content:""}.fa-chevron-left:before{content:""}.fa-chevron-right:before{content:""}.fa-plus-circle:before{content:""}.fa-minus-circle:before{content:""}.fa-times-circle:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before{content:""}.fa-check-circle:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before{content:""}.fa-question-circle:before{content:""}.fa-info-circle:before{content:""}.fa-crosshairs:before{content:""}.fa-times-circle-o:before{content:""}.fa-check-circle-o:before{content:""}.fa-ban:before{content:""}.fa-arrow-left:before{content:""}.fa-arrow-right:before{content:""}.fa-arrow-up:before{content:""}.fa-arrow-down:before{content:""}.fa-mail-forward:before,.fa-share:before{content:""}.fa-expand:before{content:""}.fa-compress:before{content:""}.fa-plus:before{content:""}.fa-minus:before{content:""}.fa-asterisk:before{content:""}.fa-exclamation-circle:before,.rst-content .admonition-title:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning 
.wy-input-context:before{content:""}.fa-gift:before{content:""}.fa-leaf:before{content:""}.fa-fire:before,.icon-fire:before{content:""}.fa-eye:before{content:""}.fa-eye-slash:before{content:""}.fa-exclamation-triangle:before,.fa-warning:before{content:""}.fa-plane:before{content:""}.fa-calendar:before{content:""}.fa-random:before{content:""}.fa-comment:before{content:""}.fa-magnet:before{content:""}.fa-chevron-up:before{content:""}.fa-chevron-down:before{content:""}.fa-retweet:before{content:""}.fa-shopping-cart:before{content:""}.fa-folder:before{content:""}.fa-folder-open:before{content:""}.fa-arrows-v:before{content:""}.fa-arrows-h:before{content:""}.fa-bar-chart-o:before,.fa-bar-chart:before{content:""}.fa-twitter-square:before{content:""}.fa-facebook-square:before{content:""}.fa-camera-retro:before{content:""}.fa-key:before{content:""}.fa-cogs:before,.fa-gears:before{content:""}.fa-comments:before{content:""}.fa-thumbs-o-up:before{content:""}.fa-thumbs-o-down:before{content:""}.fa-star-half:before{content:""}.fa-heart-o:before{content:""}.fa-sign-out:before{content:""}.fa-linkedin-square:before{content:""}.fa-thumb-tack:before{content:""}.fa-external-link:before{content:""}.fa-sign-in:before{content:""}.fa-trophy:before{content:""}.fa-github-square:before{content:""}.fa-upload:before{content:""}.fa-lemon-o:before{content:""}.fa-phone:before{content:""}.fa-square-o:before{content:""}.fa-bookmark-o:before{content:""}.fa-phone-square:before{content:""}.fa-twitter:before{content:""}.fa-facebook-f:before,.fa-facebook:before{content:""}.fa-github:before,.icon-github:before{content:""}.fa-unlock:before{content:""}.fa-credit-card:before{content:""}.fa-feed:before,.fa-rss:before{content:""}.fa-hdd-o:before{content:""}.fa-bullhorn:before{content:""}.fa-bell:before{content:""}.fa-certificate:before{content:""}.fa-hand-o-right:before{content:""}.fa-hand-o-left:before{content:""}.fa-hand-o-up:before{content:""}.fa-hand-o-down:before{content:""}.fa-arrow-circle-left:before,.icon-circle-arrow-left:before{content:""}.fa-arrow-circle-right:before,.icon-circle-arrow-right:before{content:""}.fa-arrow-circle-up:before{content:""}.fa-arrow-circle-down:before{content:""}.fa-globe:before{content:""}.fa-wrench:before{content:""}.fa-tasks:before{content:""}.fa-filter:before{content:""}.fa-briefcase:before{content:""}.fa-arrows-alt:before{content:""}.fa-group:before,.fa-users:before{content:""}.fa-chain:before,.fa-link:before,.icon-link:before{content:""}.fa-cloud:before{content:""}.fa-flask:before{content:""}.fa-cut:before,.fa-scissors:before{content:""}.fa-copy:before,.fa-files-o:before{content:""}.fa-paperclip:before{content:""}.fa-floppy-o:before,.fa-save:before{content:""}.fa-square:before{content:""}.fa-bars:before,.fa-navicon:before,.fa-reorder:before{content:""}.fa-list-ul:before{content:""}.fa-list-ol:before{content:""}.fa-strikethrough:before{content:""}.fa-underline:before{content:""}.fa-table:before{content:""}.fa-magic:before{content:""}.fa-truck:before{content:""}.fa-pinterest:before{content:""}.fa-pinterest-square:before{content:""}.fa-google-plus-square:before{content:""}.fa-google-plus:before{content:""}.fa-money:before{content:""}.fa-caret-down:before,.icon-caret-down:before,.wy-dropdown 
.caret:before{content:""}.fa-caret-up:before{content:""}.fa-caret-left:before{content:""}.fa-caret-right:before{content:""}.fa-columns:before{content:""}.fa-sort:before,.fa-unsorted:before{content:""}.fa-sort-desc:before,.fa-sort-down:before{content:""}.fa-sort-asc:before,.fa-sort-up:before{content:""}.fa-envelope:before{content:""}.fa-linkedin:before{content:""}.fa-rotate-left:before,.fa-undo:before{content:""}.fa-gavel:before,.fa-legal:before{content:""}.fa-dashboard:before,.fa-tachometer:before{content:""}.fa-comment-o:before{content:""}.fa-comments-o:before{content:""}.fa-bolt:before,.fa-flash:before{content:""}.fa-sitemap:before{content:""}.fa-umbrella:before{content:""}.fa-clipboard:before,.fa-paste:before{content:""}.fa-lightbulb-o:before{content:""}.fa-exchange:before{content:""}.fa-cloud-download:before{content:""}.fa-cloud-upload:before{content:""}.fa-user-md:before{content:""}.fa-stethoscope:before{content:""}.fa-suitcase:before{content:""}.fa-bell-o:before{content:""}.fa-coffee:before{content:""}.fa-cutlery:before{content:""}.fa-file-text-o:before{content:""}.fa-building-o:before{content:""}.fa-hospital-o:before{content:""}.fa-ambulance:before{content:""}.fa-medkit:before{content:""}.fa-fighter-jet:before{content:""}.fa-beer:before{content:""}.fa-h-square:before{content:""}.fa-plus-square:before{content:""}.fa-angle-double-left:before{content:""}.fa-angle-double-right:before{content:""}.fa-angle-double-up:before{content:""}.fa-angle-double-down:before{content:""}.fa-angle-left:before{content:""}.fa-angle-right:before{content:""}.fa-angle-up:before{content:""}.fa-angle-down:before{content:""}.fa-desktop:before{content:""}.fa-laptop:before{content:""}.fa-tablet:before{content:""}.fa-mobile-phone:before,.fa-mobile:before{content:""}.fa-circle-o:before{content:""}.fa-quote-left:before{content:""}.fa-quote-right:before{content:""}.fa-spinner:before{content:""}.fa-circle:before{content:""}.fa-mail-reply:before,.fa-reply:before{content:""}.fa-github-alt:before{content:""}.fa-folder-o:before{content:""}.fa-folder-open-o:before{content:""}.fa-smile-o:before{content:""}.fa-frown-o:before{content:""}.fa-meh-o:before{content:""}.fa-gamepad:before{content:""}.fa-keyboard-o:before{content:""}.fa-flag-o:before{content:""}.fa-flag-checkered:before{content:""}.fa-terminal:before{content:""}.fa-code:before{content:""}.fa-mail-reply-all:before,.fa-reply-all:before{content:""}.fa-star-half-empty:before,.fa-star-half-full:before,.fa-star-half-o:before{content:""}.fa-location-arrow:before{content:""}.fa-crop:before{content:""}.fa-code-fork:before{content:""}.fa-chain-broken:before,.fa-unlink:before{content:""}.fa-question:before{content:""}.fa-info:before{content:""}.fa-exclamation:before{content:""}.fa-superscript:before{content:""}.fa-subscript:before{content:""}.fa-eraser:before{content:""}.fa-puzzle-piece:before{content:""}.fa-microphone:before{content:""}.fa-microphone-slash:before{content:""}.fa-shield:before{content:""}.fa-calendar-o:before{content:""}.fa-fire-extinguisher:before{content:""}.fa-rocket:before{content:""}.fa-maxcdn:before{content:""}.fa-chevron-circle-left:before{content:""}.fa-chevron-circle-right:before{content:""}.fa-chevron-circle-up:before{content:""}.fa-chevron-circle-down:before{content:""}.fa-html5:before{content:""}.fa-css3:before{content:""}.fa-anchor:before{content:""}.fa-unlock-alt:before{content:""}.fa-bullseye:before{content:""}.fa-ellipsis-h:before{content:""}.fa-elli
psis-v:before{content:""}.fa-rss-square:before{content:""}.fa-play-circle:before{content:""}.fa-ticket:before{content:""}.fa-minus-square:before{content:""}.fa-minus-square-o:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before{content:""}.fa-level-up:before{content:""}.fa-level-down:before{content:""}.fa-check-square:before{content:""}.fa-pencil-square:before{content:""}.fa-external-link-square:before{content:""}.fa-share-square:before{content:""}.fa-compass:before{content:""}.fa-caret-square-o-down:before,.fa-toggle-down:before{content:""}.fa-caret-square-o-up:before,.fa-toggle-up:before{content:""}.fa-caret-square-o-right:before,.fa-toggle-right:before{content:""}.fa-eur:before,.fa-euro:before{content:""}.fa-gbp:before{content:""}.fa-dollar:before,.fa-usd:before{content:""}.fa-inr:before,.fa-rupee:before{content:""}.fa-cny:before,.fa-jpy:before,.fa-rmb:before,.fa-yen:before{content:""}.fa-rouble:before,.fa-rub:before,.fa-ruble:before{content:""}.fa-krw:before,.fa-won:before{content:""}.fa-bitcoin:before,.fa-btc:before{content:""}.fa-file:before{content:""}.fa-file-text:before{content:""}.fa-sort-alpha-asc:before{content:""}.fa-sort-alpha-desc:before{content:""}.fa-sort-amount-asc:before{content:""}.fa-sort-amount-desc:before{content:""}.fa-sort-numeric-asc:before{content:""}.fa-sort-numeric-desc:before{content:""}.fa-thumbs-up:before{content:""}.fa-thumbs-down:before{content:""}.fa-youtube-square:before{content:""}.fa-youtube:before{content:""}.fa-xing:before{content:""}.fa-xing-square:before{content:""}.fa-youtube-play:before{content:""}.fa-dropbox:before{content:""}.fa-stack-overflow:before{content:""}.fa-instagram:before{content:""}.fa-flickr:before{content:""}.fa-adn:before{content:""}.fa-bitbucket:before,.icon-bitbucket:before{content:""}.fa-bitbucket-square:before{content:""}.fa-tumblr:before{content:""}.fa-tumblr-square:before{content:""}.fa-long-arrow-down:before{content:""}.fa-long-arrow-up:before{content:""}.fa-long-arrow-left:before{content:""}.fa-long-arrow-right:before{content:""}.fa-apple:before{content:""}.fa-windows:before{content:""}.fa-android:before{content:""}.fa-linux:before{content:""}.fa-dribbble:before{content:""}.fa-skype:before{content:""}.fa-foursquare:before{content:""}.fa-trello:before{content:""}.fa-female:before{content:""}.fa-male:before{content:""}.fa-gittip:before,.fa-gratipay:before{content:""}.fa-sun-o:before{content:""}.fa-moon-o:before{content:""}.fa-archive:before{content:""}.fa-bug:before{content:""}.fa-vk:before{content:""}.fa-weibo:before{content:""}.fa-renren:before{content:""}.fa-pagelines:before{content:""}.fa-stack-exchange:before{content:""}.fa-arrow-circle-o-right:before{content:""}.fa-arrow-circle-o-left:before{content:""}.fa-caret-square-o-left:before,.fa-toggle-left:before{content:""}.fa-dot-circle-o:before{content:""}.fa-wheelchair:before{content:""}.fa-vimeo-square:before{content:""}.fa-try:before,.fa-turkish-lira:before{content:""}.fa-plus-square-o:before,.wy-menu-vertical li 
button.toctree-expand:before{content:""}.fa-space-shuttle:before{content:""}.fa-slack:before{content:""}.fa-envelope-square:before{content:""}.fa-wordpress:before{content:""}.fa-openid:before{content:""}.fa-bank:before,.fa-institution:before,.fa-university:before{content:""}.fa-graduation-cap:before,.fa-mortar-board:before{content:""}.fa-yahoo:before{content:""}.fa-google:before{content:""}.fa-reddit:before{content:""}.fa-reddit-square:before{content:""}.fa-stumbleupon-circle:before{content:""}.fa-stumbleupon:before{content:""}.fa-delicious:before{content:""}.fa-digg:before{content:""}.fa-pied-piper-pp:before{content:""}.fa-pied-piper-alt:before{content:""}.fa-drupal:before{content:""}.fa-joomla:before{content:""}.fa-language:before{content:""}.fa-fax:before{content:""}.fa-building:before{content:""}.fa-child:before{content:""}.fa-paw:before{content:""}.fa-spoon:before{content:""}.fa-cube:before{content:""}.fa-cubes:before{content:""}.fa-behance:before{content:""}.fa-behance-square:before{content:""}.fa-steam:before{content:""}.fa-steam-square:before{content:""}.fa-recycle:before{content:""}.fa-automobile:before,.fa-car:before{content:""}.fa-cab:before,.fa-taxi:before{content:""}.fa-tree:before{content:""}.fa-spotify:before{content:""}.fa-deviantart:before{content:""}.fa-soundcloud:before{content:""}.fa-database:before{content:""}.fa-file-pdf-o:before{content:""}.fa-file-word-o:before{content:""}.fa-file-excel-o:before{content:""}.fa-file-powerpoint-o:before{content:""}.fa-file-image-o:before,.fa-file-photo-o:before,.fa-file-picture-o:before{content:""}.fa-file-archive-o:before,.fa-file-zip-o:before{content:""}.fa-file-audio-o:before,.fa-file-sound-o:before{content:""}.fa-file-movie-o:before,.fa-file-video-o:before{content:""}.fa-file-code-o:before{content:""}.fa-vine:before{content:""}.fa-codepen:before{content:""}.fa-jsfiddle:before{content:""}.fa-life-bouy:before,.fa-life-buoy:before,.fa-life-ring:before,.fa-life-saver:before,.fa-support:before{content:""}.fa-circle-o-notch:before{content:""}.fa-ra:before,.fa-rebel:before,.fa-resistance:before{content:""}.fa-empire:before,.fa-ge:before{content:""}.fa-git-square:before{content:""}.fa-git:before{content:""}.fa-hacker-news:before,.fa-y-combinator-square:before,.fa-yc-square:before{content:""}.fa-tencent-weibo:before{content:""}.fa-qq:before{content:""}.fa-wechat:before,.fa-weixin:before{content:""}.fa-paper-plane:before,.fa-send:before{content:""}.fa-paper-plane-o:before,.fa-send-o:before{content:""}.fa-history:before{content:""}.fa-circle-thin:before{content:""}.fa-header:before{content:""}.fa-paragraph:before{content:""}.fa-sliders:before{content:""}.fa-share-alt:before{content:""}.fa-share-alt-square:before{content:""}.fa-bomb:before{content:""}.fa-futbol-o:before,.fa-soccer-ball-o:before{content:""}.fa-tty:before{content:""}.fa-binoculars:before{content:""}.fa-plug:before{content:""}.fa-slideshare:before{content:""}.fa-twitch:before{content:""}.fa-yelp:before{content:""}.fa-newspaper-o:before{content:""}.fa-wifi:before{content:""}.fa-calculator:before{content:""}.fa-paypal:before{content:""}.fa-google-wallet:before{content:""}.fa-cc-visa:before{content:""}.fa-cc-mastercard:before{content:""}.fa-cc-discover:before{content:""}.fa-cc-amex:before{content:""}.fa-cc-paypal:before{content:""}.fa-cc-stripe:before{content:""}.fa-bell-slash:before{content:""}.fa-bell-slash-o:before{content:""}.fa-trash:before{content:""}.fa-copyright:before{content:""}.f
a-at:before{content:""}.fa-eyedropper:before{content:""}.fa-paint-brush:before{content:""}.fa-birthday-cake:before{content:""}.fa-area-chart:before{content:""}.fa-pie-chart:before{content:""}.fa-line-chart:before{content:""}.fa-lastfm:before{content:""}.fa-lastfm-square:before{content:""}.fa-toggle-off:before{content:""}.fa-toggle-on:before{content:""}.fa-bicycle:before{content:""}.fa-bus:before{content:""}.fa-ioxhost:before{content:""}.fa-angellist:before{content:""}.fa-cc:before{content:""}.fa-ils:before,.fa-shekel:before,.fa-sheqel:before{content:""}.fa-meanpath:before{content:""}.fa-buysellads:before{content:""}.fa-connectdevelop:before{content:""}.fa-dashcube:before{content:""}.fa-forumbee:before{content:""}.fa-leanpub:before{content:""}.fa-sellsy:before{content:""}.fa-shirtsinbulk:before{content:""}.fa-simplybuilt:before{content:""}.fa-skyatlas:before{content:""}.fa-cart-plus:before{content:""}.fa-cart-arrow-down:before{content:""}.fa-diamond:before{content:""}.fa-ship:before{content:""}.fa-user-secret:before{content:""}.fa-motorcycle:before{content:""}.fa-street-view:before{content:""}.fa-heartbeat:before{content:""}.fa-venus:before{content:""}.fa-mars:before{content:""}.fa-mercury:before{content:""}.fa-intersex:before,.fa-transgender:before{content:""}.fa-transgender-alt:before{content:""}.fa-venus-double:before{content:""}.fa-mars-double:before{content:""}.fa-venus-mars:before{content:""}.fa-mars-stroke:before{content:""}.fa-mars-stroke-v:before{content:""}.fa-mars-stroke-h:before{content:""}.fa-neuter:before{content:""}.fa-genderless:before{content:""}.fa-facebook-official:before{content:""}.fa-pinterest-p:before{content:""}.fa-whatsapp:before{content:""}.fa-server:before{content:""}.fa-user-plus:before{content:""}.fa-user-times:before{content:""}.fa-bed:before,.fa-hotel:before{content:""}.fa-viacoin:before{content:""}.fa-train:before{content:""}.fa-subway:before{content:""}.fa-medium:before{content:""}.fa-y-combinator:before,.fa-yc:before{content:""}.fa-optin-monster:before{content:""}.fa-opencart:before{content:""}.fa-expeditedssl:before{content:""}.fa-battery-4:before,.fa-battery-full:before,.fa-battery:before{content:""}.fa-battery-3:before,.fa-battery-three-quarters:before{content:""}.fa-battery-2:before,.fa-battery-half:before{content:""}.fa-battery-1:before,.fa-battery-quarter:before{content:""}.fa-battery-0:before,.fa-battery-empty:before{content:""}.fa-mouse-pointer:before{content:""}.fa-i-cursor:before{content:""}.fa-object-group:before{content:""}.fa-object-ungroup:before{content:""}.fa-sticky-note:before{content:""}.fa-sticky-note-o:before{content:""}.fa-cc-jcb:before{content:""}.fa-cc-diners-club:before{content:""}.fa-clone:before{content:""}.fa-balance-scale:before{content:""}.fa-hourglass-o:before{content:""}.fa-hourglass-1:before,.fa-hourglass-start:before{content:""}.fa-hourglass-2:before,.fa-hourglass-half:before{content:""}.fa-hourglass-3:before,.fa-hourglass-end:before{content:""}.fa-hourglass:before{content:""}.fa-hand-grab-o:before,.fa-hand-rock-o:before{content:""}.fa-hand-paper-o:before,.fa-hand-stop-o:before{content:""}.fa-hand-scissors-o:before{content:""}.fa-hand-lizard-o:before{content:""}.fa-hand-spock-o:before{content:""}.fa-hand-pointer-o:before{content:""}.fa-hand-peace-o:before{content:""}.fa-trademark:before{content:""}.fa-registered:before{content:""}.fa-creative-commons:before{content:""}.fa-gg:before{content:""}.fa-gg-circle:before{content:""}.fa-trip
advisor:before{content:""}.fa-odnoklassniki:before{content:""}.fa-odnoklassniki-square:before{content:""}.fa-get-pocket:before{content:""}.fa-wikipedia-w:before{content:""}.fa-safari:before{content:""}.fa-chrome:before{content:""}.fa-firefox:before{content:""}.fa-opera:before{content:""}.fa-internet-explorer:before{content:""}.fa-television:before,.fa-tv:before{content:""}.fa-contao:before{content:""}.fa-500px:before{content:""}.fa-amazon:before{content:""}.fa-calendar-plus-o:before{content:""}.fa-calendar-minus-o:before{content:""}.fa-calendar-times-o:before{content:""}.fa-calendar-check-o:before{content:""}.fa-industry:before{content:""}.fa-map-pin:before{content:""}.fa-map-signs:before{content:""}.fa-map-o:before{content:""}.fa-map:before{content:""}.fa-commenting:before{content:""}.fa-commenting-o:before{content:""}.fa-houzz:before{content:""}.fa-vimeo:before{content:""}.fa-black-tie:before{content:""}.fa-fonticons:before{content:""}.fa-reddit-alien:before{content:""}.fa-edge:before{content:""}.fa-credit-card-alt:before{content:""}.fa-codiepie:before{content:""}.fa-modx:before{content:""}.fa-fort-awesome:before{content:""}.fa-usb:before{content:""}.fa-product-hunt:before{content:""}.fa-mixcloud:before{content:""}.fa-scribd:before{content:""}.fa-pause-circle:before{content:""}.fa-pause-circle-o:before{content:""}.fa-stop-circle:before{content:""}.fa-stop-circle-o:before{content:""}.fa-shopping-bag:before{content:""}.fa-shopping-basket:before{content:""}.fa-hashtag:before{content:""}.fa-bluetooth:before{content:""}.fa-bluetooth-b:before{content:""}.fa-percent:before{content:""}.fa-gitlab:before,.icon-gitlab:before{content:""}.fa-wpbeginner:before{content:""}.fa-wpforms:before{content:""}.fa-envira:before{content:""}.fa-universal-access:before{content:""}.fa-wheelchair-alt:before{content:""}.fa-question-circle-o:before{content:""}.fa-blind:before{content:""}.fa-audio-description:before{content:""}.fa-volume-control-phone:before{content:""}.fa-braille:before{content:""}.fa-assistive-listening-systems:before{content:""}.fa-american-sign-language-interpreting:before,.fa-asl-interpreting:before{content:""}.fa-deaf:before,.fa-deafness:before,.fa-hard-of-hearing:before{content:""}.fa-glide:before{content:""}.fa-glide-g:before{content:""}.fa-sign-language:before,.fa-signing:before{content:""}.fa-low-vision:before{content:""}.fa-viadeo:before{content:""}.fa-viadeo-square:before{content:""}.fa-snapchat:before{content:""}.fa-snapchat-ghost:before{content:""}.fa-snapchat-square:before{content:""}.fa-pied-piper:before{content:""}.fa-first-order:before{content:""}.fa-yoast:before{content:""}.fa-themeisle:before{content:""}.fa-google-plus-circle:before,.fa-google-plus-official:before{content:""}.fa-fa:before,.fa-font-awesome:before{content:""}.fa-handshake-o:before{content:""}.fa-envelope-open:before{content:""}.fa-envelope-open-o:before{content:""}.fa-linode:before{content:""}.fa-address-book:before{content:""}.fa-address-book-o:before{content:""}.fa-address-card:before,.fa-vcard:before{content:""}.fa-address-card-o:before,.fa-vcard-o:before{content:""}.fa-user-circle:before{content:""}.fa-user-circle-o:before{content:""}.fa-user-o:before{content:""}.fa-id-badge:before{content:""}.fa-drivers-license:before,.fa-id-card:before{content:""}.fa-drivers-license-o:before,.fa-id-card-o:before{content:""}.fa-quora:before{content:""}.fa-free-code-camp:before{content:""}.fa-telegram:before{content:""}.fa-thermometer-4:b
efore,.fa-thermometer-full:before,.fa-thermometer:before{content:""}.fa-thermometer-3:before,.fa-thermometer-three-quarters:before{content:""}.fa-thermometer-2:before,.fa-thermometer-half:before{content:""}.fa-thermometer-1:before,.fa-thermometer-quarter:before{content:""}.fa-thermometer-0:before,.fa-thermometer-empty:before{content:""}.fa-shower:before{content:""}.fa-bath:before,.fa-bathtub:before,.fa-s15:before{content:""}.fa-podcast:before{content:""}.fa-window-maximize:before{content:""}.fa-window-minimize:before{content:""}.fa-window-restore:before{content:""}.fa-times-rectangle:before,.fa-window-close:before{content:""}.fa-times-rectangle-o:before,.fa-window-close-o:before{content:""}.fa-bandcamp:before{content:""}.fa-grav:before{content:""}.fa-etsy:before{content:""}.fa-imdb:before{content:""}.fa-ravelry:before{content:""}.fa-eercast:before{content:""}.fa-microchip:before{content:""}.fa-snowflake-o:before{content:""}.fa-superpowers:before{content:""}.fa-wpexplorer:before{content:""}.fa-meetup:before{content:""}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0,0,0,0);border:0}.sr-only-focusable:active,.sr-only-focusable:focus{position:static;width:auto;height:auto;margin:0;overflow:visible;clip:auto}.fa,.icon,.rst-content .admonition-title,.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content code.download span:first-child,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink,.rst-content tt.download span:first-child,.wy-dropdown .caret,.wy-inline-validate.wy-inline-validate-danger .wy-input-context,.wy-inline-validate.wy-inline-validate-info .wy-input-context,.wy-inline-validate.wy-inline-validate-success .wy-input-context,.wy-inline-validate.wy-inline-validate-warning .wy-input-context,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li button.toctree-expand{font-family:inherit}.fa:before,.icon:before,.rst-content .admonition-title:before,.rst-content .code-block-caption .headerlink:before,.rst-content .eqno .headerlink:before,.rst-content code.download span:first-child:before,.rst-content dl dt .headerlink:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 .headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content p.caption .headerlink:before,.rst-content p .headerlink:before,.rst-content table>caption .headerlink:before,.rst-content tt.download span:first-child:before,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before,.wy-menu-vertical li button.toctree-expand:before{font-family:FontAwesome;display:inline-block;font-style:normal;font-weight:400;line-height:1;text-decoration:inherit}.rst-content .code-block-caption a .headerlink,.rst-content .eqno a .headerlink,.rst-content a 
.admonition-title,.rst-content code.download a span:first-child,.rst-content dl dt a .headerlink,.rst-content h1 a .headerlink,.rst-content h2 a .headerlink,.rst-content h3 a .headerlink,.rst-content h4 a .headerlink,.rst-content h5 a .headerlink,.rst-content h6 a .headerlink,.rst-content p.caption a .headerlink,.rst-content p a .headerlink,.rst-content table>caption a .headerlink,.rst-content tt.download a span:first-child,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li a button.toctree-expand,a .fa,a .icon,a .rst-content .admonition-title,a .rst-content .code-block-caption .headerlink,a .rst-content .eqno .headerlink,a .rst-content code.download span:first-child,a .rst-content dl dt .headerlink,a .rst-content h1 .headerlink,a .rst-content h2 .headerlink,a .rst-content h3 .headerlink,a .rst-content h4 .headerlink,a .rst-content h5 .headerlink,a .rst-content h6 .headerlink,a .rst-content p.caption .headerlink,a .rst-content p .headerlink,a .rst-content table>caption .headerlink,a .rst-content tt.download span:first-child,a .wy-menu-vertical li button.toctree-expand{display:inline-block;text-decoration:inherit}.btn .fa,.btn .icon,.btn .rst-content .admonition-title,.btn .rst-content .code-block-caption .headerlink,.btn .rst-content .eqno .headerlink,.btn .rst-content code.download span:first-child,.btn .rst-content dl dt .headerlink,.btn .rst-content h1 .headerlink,.btn .rst-content h2 .headerlink,.btn .rst-content h3 .headerlink,.btn .rst-content h4 .headerlink,.btn .rst-content h5 .headerlink,.btn .rst-content h6 .headerlink,.btn .rst-content p .headerlink,.btn .rst-content table>caption .headerlink,.btn .rst-content tt.download span:first-child,.btn .wy-menu-vertical li.current>a button.toctree-expand,.btn .wy-menu-vertical li.on a button.toctree-expand,.btn .wy-menu-vertical li button.toctree-expand,.nav .fa,.nav .icon,.nav .rst-content .admonition-title,.nav .rst-content .code-block-caption .headerlink,.nav .rst-content .eqno .headerlink,.nav .rst-content code.download span:first-child,.nav .rst-content dl dt .headerlink,.nav .rst-content h1 .headerlink,.nav .rst-content h2 .headerlink,.nav .rst-content h3 .headerlink,.nav .rst-content h4 .headerlink,.nav .rst-content h5 .headerlink,.nav .rst-content h6 .headerlink,.nav .rst-content p .headerlink,.nav .rst-content table>caption .headerlink,.nav .rst-content tt.download span:first-child,.nav .wy-menu-vertical li.current>a button.toctree-expand,.nav .wy-menu-vertical li.on a button.toctree-expand,.nav .wy-menu-vertical li button.toctree-expand,.rst-content .btn .admonition-title,.rst-content .code-block-caption .btn .headerlink,.rst-content .code-block-caption .nav .headerlink,.rst-content .eqno .btn .headerlink,.rst-content .eqno .nav .headerlink,.rst-content .nav .admonition-title,.rst-content code.download .btn span:first-child,.rst-content code.download .nav span:first-child,.rst-content dl dt .btn .headerlink,.rst-content dl dt .nav .headerlink,.rst-content h1 .btn .headerlink,.rst-content h1 .nav .headerlink,.rst-content h2 .btn .headerlink,.rst-content h2 .nav .headerlink,.rst-content h3 .btn .headerlink,.rst-content h3 .nav .headerlink,.rst-content h4 .btn .headerlink,.rst-content h4 .nav .headerlink,.rst-content h5 .btn .headerlink,.rst-content h5 .nav .headerlink,.rst-content h6 .btn .headerlink,.rst-content h6 .nav .headerlink,.rst-content p .btn .headerlink,.rst-content p .nav .headerlink,.rst-content table>caption .btn .headerlink,.rst-content 
table>caption .nav .headerlink,.rst-content tt.download .btn span:first-child,.rst-content tt.download .nav span:first-child,.wy-menu-vertical li .btn button.toctree-expand,.wy-menu-vertical li.current>a .btn button.toctree-expand,.wy-menu-vertical li.current>a .nav button.toctree-expand,.wy-menu-vertical li .nav button.toctree-expand,.wy-menu-vertical li.on a .btn button.toctree-expand,.wy-menu-vertical li.on a .nav button.toctree-expand{display:inline}.btn .fa-large.icon,.btn .fa.fa-large,.btn .rst-content .code-block-caption .fa-large.headerlink,.btn .rst-content .eqno .fa-large.headerlink,.btn .rst-content .fa-large.admonition-title,.btn .rst-content code.download span.fa-large:first-child,.btn .rst-content dl dt .fa-large.headerlink,.btn .rst-content h1 .fa-large.headerlink,.btn .rst-content h2 .fa-large.headerlink,.btn .rst-content h3 .fa-large.headerlink,.btn .rst-content h4 .fa-large.headerlink,.btn .rst-content h5 .fa-large.headerlink,.btn .rst-content h6 .fa-large.headerlink,.btn .rst-content p .fa-large.headerlink,.btn .rst-content table>caption .fa-large.headerlink,.btn .rst-content tt.download span.fa-large:first-child,.btn .wy-menu-vertical li button.fa-large.toctree-expand,.nav .fa-large.icon,.nav .fa.fa-large,.nav .rst-content .code-block-caption .fa-large.headerlink,.nav .rst-content .eqno .fa-large.headerlink,.nav .rst-content .fa-large.admonition-title,.nav .rst-content code.download span.fa-large:first-child,.nav .rst-content dl dt .fa-large.headerlink,.nav .rst-content h1 .fa-large.headerlink,.nav .rst-content h2 .fa-large.headerlink,.nav .rst-content h3 .fa-large.headerlink,.nav .rst-content h4 .fa-large.headerlink,.nav .rst-content h5 .fa-large.headerlink,.nav .rst-content h6 .fa-large.headerlink,.nav .rst-content p .fa-large.headerlink,.nav .rst-content table>caption .fa-large.headerlink,.nav .rst-content tt.download span.fa-large:first-child,.nav .wy-menu-vertical li button.fa-large.toctree-expand,.rst-content .btn .fa-large.admonition-title,.rst-content .code-block-caption .btn .fa-large.headerlink,.rst-content .code-block-caption .nav .fa-large.headerlink,.rst-content .eqno .btn .fa-large.headerlink,.rst-content .eqno .nav .fa-large.headerlink,.rst-content .nav .fa-large.admonition-title,.rst-content code.download .btn span.fa-large:first-child,.rst-content code.download .nav span.fa-large:first-child,.rst-content dl dt .btn .fa-large.headerlink,.rst-content dl dt .nav .fa-large.headerlink,.rst-content h1 .btn .fa-large.headerlink,.rst-content h1 .nav .fa-large.headerlink,.rst-content h2 .btn .fa-large.headerlink,.rst-content h2 .nav .fa-large.headerlink,.rst-content h3 .btn .fa-large.headerlink,.rst-content h3 .nav .fa-large.headerlink,.rst-content h4 .btn .fa-large.headerlink,.rst-content h4 .nav .fa-large.headerlink,.rst-content h5 .btn .fa-large.headerlink,.rst-content h5 .nav .fa-large.headerlink,.rst-content h6 .btn .fa-large.headerlink,.rst-content h6 .nav .fa-large.headerlink,.rst-content p .btn .fa-large.headerlink,.rst-content p .nav .fa-large.headerlink,.rst-content table>caption .btn .fa-large.headerlink,.rst-content table>caption .nav .fa-large.headerlink,.rst-content tt.download .btn span.fa-large:first-child,.rst-content tt.download .nav span.fa-large:first-child,.wy-menu-vertical li .btn button.fa-large.toctree-expand,.wy-menu-vertical li .nav button.fa-large.toctree-expand{line-height:.9em}.btn .fa-spin.icon,.btn .fa.fa-spin,.btn .rst-content .code-block-caption .fa-spin.headerlink,.btn .rst-content .eqno .fa-spin.headerlink,.btn .rst-content 
.fa-spin.admonition-title,.btn .rst-content code.download span.fa-spin:first-child,.btn .rst-content dl dt .fa-spin.headerlink,.btn .rst-content h1 .fa-spin.headerlink,.btn .rst-content h2 .fa-spin.headerlink,.btn .rst-content h3 .fa-spin.headerlink,.btn .rst-content h4 .fa-spin.headerlink,.btn .rst-content h5 .fa-spin.headerlink,.btn .rst-content h6 .fa-spin.headerlink,.btn .rst-content p .fa-spin.headerlink,.btn .rst-content table>caption .fa-spin.headerlink,.btn .rst-content tt.download span.fa-spin:first-child,.btn .wy-menu-vertical li button.fa-spin.toctree-expand,.nav .fa-spin.icon,.nav .fa.fa-spin,.nav .rst-content .code-block-caption .fa-spin.headerlink,.nav .rst-content .eqno .fa-spin.headerlink,.nav .rst-content .fa-spin.admonition-title,.nav .rst-content code.download span.fa-spin:first-child,.nav .rst-content dl dt .fa-spin.headerlink,.nav .rst-content h1 .fa-spin.headerlink,.nav .rst-content h2 .fa-spin.headerlink,.nav .rst-content h3 .fa-spin.headerlink,.nav .rst-content h4 .fa-spin.headerlink,.nav .rst-content h5 .fa-spin.headerlink,.nav .rst-content h6 .fa-spin.headerlink,.nav .rst-content p .fa-spin.headerlink,.nav .rst-content table>caption .fa-spin.headerlink,.nav .rst-content tt.download span.fa-spin:first-child,.nav .wy-menu-vertical li button.fa-spin.toctree-expand,.rst-content .btn .fa-spin.admonition-title,.rst-content .code-block-caption .btn .fa-spin.headerlink,.rst-content .code-block-caption .nav .fa-spin.headerlink,.rst-content .eqno .btn .fa-spin.headerlink,.rst-content .eqno .nav .fa-spin.headerlink,.rst-content .nav .fa-spin.admonition-title,.rst-content code.download .btn span.fa-spin:first-child,.rst-content code.download .nav span.fa-spin:first-child,.rst-content dl dt .btn .fa-spin.headerlink,.rst-content dl dt .nav .fa-spin.headerlink,.rst-content h1 .btn .fa-spin.headerlink,.rst-content h1 .nav .fa-spin.headerlink,.rst-content h2 .btn .fa-spin.headerlink,.rst-content h2 .nav .fa-spin.headerlink,.rst-content h3 .btn .fa-spin.headerlink,.rst-content h3 .nav .fa-spin.headerlink,.rst-content h4 .btn .fa-spin.headerlink,.rst-content h4 .nav .fa-spin.headerlink,.rst-content h5 .btn .fa-spin.headerlink,.rst-content h5 .nav .fa-spin.headerlink,.rst-content h6 .btn .fa-spin.headerlink,.rst-content h6 .nav .fa-spin.headerlink,.rst-content p .btn .fa-spin.headerlink,.rst-content p .nav .fa-spin.headerlink,.rst-content table>caption .btn .fa-spin.headerlink,.rst-content table>caption .nav .fa-spin.headerlink,.rst-content tt.download .btn span.fa-spin:first-child,.rst-content tt.download .nav span.fa-spin:first-child,.wy-menu-vertical li .btn button.fa-spin.toctree-expand,.wy-menu-vertical li .nav button.fa-spin.toctree-expand{display:inline-block}.btn.fa:before,.btn.icon:before,.rst-content .btn.admonition-title:before,.rst-content .code-block-caption .btn.headerlink:before,.rst-content .eqno .btn.headerlink:before,.rst-content code.download span.btn:first-child:before,.rst-content dl dt .btn.headerlink:before,.rst-content h1 .btn.headerlink:before,.rst-content h2 .btn.headerlink:before,.rst-content h3 .btn.headerlink:before,.rst-content h4 .btn.headerlink:before,.rst-content h5 .btn.headerlink:before,.rst-content h6 .btn.headerlink:before,.rst-content p .btn.headerlink:before,.rst-content table>caption .btn.headerlink:before,.rst-content tt.download span.btn:first-child:before,.wy-menu-vertical li button.btn.toctree-expand:before{opacity:.5;-webkit-transition:opacity .05s ease-in;-moz-transition:opacity .05s ease-in;transition:opacity .05s 
ease-in}.btn.fa:hover:before,.btn.icon:hover:before,.rst-content .btn.admonition-title:hover:before,.rst-content .code-block-caption .btn.headerlink:hover:before,.rst-content .eqno .btn.headerlink:hover:before,.rst-content code.download span.btn:first-child:hover:before,.rst-content dl dt .btn.headerlink:hover:before,.rst-content h1 .btn.headerlink:hover:before,.rst-content h2 .btn.headerlink:hover:before,.rst-content h3 .btn.headerlink:hover:before,.rst-content h4 .btn.headerlink:hover:before,.rst-content h5 .btn.headerlink:hover:before,.rst-content h6 .btn.headerlink:hover:before,.rst-content p .btn.headerlink:hover:before,.rst-content table>caption .btn.headerlink:hover:before,.rst-content tt.download span.btn:first-child:hover:before,.wy-menu-vertical li button.btn.toctree-expand:hover:before{opacity:1}.btn-mini .fa:before,.btn-mini .icon:before,.btn-mini .rst-content .admonition-title:before,.btn-mini .rst-content .code-block-caption .headerlink:before,.btn-mini .rst-content .eqno .headerlink:before,.btn-mini .rst-content code.download span:first-child:before,.btn-mini .rst-content dl dt .headerlink:before,.btn-mini .rst-content h1 .headerlink:before,.btn-mini .rst-content h2 .headerlink:before,.btn-mini .rst-content h3 .headerlink:before,.btn-mini .rst-content h4 .headerlink:before,.btn-mini .rst-content h5 .headerlink:before,.btn-mini .rst-content h6 .headerlink:before,.btn-mini .rst-content p .headerlink:before,.btn-mini .rst-content table>caption .headerlink:before,.btn-mini .rst-content tt.download span:first-child:before,.btn-mini .wy-menu-vertical li button.toctree-expand:before,.rst-content .btn-mini .admonition-title:before,.rst-content .code-block-caption .btn-mini .headerlink:before,.rst-content .eqno .btn-mini .headerlink:before,.rst-content code.download .btn-mini span:first-child:before,.rst-content dl dt .btn-mini .headerlink:before,.rst-content h1 .btn-mini .headerlink:before,.rst-content h2 .btn-mini .headerlink:before,.rst-content h3 .btn-mini .headerlink:before,.rst-content h4 .btn-mini .headerlink:before,.rst-content h5 .btn-mini .headerlink:before,.rst-content h6 .btn-mini .headerlink:before,.rst-content p .btn-mini .headerlink:before,.rst-content table>caption .btn-mini .headerlink:before,.rst-content tt.download .btn-mini span:first-child:before,.wy-menu-vertical li .btn-mini button.toctree-expand:before{font-size:14px;vertical-align:-15%}.rst-content .admonition,.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning,.wy-alert{padding:12px;line-height:24px;margin-bottom:24px;background:#e7f2fa}.rst-content .admonition-title,.wy-alert-title{font-weight:700;display:block;color:#fff;background:#6ab0de;padding:6px 12px;margin:-12px -12px 12px}.rst-content .danger,.rst-content .error,.rst-content .wy-alert-danger.admonition,.rst-content .wy-alert-danger.admonition-todo,.rst-content .wy-alert-danger.attention,.rst-content .wy-alert-danger.caution,.rst-content .wy-alert-danger.hint,.rst-content .wy-alert-danger.important,.rst-content .wy-alert-danger.note,.rst-content .wy-alert-danger.seealso,.rst-content .wy-alert-danger.tip,.rst-content .wy-alert-danger.warning,.wy-alert.wy-alert-danger{background:#fdf3f2}.rst-content .danger .admonition-title,.rst-content .danger .wy-alert-title,.rst-content .error .admonition-title,.rst-content .error .wy-alert-title,.rst-content 
.wy-alert-danger.admonition-todo .admonition-title,.rst-content .wy-alert-danger.admonition-todo .wy-alert-title,.rst-content .wy-alert-danger.admonition .admonition-title,.rst-content .wy-alert-danger.admonition .wy-alert-title,.rst-content .wy-alert-danger.attention .admonition-title,.rst-content .wy-alert-danger.attention .wy-alert-title,.rst-content .wy-alert-danger.caution .admonition-title,.rst-content .wy-alert-danger.caution .wy-alert-title,.rst-content .wy-alert-danger.hint .admonition-title,.rst-content .wy-alert-danger.hint .wy-alert-title,.rst-content .wy-alert-danger.important .admonition-title,.rst-content .wy-alert-danger.important .wy-alert-title,.rst-content .wy-alert-danger.note .admonition-title,.rst-content .wy-alert-danger.note .wy-alert-title,.rst-content .wy-alert-danger.seealso .admonition-title,.rst-content .wy-alert-danger.seealso .wy-alert-title,.rst-content .wy-alert-danger.tip .admonition-title,.rst-content .wy-alert-danger.tip .wy-alert-title,.rst-content .wy-alert-danger.warning .admonition-title,.rst-content .wy-alert-danger.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-danger .admonition-title,.wy-alert.wy-alert-danger .rst-content .admonition-title,.wy-alert.wy-alert-danger .wy-alert-title{background:#f29f97}.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .warning,.rst-content .wy-alert-warning.admonition,.rst-content .wy-alert-warning.danger,.rst-content .wy-alert-warning.error,.rst-content .wy-alert-warning.hint,.rst-content .wy-alert-warning.important,.rst-content .wy-alert-warning.note,.rst-content .wy-alert-warning.seealso,.rst-content .wy-alert-warning.tip,.wy-alert.wy-alert-warning{background:#ffedcc}.rst-content .admonition-todo .admonition-title,.rst-content .admonition-todo .wy-alert-title,.rst-content .attention .admonition-title,.rst-content .attention .wy-alert-title,.rst-content .caution .admonition-title,.rst-content .caution .wy-alert-title,.rst-content .warning .admonition-title,.rst-content .warning .wy-alert-title,.rst-content .wy-alert-warning.admonition .admonition-title,.rst-content .wy-alert-warning.admonition .wy-alert-title,.rst-content .wy-alert-warning.danger .admonition-title,.rst-content .wy-alert-warning.danger .wy-alert-title,.rst-content .wy-alert-warning.error .admonition-title,.rst-content .wy-alert-warning.error .wy-alert-title,.rst-content .wy-alert-warning.hint .admonition-title,.rst-content .wy-alert-warning.hint .wy-alert-title,.rst-content .wy-alert-warning.important .admonition-title,.rst-content .wy-alert-warning.important .wy-alert-title,.rst-content .wy-alert-warning.note .admonition-title,.rst-content .wy-alert-warning.note .wy-alert-title,.rst-content .wy-alert-warning.seealso .admonition-title,.rst-content .wy-alert-warning.seealso .wy-alert-title,.rst-content .wy-alert-warning.tip .admonition-title,.rst-content .wy-alert-warning.tip .wy-alert-title,.rst-content .wy-alert.wy-alert-warning .admonition-title,.wy-alert.wy-alert-warning .rst-content .admonition-title,.wy-alert.wy-alert-warning .wy-alert-title{background:#f0b37e}.rst-content .note,.rst-content .seealso,.rst-content .wy-alert-info.admonition,.rst-content .wy-alert-info.admonition-todo,.rst-content .wy-alert-info.attention,.rst-content .wy-alert-info.caution,.rst-content .wy-alert-info.danger,.rst-content .wy-alert-info.error,.rst-content .wy-alert-info.hint,.rst-content .wy-alert-info.important,.rst-content .wy-alert-info.tip,.rst-content 
.wy-alert-info.warning,.wy-alert.wy-alert-info{background:#e7f2fa}.rst-content .note .admonition-title,.rst-content .note .wy-alert-title,.rst-content .seealso .admonition-title,.rst-content .seealso .wy-alert-title,.rst-content .wy-alert-info.admonition-todo .admonition-title,.rst-content .wy-alert-info.admonition-todo .wy-alert-title,.rst-content .wy-alert-info.admonition .admonition-title,.rst-content .wy-alert-info.admonition .wy-alert-title,.rst-content .wy-alert-info.attention .admonition-title,.rst-content .wy-alert-info.attention .wy-alert-title,.rst-content .wy-alert-info.caution .admonition-title,.rst-content .wy-alert-info.caution .wy-alert-title,.rst-content .wy-alert-info.danger .admonition-title,.rst-content .wy-alert-info.danger .wy-alert-title,.rst-content .wy-alert-info.error .admonition-title,.rst-content .wy-alert-info.error .wy-alert-title,.rst-content .wy-alert-info.hint .admonition-title,.rst-content .wy-alert-info.hint .wy-alert-title,.rst-content .wy-alert-info.important .admonition-title,.rst-content .wy-alert-info.important .wy-alert-title,.rst-content .wy-alert-info.tip .admonition-title,.rst-content .wy-alert-info.tip .wy-alert-title,.rst-content .wy-alert-info.warning .admonition-title,.rst-content .wy-alert-info.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-info .admonition-title,.wy-alert.wy-alert-info .rst-content .admonition-title,.wy-alert.wy-alert-info .wy-alert-title{background:#6ab0de}.rst-content .hint,.rst-content .important,.rst-content .tip,.rst-content .wy-alert-success.admonition,.rst-content .wy-alert-success.admonition-todo,.rst-content .wy-alert-success.attention,.rst-content .wy-alert-success.caution,.rst-content .wy-alert-success.danger,.rst-content .wy-alert-success.error,.rst-content .wy-alert-success.note,.rst-content .wy-alert-success.seealso,.rst-content .wy-alert-success.warning,.wy-alert.wy-alert-success{background:#dbfaf4}.rst-content .hint .admonition-title,.rst-content .hint .wy-alert-title,.rst-content .important .admonition-title,.rst-content .important .wy-alert-title,.rst-content .tip .admonition-title,.rst-content .tip .wy-alert-title,.rst-content .wy-alert-success.admonition-todo .admonition-title,.rst-content .wy-alert-success.admonition-todo .wy-alert-title,.rst-content .wy-alert-success.admonition .admonition-title,.rst-content .wy-alert-success.admonition .wy-alert-title,.rst-content .wy-alert-success.attention .admonition-title,.rst-content .wy-alert-success.attention .wy-alert-title,.rst-content .wy-alert-success.caution .admonition-title,.rst-content .wy-alert-success.caution .wy-alert-title,.rst-content .wy-alert-success.danger .admonition-title,.rst-content .wy-alert-success.danger .wy-alert-title,.rst-content .wy-alert-success.error .admonition-title,.rst-content .wy-alert-success.error .wy-alert-title,.rst-content .wy-alert-success.note .admonition-title,.rst-content .wy-alert-success.note .wy-alert-title,.rst-content .wy-alert-success.seealso .admonition-title,.rst-content .wy-alert-success.seealso .wy-alert-title,.rst-content .wy-alert-success.warning .admonition-title,.rst-content .wy-alert-success.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-success .admonition-title,.wy-alert.wy-alert-success .rst-content .admonition-title,.wy-alert.wy-alert-success .wy-alert-title{background:#1abc9c}.rst-content .wy-alert-neutral.admonition,.rst-content .wy-alert-neutral.admonition-todo,.rst-content .wy-alert-neutral.attention,.rst-content .wy-alert-neutral.caution,.rst-content 
.wy-alert-neutral.danger,.rst-content .wy-alert-neutral.error,.rst-content .wy-alert-neutral.hint,.rst-content .wy-alert-neutral.important,.rst-content .wy-alert-neutral.note,.rst-content .wy-alert-neutral.seealso,.rst-content .wy-alert-neutral.tip,.rst-content .wy-alert-neutral.warning,.wy-alert.wy-alert-neutral{background:#f3f6f6}.rst-content .wy-alert-neutral.admonition-todo .admonition-title,.rst-content .wy-alert-neutral.admonition-todo .wy-alert-title,.rst-content .wy-alert-neutral.admonition .admonition-title,.rst-content .wy-alert-neutral.admonition .wy-alert-title,.rst-content .wy-alert-neutral.attention .admonition-title,.rst-content .wy-alert-neutral.attention .wy-alert-title,.rst-content .wy-alert-neutral.caution .admonition-title,.rst-content .wy-alert-neutral.caution .wy-alert-title,.rst-content .wy-alert-neutral.danger .admonition-title,.rst-content .wy-alert-neutral.danger .wy-alert-title,.rst-content .wy-alert-neutral.error .admonition-title,.rst-content .wy-alert-neutral.error .wy-alert-title,.rst-content .wy-alert-neutral.hint .admonition-title,.rst-content .wy-alert-neutral.hint .wy-alert-title,.rst-content .wy-alert-neutral.important .admonition-title,.rst-content .wy-alert-neutral.important .wy-alert-title,.rst-content .wy-alert-neutral.note .admonition-title,.rst-content .wy-alert-neutral.note .wy-alert-title,.rst-content .wy-alert-neutral.seealso .admonition-title,.rst-content .wy-alert-neutral.seealso .wy-alert-title,.rst-content .wy-alert-neutral.tip .admonition-title,.rst-content .wy-alert-neutral.tip .wy-alert-title,.rst-content .wy-alert-neutral.warning .admonition-title,.rst-content .wy-alert-neutral.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-neutral .admonition-title,.wy-alert.wy-alert-neutral .rst-content .admonition-title,.wy-alert.wy-alert-neutral .wy-alert-title{color:#404040;background:#e1e4e5}.rst-content .wy-alert-neutral.admonition-todo a,.rst-content .wy-alert-neutral.admonition a,.rst-content .wy-alert-neutral.attention a,.rst-content .wy-alert-neutral.caution a,.rst-content .wy-alert-neutral.danger a,.rst-content .wy-alert-neutral.error a,.rst-content .wy-alert-neutral.hint a,.rst-content .wy-alert-neutral.important a,.rst-content .wy-alert-neutral.note a,.rst-content .wy-alert-neutral.seealso a,.rst-content .wy-alert-neutral.tip a,.rst-content .wy-alert-neutral.warning a,.wy-alert.wy-alert-neutral a{color:#2980b9}.rst-content .admonition-todo p:last-child,.rst-content .admonition p:last-child,.rst-content .attention p:last-child,.rst-content .caution p:last-child,.rst-content .danger p:last-child,.rst-content .error p:last-child,.rst-content .hint p:last-child,.rst-content .important p:last-child,.rst-content .note p:last-child,.rst-content .seealso p:last-child,.rst-content .tip p:last-child,.rst-content .warning p:last-child,.wy-alert p:last-child{margin-bottom:0}.wy-tray-container{position:fixed;bottom:0;left:0;z-index:600}.wy-tray-container li{display:block;width:300px;background:transparent;color:#fff;text-align:center;box-shadow:0 5px 5px 0 rgba(0,0,0,.1);padding:0 24px;min-width:20%;opacity:0;height:0;line-height:56px;overflow:hidden;-webkit-transition:all .3s ease-in;-moz-transition:all .3s ease-in;transition:all .3s ease-in}.wy-tray-container li.wy-tray-item-success{background:#27ae60}.wy-tray-container li.wy-tray-item-info{background:#2980b9}.wy-tray-container li.wy-tray-item-warning{background:#e67e22}.wy-tray-container li.wy-tray-item-danger{background:#e74c3c}.wy-tray-container li.on{opacity:1;height:56px}@media screen 
and (max-width:768px){.wy-tray-container{bottom:auto;top:0;width:100%}.wy-tray-container li{width:100%}}button{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle;cursor:pointer;line-height:normal;-webkit-appearance:button;*overflow:visible}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}button[disabled]{cursor:default}.btn{display:inline-block;border-radius:2px;line-height:normal;white-space:nowrap;text-align:center;cursor:pointer;font-size:100%;padding:6px 12px 8px;color:#fff;border:1px solid rgba(0,0,0,.1);background-color:#27ae60;text-decoration:none;font-weight:400;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;box-shadow:inset 0 1px 2px -1px hsla(0,0%,100%,.5),inset 0 -2px 0 0 rgba(0,0,0,.1);outline-none:false;vertical-align:middle;*display:inline;zoom:1;-webkit-user-drag:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;-webkit-transition:all .1s linear;-moz-transition:all .1s linear;transition:all .1s linear}.btn-hover{background:#2e8ece;color:#fff}.btn:hover{background:#2cc36b;color:#fff}.btn:focus{background:#2cc36b;outline:0}.btn:active{box-shadow:inset 0 -1px 0 0 rgba(0,0,0,.05),inset 0 2px 0 0 rgba(0,0,0,.1);padding:8px 12px 6px}.btn:visited{color:#fff}.btn-disabled,.btn-disabled:active,.btn-disabled:focus,.btn-disabled:hover,.btn:disabled{background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = false);filter:alpha(opacity=40);opacity:.4;cursor:not-allowed;box-shadow:none}.btn::-moz-focus-inner{padding:0;border:0}.btn-small{font-size:80%}.btn-info{background-color:#2980b9!important}.btn-info:hover{background-color:#2e8ece!important}.btn-neutral{background-color:#f3f6f6!important;color:#404040!important}.btn-neutral:hover{background-color:#e5ebeb!important;color:#404040}.btn-neutral:visited{color:#404040!important}.btn-success{background-color:#27ae60!important}.btn-success:hover{background-color:#295!important}.btn-danger{background-color:#e74c3c!important}.btn-danger:hover{background-color:#ea6153!important}.btn-warning{background-color:#e67e22!important}.btn-warning:hover{background-color:#e98b39!important}.btn-invert{background-color:#222}.btn-invert:hover{background-color:#2f2f2f!important}.btn-link{background-color:transparent!important;color:#2980b9;box-shadow:none;border-color:transparent!important}.btn-link:active,.btn-link:hover{background-color:transparent!important;color:#409ad5!important;box-shadow:none}.btn-link:visited{color:#9b59b6}.wy-btn-group .btn,.wy-control .btn{vertical-align:middle}.wy-btn-group{margin-bottom:24px;*zoom:1}.wy-btn-group:after,.wy-btn-group:before{display:table;content:""}.wy-btn-group:after{clear:both}.wy-dropdown{position:relative;display:inline-block}.wy-dropdown-active .wy-dropdown-menu{display:block}.wy-dropdown-menu{position:absolute;left:0;display:none;float:left;top:100%;min-width:100%;background:#fcfcfc;z-index:100;border:1px solid #cfd7dd;box-shadow:0 2px 2px 0 rgba(0,0,0,.1);padding:12px}.wy-dropdown-menu>dd>a{display:block;clear:both;color:#404040;white-space:nowrap;font-size:90%;padding:0 12px;cursor:pointer}.wy-dropdown-menu>dd>a:hover{background:#2980b9;color:#fff}.wy-dropdown-menu>dd.divider{border-top:1px solid #cfd7dd;margin:6px 0}.wy-dropdown-menu>dd.search{padding-bottom:12px}.wy-dropdown-menu>dd.search 
input[type=search]{width:100%}.wy-dropdown-menu>dd.call-to-action{background:#e3e3e3;text-transform:uppercase;font-weight:500;font-size:80%}.wy-dropdown-menu>dd.call-to-action:hover{background:#e3e3e3}.wy-dropdown-menu>dd.call-to-action .btn{color:#fff}.wy-dropdown.wy-dropdown-up .wy-dropdown-menu{bottom:100%;top:auto;left:auto;right:0}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu{background:#fcfcfc;margin-top:2px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a{padding:6px 12px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a:hover{background:#2980b9;color:#fff}.wy-dropdown.wy-dropdown-left .wy-dropdown-menu{right:0;left:auto;text-align:right}.wy-dropdown-arrow:before{content:" ";border-bottom:5px solid #f5f5f5;border-left:5px solid transparent;border-right:5px solid transparent;position:absolute;display:block;top:-4px;left:50%;margin-left:-3px}.wy-dropdown-arrow.wy-dropdown-arrow-left:before{left:11px}.wy-form-stacked select{display:block}.wy-form-aligned .wy-help-inline,.wy-form-aligned input,.wy-form-aligned label,.wy-form-aligned select,.wy-form-aligned textarea{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-form-aligned .wy-control-group>label{display:inline-block;vertical-align:middle;width:10em;margin:6px 12px 0 0;float:left}.wy-form-aligned .wy-control{float:left}.wy-form-aligned .wy-control label{display:block}.wy-form-aligned .wy-control select{margin-top:6px}fieldset{margin:0}fieldset,legend{border:0;padding:0}legend{width:100%;white-space:normal;margin-bottom:24px;font-size:150%;*margin-left:-7px}label,legend{display:block}label{margin:0 0 .3125em;color:#333;font-size:90%}input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}.wy-control-group{margin-bottom:24px;max-width:1200px;margin-left:auto;margin-right:auto;*zoom:1}.wy-control-group:after,.wy-control-group:before{display:table;content:""}.wy-control-group:after{clear:both}.wy-control-group.wy-control-group-required>label:after{content:" *";color:#e74c3c}.wy-control-group .wy-form-full,.wy-control-group .wy-form-halves,.wy-control-group .wy-form-thirds{padding-bottom:12px}.wy-control-group .wy-form-full input[type=color],.wy-control-group .wy-form-full input[type=date],.wy-control-group .wy-form-full input[type=datetime-local],.wy-control-group .wy-form-full input[type=datetime],.wy-control-group .wy-form-full input[type=email],.wy-control-group .wy-form-full input[type=month],.wy-control-group .wy-form-full input[type=number],.wy-control-group .wy-form-full input[type=password],.wy-control-group .wy-form-full input[type=search],.wy-control-group .wy-form-full input[type=tel],.wy-control-group .wy-form-full input[type=text],.wy-control-group .wy-form-full input[type=time],.wy-control-group .wy-form-full input[type=url],.wy-control-group .wy-form-full input[type=week],.wy-control-group .wy-form-full select,.wy-control-group .wy-form-halves input[type=color],.wy-control-group .wy-form-halves input[type=date],.wy-control-group .wy-form-halves input[type=datetime-local],.wy-control-group .wy-form-halves input[type=datetime],.wy-control-group .wy-form-halves input[type=email],.wy-control-group .wy-form-halves input[type=month],.wy-control-group .wy-form-halves input[type=number],.wy-control-group .wy-form-halves input[type=password],.wy-control-group .wy-form-halves input[type=search],.wy-control-group .wy-form-halves input[type=tel],.wy-control-group .wy-form-halves input[type=text],.wy-control-group .wy-form-halves input[type=time],.wy-control-group 
.wy-form-halves input[type=url],.wy-control-group .wy-form-halves input[type=week],.wy-control-group .wy-form-halves select,.wy-control-group .wy-form-thirds input[type=color],.wy-control-group .wy-form-thirds input[type=date],.wy-control-group .wy-form-thirds input[type=datetime-local],.wy-control-group .wy-form-thirds input[type=datetime],.wy-control-group .wy-form-thirds input[type=email],.wy-control-group .wy-form-thirds input[type=month],.wy-control-group .wy-form-thirds input[type=number],.wy-control-group .wy-form-thirds input[type=password],.wy-control-group .wy-form-thirds input[type=search],.wy-control-group .wy-form-thirds input[type=tel],.wy-control-group .wy-form-thirds input[type=text],.wy-control-group .wy-form-thirds input[type=time],.wy-control-group .wy-form-thirds input[type=url],.wy-control-group .wy-form-thirds input[type=week],.wy-control-group .wy-form-thirds select{width:100%}.wy-control-group .wy-form-full{float:left;display:block;width:100%;margin-right:0}.wy-control-group .wy-form-full:last-child{margin-right:0}.wy-control-group .wy-form-halves{float:left;display:block;margin-right:2.35765%;width:48.82117%}.wy-control-group .wy-form-halves:last-child,.wy-control-group .wy-form-halves:nth-of-type(2n){margin-right:0}.wy-control-group .wy-form-halves:nth-of-type(odd){clear:left}.wy-control-group .wy-form-thirds{float:left;display:block;margin-right:2.35765%;width:31.76157%}.wy-control-group .wy-form-thirds:last-child,.wy-control-group .wy-form-thirds:nth-of-type(3n){margin-right:0}.wy-control-group .wy-form-thirds:nth-of-type(3n+1){clear:left}.wy-control-group.wy-control-group-no-input .wy-control,.wy-control-no-input{margin:6px 0 0;font-size:90%}.wy-control-no-input{display:inline-block}.wy-control-group.fluid-input input[type=color],.wy-control-group.fluid-input input[type=date],.wy-control-group.fluid-input input[type=datetime-local],.wy-control-group.fluid-input input[type=datetime],.wy-control-group.fluid-input input[type=email],.wy-control-group.fluid-input input[type=month],.wy-control-group.fluid-input input[type=number],.wy-control-group.fluid-input input[type=password],.wy-control-group.fluid-input input[type=search],.wy-control-group.fluid-input input[type=tel],.wy-control-group.fluid-input input[type=text],.wy-control-group.fluid-input input[type=time],.wy-control-group.fluid-input input[type=url],.wy-control-group.fluid-input input[type=week]{width:100%}.wy-form-message-inline{padding-left:.3em;color:#666;font-size:90%}.wy-form-message{display:block;color:#999;font-size:70%;margin-top:.3125em;font-style:italic}.wy-form-message p{font-size:inherit;font-style:italic;margin-bottom:6px}.wy-form-message p:last-child{margin-bottom:0}input{line-height:normal}input[type=button],input[type=reset],input[type=submit]{-webkit-appearance:button;cursor:pointer;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;*overflow:visible}input[type=color],input[type=date],input[type=datetime-local],input[type=datetime],input[type=email],input[type=month],input[type=number],input[type=password],input[type=search],input[type=tel],input[type=text],input[type=time],input[type=url],input[type=week]{-webkit-appearance:none;padding:6px;display:inline-block;border:1px solid #ccc;font-size:80%;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;box-shadow:inset 0 1px 3px #ddd;border-radius:0;-webkit-transition:border .3s linear;-moz-transition:border .3s linear;transition:border .3s linear}input[type=datetime-local]{padding:.34375em 
.625em}input[disabled]{cursor:default}input[type=checkbox],input[type=radio]{padding:0;margin-right:.3125em;*height:13px;*width:13px}input[type=checkbox],input[type=radio],input[type=search]{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}input[type=search]::-webkit-search-cancel-button,input[type=search]::-webkit-search-decoration{-webkit-appearance:none}input[type=color]:focus,input[type=date]:focus,input[type=datetime-local]:focus,input[type=datetime]:focus,input[type=email]:focus,input[type=month]:focus,input[type=number]:focus,input[type=password]:focus,input[type=search]:focus,input[type=tel]:focus,input[type=text]:focus,input[type=time]:focus,input[type=url]:focus,input[type=week]:focus{outline:0;outline:thin dotted\9;border-color:#333}input.no-focus:focus{border-color:#ccc!important}input[type=checkbox]:focus,input[type=file]:focus,input[type=radio]:focus{outline:thin dotted #333;outline:1px auto #129fea}input[type=color][disabled],input[type=date][disabled],input[type=datetime-local][disabled],input[type=datetime][disabled],input[type=email][disabled],input[type=month][disabled],input[type=number][disabled],input[type=password][disabled],input[type=search][disabled],input[type=tel][disabled],input[type=text][disabled],input[type=time][disabled],input[type=url][disabled],input[type=week][disabled]{cursor:not-allowed;background-color:#fafafa}input:focus:invalid,select:focus:invalid,textarea:focus:invalid{color:#e74c3c;border:1px solid #e74c3c}input:focus:invalid:focus,select:focus:invalid:focus,textarea:focus:invalid:focus{border-color:#e74c3c}input[type=checkbox]:focus:invalid:focus,input[type=file]:focus:invalid:focus,input[type=radio]:focus:invalid:focus{outline-color:#e74c3c}input.wy-input-large{padding:12px;font-size:100%}textarea{overflow:auto;vertical-align:top;width:100%;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif}select,textarea{padding:.5em .625em;display:inline-block;border:1px solid #ccc;font-size:80%;box-shadow:inset 0 1px 3px #ddd;-webkit-transition:border .3s linear;-moz-transition:border .3s linear;transition:border .3s linear}select{border:1px solid #ccc;background-color:#fff}select[multiple]{height:auto}select:focus,textarea:focus{outline:0}input[readonly],select[disabled],select[readonly],textarea[disabled],textarea[readonly]{cursor:not-allowed;background-color:#fafafa}input[type=checkbox][disabled],input[type=radio][disabled]{cursor:not-allowed}.wy-checkbox,.wy-radio{margin:6px 0;color:#404040;display:block}.wy-checkbox input,.wy-radio input{vertical-align:baseline}.wy-form-message-inline{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-input-prefix,.wy-input-suffix{white-space:nowrap;padding:6px}.wy-input-prefix .wy-input-context,.wy-input-suffix .wy-input-context{line-height:27px;padding:0 8px;display:inline-block;font-size:80%;background-color:#f3f6f6;border:1px solid #ccc;color:#999}.wy-input-suffix .wy-input-context{border-left:0}.wy-input-prefix .wy-input-context{border-right:0}.wy-switch{position:relative;display:block;height:24px;margin-top:12px;cursor:pointer}.wy-switch:before{left:0;top:0;width:36px;height:12px;background:#ccc}.wy-switch:after,.wy-switch:before{position:absolute;content:"";display:block;border-radius:4px;-webkit-transition:all .2s ease-in-out;-moz-transition:all .2s ease-in-out;transition:all .2s ease-in-out}.wy-switch:after{width:18px;height:18px;background:#999;left:-3px;top:-3px}.wy-switch 
span{position:absolute;left:48px;display:block;font-size:12px;color:#ccc;line-height:1}.wy-switch.active:before{background:#1e8449}.wy-switch.active:after{left:24px;background:#27ae60}.wy-switch.disabled{cursor:not-allowed;opacity:.8}.wy-control-group.wy-control-group-error .wy-form-message,.wy-control-group.wy-control-group-error>label{color:#e74c3c}.wy-control-group.wy-control-group-error input[type=color],.wy-control-group.wy-control-group-error input[type=date],.wy-control-group.wy-control-group-error input[type=datetime-local],.wy-control-group.wy-control-group-error input[type=datetime],.wy-control-group.wy-control-group-error input[type=email],.wy-control-group.wy-control-group-error input[type=month],.wy-control-group.wy-control-group-error input[type=number],.wy-control-group.wy-control-group-error input[type=password],.wy-control-group.wy-control-group-error input[type=search],.wy-control-group.wy-control-group-error input[type=tel],.wy-control-group.wy-control-group-error input[type=text],.wy-control-group.wy-control-group-error input[type=time],.wy-control-group.wy-control-group-error input[type=url],.wy-control-group.wy-control-group-error input[type=week],.wy-control-group.wy-control-group-error textarea{border:1px solid #e74c3c}.wy-inline-validate{white-space:nowrap}.wy-inline-validate .wy-input-context{padding:.5em .625em;display:inline-block;font-size:80%}.wy-inline-validate.wy-inline-validate-success .wy-input-context{color:#27ae60}.wy-inline-validate.wy-inline-validate-danger .wy-input-context{color:#e74c3c}.wy-inline-validate.wy-inline-validate-warning .wy-input-context{color:#e67e22}.wy-inline-validate.wy-inline-validate-info .wy-input-context{color:#2980b9}.rotate-90{-webkit-transform:rotate(90deg);-moz-transform:rotate(90deg);-ms-transform:rotate(90deg);-o-transform:rotate(90deg);transform:rotate(90deg)}.rotate-180{-webkit-transform:rotate(180deg);-moz-transform:rotate(180deg);-ms-transform:rotate(180deg);-o-transform:rotate(180deg);transform:rotate(180deg)}.rotate-270{-webkit-transform:rotate(270deg);-moz-transform:rotate(270deg);-ms-transform:rotate(270deg);-o-transform:rotate(270deg);transform:rotate(270deg)}.mirror{-webkit-transform:scaleX(-1);-moz-transform:scaleX(-1);-ms-transform:scaleX(-1);-o-transform:scaleX(-1);transform:scaleX(-1)}.mirror.rotate-90{-webkit-transform:scaleX(-1) rotate(90deg);-moz-transform:scaleX(-1) rotate(90deg);-ms-transform:scaleX(-1) rotate(90deg);-o-transform:scaleX(-1) rotate(90deg);transform:scaleX(-1) rotate(90deg)}.mirror.rotate-180{-webkit-transform:scaleX(-1) rotate(180deg);-moz-transform:scaleX(-1) rotate(180deg);-ms-transform:scaleX(-1) rotate(180deg);-o-transform:scaleX(-1) rotate(180deg);transform:scaleX(-1) rotate(180deg)}.mirror.rotate-270{-webkit-transform:scaleX(-1) rotate(270deg);-moz-transform:scaleX(-1) rotate(270deg);-ms-transform:scaleX(-1) rotate(270deg);-o-transform:scaleX(-1) rotate(270deg);transform:scaleX(-1) rotate(270deg)}@media only screen and (max-width:480px){.wy-form button[type=submit]{margin:.7em 0 0}.wy-form input[type=color],.wy-form input[type=date],.wy-form input[type=datetime-local],.wy-form input[type=datetime],.wy-form input[type=email],.wy-form input[type=month],.wy-form input[type=number],.wy-form input[type=password],.wy-form input[type=search],.wy-form input[type=tel],.wy-form input[type=text],.wy-form input[type=time],.wy-form input[type=url],.wy-form input[type=week],.wy-form label{margin-bottom:.3em;display:block}.wy-form input[type=color],.wy-form input[type=date],.wy-form 
input[type=datetime-local],.wy-form input[type=datetime],.wy-form input[type=email],.wy-form input[type=month],.wy-form input[type=number],.wy-form input[type=password],.wy-form input[type=search],.wy-form input[type=tel],.wy-form input[type=time],.wy-form input[type=url],.wy-form input[type=week]{margin-bottom:0}.wy-form-aligned .wy-control-group label{margin-bottom:.3em;text-align:left;display:block;width:100%}.wy-form-aligned .wy-control{margin:1.5em 0 0}.wy-form-message,.wy-form-message-inline,.wy-form .wy-help-inline{display:block;font-size:80%;padding:6px 0}}@media screen and (max-width:768px){.tablet-hide{display:none}}@media screen and (max-width:480px){.mobile-hide{display:none}}.float-left{float:left}.float-right{float:right}.full-width{width:100%}.rst-content table.docutils,.rst-content table.field-list,.wy-table{border-collapse:collapse;border-spacing:0;empty-cells:show;margin-bottom:24px}.rst-content table.docutils caption,.rst-content table.field-list caption,.wy-table caption{color:#000;font:italic 85%/1 arial,sans-serif;padding:1em 0;text-align:center}.rst-content table.docutils td,.rst-content table.docutils th,.rst-content table.field-list td,.rst-content table.field-list th,.wy-table td,.wy-table th{font-size:90%;margin:0;overflow:visible;padding:8px 16px}.rst-content table.docutils td:first-child,.rst-content table.docutils th:first-child,.rst-content table.field-list td:first-child,.rst-content table.field-list th:first-child,.wy-table td:first-child,.wy-table th:first-child{border-left-width:0}.rst-content table.docutils thead,.rst-content table.field-list thead,.wy-table thead{color:#000;text-align:left;vertical-align:bottom;white-space:nowrap}.rst-content table.docutils thead th,.rst-content table.field-list thead th,.wy-table thead th{font-weight:700;border-bottom:2px solid #e1e4e5}.rst-content table.docutils td,.rst-content table.field-list td,.wy-table td{background-color:transparent;vertical-align:middle}.rst-content table.docutils td p,.rst-content table.field-list td p,.wy-table td p{line-height:18px}.rst-content table.docutils td p:last-child,.rst-content table.field-list td p:last-child,.wy-table td p:last-child{margin-bottom:0}.rst-content table.docutils .wy-table-cell-min,.rst-content table.field-list .wy-table-cell-min,.wy-table .wy-table-cell-min{width:1%;padding-right:0}.rst-content table.docutils .wy-table-cell-min input[type=checkbox],.rst-content table.field-list .wy-table-cell-min input[type=checkbox],.wy-table .wy-table-cell-min input[type=checkbox]{margin:0}.wy-table-secondary{color:grey;font-size:90%}.wy-table-tertiary{color:grey;font-size:80%}.rst-content table.docutils:not(.field-list) tr:nth-child(2n-1) td,.wy-table-backed,.wy-table-odd td,.wy-table-striped tr:nth-child(2n-1) td{background-color:#f3f6f6}.rst-content table.docutils,.wy-table-bordered-all{border:1px solid #e1e4e5}.rst-content table.docutils td,.wy-table-bordered-all td{border-bottom:1px solid #e1e4e5;border-left:1px solid #e1e4e5}.rst-content table.docutils tbody>tr:last-child td,.wy-table-bordered-all tbody>tr:last-child td{border-bottom-width:0}.wy-table-bordered{border:1px solid #e1e4e5}.wy-table-bordered-rows td{border-bottom:1px solid #e1e4e5}.wy-table-bordered-rows tbody>tr:last-child td{border-bottom-width:0}.wy-table-horizontal td,.wy-table-horizontal th{border-width:0 0 1px;border-bottom:1px solid #e1e4e5}.wy-table-horizontal tbody>tr:last-child td{border-bottom-width:0}.wy-table-responsive{margin-bottom:24px;max-width:100%;overflow:auto}.wy-table-responsive 
table{margin-bottom:0!important}.wy-table-responsive table td,.wy-table-responsive table th{white-space:nowrap}a{color:#2980b9;text-decoration:none;cursor:pointer}a:hover{color:#3091d1}a:visited{color:#9b59b6}html{height:100%}body,html{overflow-x:hidden}body{font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;font-weight:400;color:#404040;min-height:100%;background:#edf0f2}.wy-text-left{text-align:left}.wy-text-center{text-align:center}.wy-text-right{text-align:right}.wy-text-large{font-size:120%}.wy-text-normal{font-size:100%}.wy-text-small,small{font-size:80%}.wy-text-strike{text-decoration:line-through}.wy-text-warning{color:#e67e22!important}a.wy-text-warning:hover{color:#eb9950!important}.wy-text-info{color:#2980b9!important}a.wy-text-info:hover{color:#409ad5!important}.wy-text-success{color:#27ae60!important}a.wy-text-success:hover{color:#36d278!important}.wy-text-danger{color:#e74c3c!important}a.wy-text-danger:hover{color:#ed7669!important}.wy-text-neutral{color:#404040!important}a.wy-text-neutral:hover{color:#595959!important}.rst-content .toctree-wrapper>p.caption,h1,h2,h3,h4,h5,h6,legend{margin-top:0;font-weight:700;font-family:Roboto Slab,ff-tisa-web-pro,Georgia,Arial,sans-serif}p{line-height:24px;font-size:16px;margin:0 0 24px}h1{font-size:175%}.rst-content .toctree-wrapper>p.caption,h2{font-size:150%}h3{font-size:125%}h4{font-size:115%}h5{font-size:110%}h6{font-size:100%}hr{display:block;height:1px;border:0;border-top:1px solid #e1e4e5;margin:24px 0;padding:0}.rst-content code,.rst-content tt,code{white-space:nowrap;max-width:100%;background:#fff;border:1px solid #e1e4e5;font-size:75%;padding:0 5px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;color:#e74c3c;overflow-x:auto}.rst-content tt.code-large,code.code-large{font-size:90%}.rst-content .section ul,.rst-content .toctree-wrapper ul,.rst-content section ul,.wy-plain-list-disc,article ul{list-style:disc;line-height:24px;margin-bottom:24px}.rst-content .section ul li,.rst-content .toctree-wrapper ul li,.rst-content section ul li,.wy-plain-list-disc li,article ul li{list-style:disc;margin-left:24px}.rst-content .section ul li p:last-child,.rst-content .section ul li ul,.rst-content .toctree-wrapper ul li p:last-child,.rst-content .toctree-wrapper ul li ul,.rst-content section ul li p:last-child,.rst-content section ul li ul,.wy-plain-list-disc li p:last-child,.wy-plain-list-disc li ul,article ul li p:last-child,article ul li ul{margin-bottom:0}.rst-content .section ul li li,.rst-content .toctree-wrapper ul li li,.rst-content section ul li li,.wy-plain-list-disc li li,article ul li li{list-style:circle}.rst-content .section ul li li li,.rst-content .toctree-wrapper ul li li li,.rst-content section ul li li li,.wy-plain-list-disc li li li,article ul li li li{list-style:square}.rst-content .section ul li ol li,.rst-content .toctree-wrapper ul li ol li,.rst-content section ul li ol li,.wy-plain-list-disc li ol li,article ul li ol li{list-style:decimal}.rst-content .section ol,.rst-content .section ol.arabic,.rst-content .toctree-wrapper ol,.rst-content .toctree-wrapper ol.arabic,.rst-content section ol,.rst-content section ol.arabic,.wy-plain-list-decimal,article ol{list-style:decimal;line-height:24px;margin-bottom:24px}.rst-content .section ol.arabic li,.rst-content .section ol li,.rst-content .toctree-wrapper ol.arabic li,.rst-content .toctree-wrapper ol li,.rst-content section ol.arabic li,.rst-content section ol li,.wy-plain-list-decimal li,article ol 
li{list-style:decimal;margin-left:24px}.rst-content .section ol.arabic li ul,.rst-content .section ol li p:last-child,.rst-content .section ol li ul,.rst-content .toctree-wrapper ol.arabic li ul,.rst-content .toctree-wrapper ol li p:last-child,.rst-content .toctree-wrapper ol li ul,.rst-content section ol.arabic li ul,.rst-content section ol li p:last-child,.rst-content section ol li ul,.wy-plain-list-decimal li p:last-child,.wy-plain-list-decimal li ul,article ol li p:last-child,article ol li ul{margin-bottom:0}.rst-content .section ol.arabic li ul li,.rst-content .section ol li ul li,.rst-content .toctree-wrapper ol.arabic li ul li,.rst-content .toctree-wrapper ol li ul li,.rst-content section ol.arabic li ul li,.rst-content section ol li ul li,.wy-plain-list-decimal li ul li,article ol li ul li{list-style:disc}.wy-breadcrumbs{*zoom:1}.wy-breadcrumbs:after,.wy-breadcrumbs:before{display:table;content:""}.wy-breadcrumbs:after{clear:both}.wy-breadcrumbs>li{display:inline-block;padding-top:5px}.wy-breadcrumbs>li.wy-breadcrumbs-aside{float:right}.rst-content .wy-breadcrumbs>li code,.rst-content .wy-breadcrumbs>li tt,.wy-breadcrumbs>li .rst-content tt,.wy-breadcrumbs>li code{all:inherit;color:inherit}.breadcrumb-item:before{content:"/";color:#bbb;font-size:13px;padding:0 6px 0 3px}.wy-breadcrumbs-extra{margin-bottom:0;color:#b3b3b3;font-size:80%;display:inline-block}@media screen and (max-width:480px){.wy-breadcrumbs-extra,.wy-breadcrumbs li.wy-breadcrumbs-aside{display:none}}@media print{.wy-breadcrumbs li.wy-breadcrumbs-aside{display:none}}html{font-size:16px}.wy-affix{position:fixed;top:1.618em}.wy-menu a:hover{text-decoration:none}.wy-menu-horiz{*zoom:1}.wy-menu-horiz:after,.wy-menu-horiz:before{display:table;content:""}.wy-menu-horiz:after{clear:both}.wy-menu-horiz li,.wy-menu-horiz ul{display:inline-block}.wy-menu-horiz li:hover{background:hsla(0,0%,100%,.1)}.wy-menu-horiz li.divide-left{border-left:1px solid #404040}.wy-menu-horiz li.divide-right{border-right:1px solid #404040}.wy-menu-horiz a{height:32px;display:inline-block;line-height:32px;padding:0 16px}.wy-menu-vertical{width:300px}.wy-menu-vertical header,.wy-menu-vertical p.caption{color:#55a5d9;height:32px;line-height:32px;padding:0 1.618em;margin:12px 0 0;display:block;font-weight:700;text-transform:uppercase;font-size:85%;white-space:nowrap}.wy-menu-vertical ul{margin-bottom:0}.wy-menu-vertical li.divide-top{border-top:1px solid #404040}.wy-menu-vertical li.divide-bottom{border-bottom:1px solid #404040}.wy-menu-vertical li.current{background:#e3e3e3}.wy-menu-vertical li.current a{color:grey;border-right:1px solid #c9c9c9;padding:.4045em 2.427em}.wy-menu-vertical li.current a:hover{background:#d6d6d6}.rst-content .wy-menu-vertical li tt,.wy-menu-vertical li .rst-content tt,.wy-menu-vertical li code{border:none;background:inherit;color:inherit;padding-left:0;padding-right:0}.wy-menu-vertical li button.toctree-expand{display:block;float:left;margin-left:-1.2em;line-height:18px;color:#4d4d4d;border:none;background:none;padding:0}.wy-menu-vertical li.current>a,.wy-menu-vertical li.on a{color:#404040;font-weight:700;position:relative;background:#fcfcfc;border:none;padding:.4045em 1.618em}.wy-menu-vertical li.current>a:hover,.wy-menu-vertical li.on a:hover{background:#fcfcfc}.wy-menu-vertical li.current>a:hover button.toctree-expand,.wy-menu-vertical li.on a:hover button.toctree-expand{color:grey}.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a 
button.toctree-expand{display:block;line-height:18px;color:#333}.wy-menu-vertical li.toctree-l1.current>a{border-bottom:1px solid #c9c9c9;border-top:1px solid #c9c9c9}.wy-menu-vertical .toctree-l1.current .toctree-l2>ul,.wy-menu-vertical .toctree-l2.current .toctree-l3>ul,.wy-menu-vertical .toctree-l3.current .toctree-l4>ul,.wy-menu-vertical .toctree-l4.current .toctree-l5>ul,.wy-menu-vertical .toctree-l5.current .toctree-l6>ul,.wy-menu-vertical .toctree-l6.current .toctree-l7>ul,.wy-menu-vertical .toctree-l7.current .toctree-l8>ul,.wy-menu-vertical .toctree-l8.current .toctree-l9>ul,.wy-menu-vertical .toctree-l9.current .toctree-l10>ul,.wy-menu-vertical .toctree-l10.current .toctree-l11>ul{display:none}.wy-menu-vertical .toctree-l1.current .current.toctree-l2>ul,.wy-menu-vertical .toctree-l2.current .current.toctree-l3>ul,.wy-menu-vertical .toctree-l3.current .current.toctree-l4>ul,.wy-menu-vertical .toctree-l4.current .current.toctree-l5>ul,.wy-menu-vertical .toctree-l5.current .current.toctree-l6>ul,.wy-menu-vertical .toctree-l6.current .current.toctree-l7>ul,.wy-menu-vertical .toctree-l7.current .current.toctree-l8>ul,.wy-menu-vertical .toctree-l8.current .current.toctree-l9>ul,.wy-menu-vertical .toctree-l9.current .current.toctree-l10>ul,.wy-menu-vertical .toctree-l10.current .current.toctree-l11>ul{display:block}.wy-menu-vertical li.toctree-l3,.wy-menu-vertical li.toctree-l4{font-size:.9em}.wy-menu-vertical li.toctree-l2 a,.wy-menu-vertical li.toctree-l3 a,.wy-menu-vertical li.toctree-l4 a,.wy-menu-vertical li.toctree-l5 a,.wy-menu-vertical li.toctree-l6 a,.wy-menu-vertical li.toctree-l7 a,.wy-menu-vertical li.toctree-l8 a,.wy-menu-vertical li.toctree-l9 a,.wy-menu-vertical li.toctree-l10 a{color:#404040}.wy-menu-vertical li.toctree-l2 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l3 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l4 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l5 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l6 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l7 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l8 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l9 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l10 a:hover button.toctree-expand{color:grey}.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a,.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a,.wy-menu-vertical li.toctree-l4.current li.toctree-l5>a,.wy-menu-vertical li.toctree-l5.current li.toctree-l6>a,.wy-menu-vertical li.toctree-l6.current li.toctree-l7>a,.wy-menu-vertical li.toctree-l7.current li.toctree-l8>a,.wy-menu-vertical li.toctree-l8.current li.toctree-l9>a,.wy-menu-vertical li.toctree-l9.current li.toctree-l10>a,.wy-menu-vertical li.toctree-l10.current li.toctree-l11>a{display:block}.wy-menu-vertical li.toctree-l2.current>a{padding:.4045em 2.427em}.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a{padding:.4045em 1.618em .4045em 4.045em}.wy-menu-vertical li.toctree-l3.current>a{padding:.4045em 4.045em}.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a{padding:.4045em 1.618em .4045em 5.663em}.wy-menu-vertical li.toctree-l4.current>a{padding:.4045em 5.663em}.wy-menu-vertical li.toctree-l4.current li.toctree-l5>a{padding:.4045em 1.618em .4045em 7.281em}.wy-menu-vertical li.toctree-l5.current>a{padding:.4045em 7.281em}.wy-menu-vertical li.toctree-l5.current li.toctree-l6>a{padding:.4045em 1.618em .4045em 8.899em}.wy-menu-vertical li.toctree-l6.current>a{padding:.4045em 
8.899em}.wy-menu-vertical li.toctree-l6.current li.toctree-l7>a{padding:.4045em 1.618em .4045em 10.517em}.wy-menu-vertical li.toctree-l7.current>a{padding:.4045em 10.517em}.wy-menu-vertical li.toctree-l7.current li.toctree-l8>a{padding:.4045em 1.618em .4045em 12.135em}.wy-menu-vertical li.toctree-l8.current>a{padding:.4045em 12.135em}.wy-menu-vertical li.toctree-l8.current li.toctree-l9>a{padding:.4045em 1.618em .4045em 13.753em}.wy-menu-vertical li.toctree-l9.current>a{padding:.4045em 13.753em}.wy-menu-vertical li.toctree-l9.current li.toctree-l10>a{padding:.4045em 1.618em .4045em 15.371em}.wy-menu-vertical li.toctree-l10.current>a{padding:.4045em 15.371em}.wy-menu-vertical li.toctree-l10.current li.toctree-l11>a{padding:.4045em 1.618em .4045em 16.989em}.wy-menu-vertical li.toctree-l2.current>a,.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a{background:#c9c9c9}.wy-menu-vertical li.toctree-l2 button.toctree-expand{color:#a3a3a3}.wy-menu-vertical li.toctree-l3.current>a,.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a{background:#bdbdbd}.wy-menu-vertical li.toctree-l3 button.toctree-expand{color:#969696}.wy-menu-vertical li.current ul{display:block}.wy-menu-vertical li ul{margin-bottom:0;display:none}.wy-menu-vertical li ul li a{margin-bottom:0;color:#d9d9d9;font-weight:400}.wy-menu-vertical a{line-height:18px;padding:.4045em 1.618em;display:block;position:relative;font-size:90%;color:#d9d9d9}.wy-menu-vertical a:hover{background-color:#4e4a4a;cursor:pointer}.wy-menu-vertical a:hover button.toctree-expand{color:#d9d9d9}.wy-menu-vertical a:active{background-color:#2980b9;cursor:pointer;color:#fff}.wy-menu-vertical a:active button.toctree-expand{color:#fff}.wy-side-nav-search{display:block;width:300px;padding:.809em;margin-bottom:.809em;z-index:200;background-color:#2980b9;text-align:center;color:#fcfcfc}.wy-side-nav-search input[type=text]{width:100%;border-radius:50px;padding:6px 12px;border-color:#2472a4}.wy-side-nav-search img{display:block;margin:auto auto .809em;height:45px;width:45px;background-color:#2980b9;padding:5px;border-radius:100%}.wy-side-nav-search .wy-dropdown>a,.wy-side-nav-search>a{color:#fcfcfc;font-size:100%;font-weight:700;display:inline-block;padding:4px 6px;margin-bottom:.809em;max-width:100%}.wy-side-nav-search .wy-dropdown>a:hover,.wy-side-nav-search>a:hover{background:hsla(0,0%,100%,.1)}.wy-side-nav-search .wy-dropdown>a img.logo,.wy-side-nav-search>a img.logo{display:block;margin:0 auto;height:auto;width:auto;border-radius:0;max-width:100%;background:transparent}.wy-side-nav-search .wy-dropdown>a.icon img.logo,.wy-side-nav-search>a.icon img.logo{margin-top:.85em}.wy-side-nav-search>div.version{margin-top:-.4045em;margin-bottom:.809em;font-weight:400;color:hsla(0,0%,100%,.3)}.wy-nav .wy-menu-vertical header{color:#2980b9}.wy-nav .wy-menu-vertical a{color:#b3b3b3}.wy-nav .wy-menu-vertical a:hover{background-color:#2980b9;color:#fff}[data-menu-wrap]{-webkit-transition:all .2s ease-in;-moz-transition:all .2s ease-in;transition:all .2s 
ease-in;position:absolute;opacity:1;width:100%;opacity:0}[data-menu-wrap].move-center{left:0;right:auto;opacity:1}[data-menu-wrap].move-left{right:auto;left:-100%;opacity:0}[data-menu-wrap].move-right{right:-100%;left:auto;opacity:0}.wy-body-for-nav{background:#fcfcfc}.wy-grid-for-nav{position:absolute;width:100%;height:100%}.wy-nav-side{position:fixed;top:0;bottom:0;left:0;padding-bottom:2em;width:300px;overflow-x:hidden;overflow-y:hidden;min-height:100%;color:#9b9b9b;background:#343131;z-index:200}.wy-side-scroll{width:320px;position:relative;overflow-x:hidden;overflow-y:scroll;height:100%}.wy-nav-top{display:none;background:#2980b9;color:#fff;padding:.4045em .809em;position:relative;line-height:50px;text-align:center;font-size:100%;*zoom:1}.wy-nav-top:after,.wy-nav-top:before{display:table;content:""}.wy-nav-top:after{clear:both}.wy-nav-top a{color:#fff;font-weight:700}.wy-nav-top img{margin-right:12px;height:45px;width:45px;background-color:#2980b9;padding:5px;border-radius:100%}.wy-nav-top i{font-size:30px;float:left;cursor:pointer;padding-top:inherit}.wy-nav-content-wrap{margin-left:300px;background:#fcfcfc;min-height:100%}.wy-nav-content{padding:1.618em 3.236em;height:100%;max-width:800px;margin:auto}.wy-body-mask{position:fixed;width:100%;height:100%;background:rgba(0,0,0,.2);display:none;z-index:499}.wy-body-mask.on{display:block}footer{color:grey}footer p{margin-bottom:12px}.rst-content footer span.commit tt,footer span.commit .rst-content tt,footer span.commit code{padding:0;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;font-size:1em;background:none;border:none;color:grey}.rst-footer-buttons{*zoom:1}.rst-footer-buttons:after,.rst-footer-buttons:before{width:100%;display:table;content:""}.rst-footer-buttons:after{clear:both}.rst-breadcrumbs-buttons{margin-top:12px;*zoom:1}.rst-breadcrumbs-buttons:after,.rst-breadcrumbs-buttons:before{display:table;content:""}.rst-breadcrumbs-buttons:after{clear:both}#search-results .search li{margin-bottom:24px;border-bottom:1px solid #e1e4e5;padding-bottom:24px}#search-results .search li:first-child{border-top:1px solid #e1e4e5;padding-top:24px}#search-results .search li a{font-size:120%;margin-bottom:12px;display:inline-block}#search-results .context{color:grey;font-size:90%}.genindextable li>ul{margin-left:24px}@media screen and (max-width:768px){.wy-body-for-nav{background:#fcfcfc}.wy-nav-top{display:block}.wy-nav-side{left:-300px}.wy-nav-side.shift{width:85%;left:0}.wy-menu.wy-menu-vertical,.wy-side-nav-search,.wy-side-scroll{width:auto}.wy-nav-content-wrap{margin-left:0}.wy-nav-content-wrap .wy-nav-content{padding:1.618em}.wy-nav-content-wrap.shift{position:fixed;min-width:100%;left:85%;top:0;height:100%;overflow:hidden}}@media screen and (min-width:1100px){.wy-nav-content-wrap{background:rgba(0,0,0,.05)}.wy-nav-content{margin:0;background:#fcfcfc}}@media print{.rst-versions,.wy-nav-side,footer{display:none}.wy-nav-content-wrap{margin-left:0}}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60;*zoom:1}.rst-versions .rst-current-version:after,.rst-versions .rst-current-version:before{display:table;content:""}.rst-versions 
.rst-current-version:after{clear:both}.rst-content .code-block-caption .rst-versions .rst-current-version .headerlink,.rst-content .eqno .rst-versions .rst-current-version .headerlink,.rst-content .rst-versions .rst-current-version .admonition-title,.rst-content code.download .rst-versions .rst-current-version span:first-child,.rst-content dl dt .rst-versions .rst-current-version .headerlink,.rst-content h1 .rst-versions .rst-current-version .headerlink,.rst-content h2 .rst-versions .rst-current-version .headerlink,.rst-content h3 .rst-versions .rst-current-version .headerlink,.rst-content h4 .rst-versions .rst-current-version .headerlink,.rst-content h5 .rst-versions .rst-current-version .headerlink,.rst-content h6 .rst-versions .rst-current-version .headerlink,.rst-content p .rst-versions .rst-current-version .headerlink,.rst-content table>caption .rst-versions .rst-current-version .headerlink,.rst-content tt.download .rst-versions .rst-current-version span:first-child,.rst-versions .rst-current-version .fa,.rst-versions .rst-current-version .icon,.rst-versions .rst-current-version .rst-content .admonition-title,.rst-versions .rst-current-version .rst-content .code-block-caption .headerlink,.rst-versions .rst-current-version .rst-content .eqno .headerlink,.rst-versions .rst-current-version .rst-content code.download span:first-child,.rst-versions .rst-current-version .rst-content dl dt .headerlink,.rst-versions .rst-current-version .rst-content h1 .headerlink,.rst-versions .rst-current-version .rst-content h2 .headerlink,.rst-versions .rst-current-version .rst-content h3 .headerlink,.rst-versions .rst-current-version .rst-content h4 .headerlink,.rst-versions .rst-current-version .rst-content h5 .headerlink,.rst-versions .rst-current-version .rst-content h6 .headerlink,.rst-versions .rst-current-version .rst-content p .headerlink,.rst-versions .rst-current-version .rst-content table>caption .headerlink,.rst-versions .rst-current-version .rst-content tt.download span:first-child,.rst-versions .rst-current-version .wy-menu-vertical li button.toctree-expand,.wy-menu-vertical li .rst-versions .rst-current-version button.toctree-expand{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and 
(max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}.rst-content .toctree-wrapper>p.caption,.rst-content h1,.rst-content h2,.rst-content h3,.rst-content h4,.rst-content h5,.rst-content h6{margin-bottom:24px}.rst-content img{max-width:100%;height:auto}.rst-content div.figure,.rst-content figure{margin-bottom:24px}.rst-content div.figure .caption-text,.rst-content figure .caption-text{font-style:italic}.rst-content div.figure p:last-child.caption,.rst-content figure p:last-child.caption{margin-bottom:0}.rst-content div.figure.align-center,.rst-content figure.align-center{text-align:center}.rst-content .section>a>img,.rst-content .section>img,.rst-content section>a>img,.rst-content section>img{margin-bottom:24px}.rst-content abbr[title]{text-decoration:none}.rst-content.style-external-links a.reference.external:after{font-family:FontAwesome;content:"\f08e";color:#b3b3b3;vertical-align:super;font-size:60%;margin:0 .2em}.rst-content blockquote{margin-left:24px;line-height:24px;margin-bottom:24px}.rst-content pre.literal-block{white-space:pre;margin:0;padding:12px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;display:block;overflow:auto}.rst-content div[class^=highlight],.rst-content pre.literal-block{border:1px solid #e1e4e5;overflow-x:auto;margin:1px 0 24px}.rst-content div[class^=highlight] div[class^=highlight],.rst-content pre.literal-block div[class^=highlight]{padding:0;border:none;margin:0}.rst-content div[class^=highlight] td.code{width:100%}.rst-content .linenodiv pre{border-right:1px solid #e6e9ea;margin:0;padding:12px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;user-select:none;pointer-events:none}.rst-content div[class^=highlight] pre{white-space:pre;margin:0;padding:12px;display:block;overflow:auto}.rst-content div[class^=highlight] pre .hll{display:block;margin:0 -12px;padding:0 12px}.rst-content .linenodiv pre,.rst-content div[class^=highlight] pre,.rst-content pre.literal-block{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;font-size:12px;line-height:1.4}.rst-content div.highlight .gp,.rst-content div.highlight span.linenos{user-select:none;pointer-events:none}.rst-content div.highlight span.linenos{display:inline-block;padding-left:0;padding-right:12px;margin-right:12px;border-right:1px solid #e6e9ea}.rst-content .code-block-caption{font-style:italic;font-size:85%;line-height:1;padding:1em 0;text-align:center}@media print{.rst-content .codeblock,.rst-content div[class^=highlight],.rst-content div[class^=highlight] pre{white-space:pre-wrap}}.rst-content .admonition,.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning{clear:both}.rst-content .admonition-todo .last,.rst-content .admonition-todo>:last-child,.rst-content .admonition .last,.rst-content .admonition>:last-child,.rst-content .attention .last,.rst-content .attention>:last-child,.rst-content .caution .last,.rst-content .caution>:last-child,.rst-content .danger .last,.rst-content .danger>:last-child,.rst-content .error .last,.rst-content .error>:last-child,.rst-content .hint .last,.rst-content .hint>:last-child,.rst-content .important .last,.rst-content .important>:last-child,.rst-content .note .last,.rst-content .note>:last-child,.rst-content .seealso 
.last,.rst-content .seealso>:last-child,.rst-content .tip .last,.rst-content .tip>:last-child,.rst-content .warning .last,.rst-content .warning>:last-child{margin-bottom:0}.rst-content .admonition-title:before{margin-right:4px}.rst-content .admonition table{border-color:rgba(0,0,0,.1)}.rst-content .admonition table td,.rst-content .admonition table th{background:transparent!important;border-color:rgba(0,0,0,.1)!important}.rst-content .section ol.loweralpha,.rst-content .section ol.loweralpha>li,.rst-content .toctree-wrapper ol.loweralpha,.rst-content .toctree-wrapper ol.loweralpha>li,.rst-content section ol.loweralpha,.rst-content section ol.loweralpha>li{list-style:lower-alpha}.rst-content .section ol.upperalpha,.rst-content .section ol.upperalpha>li,.rst-content .toctree-wrapper ol.upperalpha,.rst-content .toctree-wrapper ol.upperalpha>li,.rst-content section ol.upperalpha,.rst-content section ol.upperalpha>li{list-style:upper-alpha}.rst-content .section ol li>*,.rst-content .section ul li>*,.rst-content .toctree-wrapper ol li>*,.rst-content .toctree-wrapper ul li>*,.rst-content section ol li>*,.rst-content section ul li>*{margin-top:12px;margin-bottom:12px}.rst-content .section ol li>:first-child,.rst-content .section ul li>:first-child,.rst-content .toctree-wrapper ol li>:first-child,.rst-content .toctree-wrapper ul li>:first-child,.rst-content section ol li>:first-child,.rst-content section ul li>:first-child{margin-top:0}.rst-content .section ol li>p,.rst-content .section ol li>p:last-child,.rst-content .section ul li>p,.rst-content .section ul li>p:last-child,.rst-content .toctree-wrapper ol li>p,.rst-content .toctree-wrapper ol li>p:last-child,.rst-content .toctree-wrapper ul li>p,.rst-content .toctree-wrapper ul li>p:last-child,.rst-content section ol li>p,.rst-content section ol li>p:last-child,.rst-content section ul li>p,.rst-content section ul li>p:last-child{margin-bottom:12px}.rst-content .section ol li>p:only-child,.rst-content .section ol li>p:only-child:last-child,.rst-content .section ul li>p:only-child,.rst-content .section ul li>p:only-child:last-child,.rst-content .toctree-wrapper ol li>p:only-child,.rst-content .toctree-wrapper ol li>p:only-child:last-child,.rst-content .toctree-wrapper ul li>p:only-child,.rst-content .toctree-wrapper ul li>p:only-child:last-child,.rst-content section ol li>p:only-child,.rst-content section ol li>p:only-child:last-child,.rst-content section ul li>p:only-child,.rst-content section ul li>p:only-child:last-child{margin-bottom:0}.rst-content .section ol li>ol,.rst-content .section ol li>ul,.rst-content .section ul li>ol,.rst-content .section ul li>ul,.rst-content .toctree-wrapper ol li>ol,.rst-content .toctree-wrapper ol li>ul,.rst-content .toctree-wrapper ul li>ol,.rst-content .toctree-wrapper ul li>ul,.rst-content section ol li>ol,.rst-content section ol li>ul,.rst-content section ul li>ol,.rst-content section ul li>ul{margin-bottom:12px}.rst-content .section ol.simple li>*,.rst-content .section ol.simple li ol,.rst-content .section ol.simple li ul,.rst-content .section ul.simple li>*,.rst-content .section ul.simple li ol,.rst-content .section ul.simple li ul,.rst-content .toctree-wrapper ol.simple li>*,.rst-content .toctree-wrapper ol.simple li ol,.rst-content .toctree-wrapper ol.simple li ul,.rst-content .toctree-wrapper ul.simple li>*,.rst-content .toctree-wrapper ul.simple li ol,.rst-content .toctree-wrapper ul.simple li ul,.rst-content section ol.simple li>*,.rst-content section ol.simple li ol,.rst-content section ol.simple li 
ul,.rst-content section ul.simple li>*,.rst-content section ul.simple li ol,.rst-content section ul.simple li ul{margin-top:0;margin-bottom:0}.rst-content .line-block{margin-left:0;margin-bottom:24px;line-height:24px}.rst-content .line-block .line-block{margin-left:24px;margin-bottom:0}.rst-content .topic-title{font-weight:700;margin-bottom:12px}.rst-content .toc-backref{color:#404040}.rst-content .align-right{float:right;margin:0 0 24px 24px}.rst-content .align-left{float:left;margin:0 24px 24px 0}.rst-content .align-center{margin:auto}.rst-content .align-center:not(table){display:block}.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content .toctree-wrapper>p.caption .headerlink,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink{opacity:0;font-size:14px;font-family:FontAwesome;margin-left:.5em}.rst-content .code-block-caption .headerlink:focus,.rst-content .code-block-caption:hover .headerlink,.rst-content .eqno .headerlink:focus,.rst-content .eqno:hover .headerlink,.rst-content .toctree-wrapper>p.caption .headerlink:focus,.rst-content .toctree-wrapper>p.caption:hover .headerlink,.rst-content dl dt .headerlink:focus,.rst-content dl dt:hover .headerlink,.rst-content h1 .headerlink:focus,.rst-content h1:hover .headerlink,.rst-content h2 .headerlink:focus,.rst-content h2:hover .headerlink,.rst-content h3 .headerlink:focus,.rst-content h3:hover .headerlink,.rst-content h4 .headerlink:focus,.rst-content h4:hover .headerlink,.rst-content h5 .headerlink:focus,.rst-content h5:hover .headerlink,.rst-content h6 .headerlink:focus,.rst-content h6:hover .headerlink,.rst-content p.caption .headerlink:focus,.rst-content p.caption:hover .headerlink,.rst-content p .headerlink:focus,.rst-content p:hover .headerlink,.rst-content table>caption .headerlink:focus,.rst-content table>caption:hover .headerlink{opacity:1}.rst-content p a{overflow-wrap:anywhere}.rst-content .wy-table td p,.rst-content .wy-table td ul,.rst-content .wy-table th p,.rst-content .wy-table th ul,.rst-content table.docutils td p,.rst-content table.docutils td ul,.rst-content table.docutils th p,.rst-content table.docutils th ul,.rst-content table.field-list td p,.rst-content table.field-list td ul,.rst-content table.field-list th p,.rst-content table.field-list th ul{font-size:inherit}.rst-content .btn:focus{outline:2px solid}.rst-content table>caption .headerlink:after{font-size:12px}.rst-content .centered{text-align:center}.rst-content .sidebar{float:right;width:40%;display:block;margin:0 0 24px 24px;padding:24px;background:#f3f6f6;border:1px solid #e1e4e5}.rst-content .sidebar dl,.rst-content .sidebar p,.rst-content .sidebar ul{font-size:90%}.rst-content .sidebar .last,.rst-content .sidebar>:last-child{margin-bottom:0}.rst-content .sidebar .sidebar-title{display:block;font-family:Roboto Slab,ff-tisa-web-pro,Georgia,Arial,sans-serif;font-weight:700;background:#e1e4e5;padding:6px 12px;margin:-24px -24px 24px;font-size:100%}.rst-content .highlighted{background:#f1c40f;box-shadow:0 0 0 2px #f1c40f;display:inline;font-weight:700}.rst-content .citation-reference,.rst-content .footnote-reference{vertical-align:baseline;position:relative;top:-.4em;line-height:0;font-size:90%}.rst-content .citation-reference>span.fn-bracket,.rst-content 
.footnote-reference>span.fn-bracket{display:none}.rst-content .hlist{width:100%}.rst-content dl dt span.classifier:before{content:" : "}.rst-content dl dt span.classifier-delimiter{display:none!important}html.writer-html4 .rst-content table.docutils.citation,html.writer-html4 .rst-content table.docutils.footnote{background:none;border:none}html.writer-html4 .rst-content table.docutils.citation td,html.writer-html4 .rst-content table.docutils.citation tr,html.writer-html4 .rst-content table.docutils.footnote td,html.writer-html4 .rst-content table.docutils.footnote tr{border:none;background-color:transparent!important;white-space:normal}html.writer-html4 .rst-content table.docutils.citation td.label,html.writer-html4 .rst-content table.docutils.footnote td.label{padding-left:0;padding-right:0;vertical-align:top}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.field-list,html.writer-html5 .rst-content dl.footnote{display:grid;grid-template-columns:auto minmax(80%,95%)}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dt{display:inline-grid;grid-template-columns:max-content auto}html.writer-html5 .rst-content aside.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content div.citation{display:grid;grid-template-columns:auto auto minmax(.65rem,auto) minmax(40%,95%)}html.writer-html5 .rst-content aside.citation>span.label,html.writer-html5 .rst-content aside.footnote>span.label,html.writer-html5 .rst-content div.citation>span.label{grid-column-start:1;grid-column-end:2}html.writer-html5 .rst-content aside.citation>span.backrefs,html.writer-html5 .rst-content aside.footnote>span.backrefs,html.writer-html5 .rst-content div.citation>span.backrefs{grid-column-start:2;grid-column-end:3;grid-row-start:1;grid-row-end:3}html.writer-html5 .rst-content aside.citation>p,html.writer-html5 .rst-content aside.footnote>p,html.writer-html5 .rst-content div.citation>p{grid-column-start:4;grid-column-end:5}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.field-list,html.writer-html5 .rst-content dl.footnote{margin-bottom:24px}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dt{padding-left:1rem}html.writer-html5 .rst-content dl.citation>dd,html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dd,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dd,html.writer-html5 .rst-content dl.footnote>dt{margin-bottom:0}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.footnote{font-size:.9rem}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.footnote>dt{margin:0 .5rem .5rem 0;line-height:1.2rem;word-break:break-all;font-weight:400}html.writer-html5 .rst-content dl.citation>dt>span.brackets:before,html.writer-html5 .rst-content dl.footnote>dt>span.brackets:before{content:"["}html.writer-html5 .rst-content dl.citation>dt>span.brackets:after,html.writer-html5 .rst-content dl.footnote>dt>span.brackets:after{content:"]"}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref,html.writer-html5 .rst-content dl.footnote>dt>span.fn-backref{text-align:left;font-style:italic;margin-left:.65rem;word-break:break-word;word-spacing:-.1rem;max-width:5rem}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref>a,html.writer-html5 
.rst-content dl.footnote>dt>span.fn-backref>a{word-break:keep-all}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref>a:not(:first-child):before,html.writer-html5 .rst-content dl.footnote>dt>span.fn-backref>a:not(:first-child):before{content:" "}html.writer-html5 .rst-content dl.citation>dd,html.writer-html5 .rst-content dl.footnote>dd{margin:0 0 .5rem;line-height:1.2rem}html.writer-html5 .rst-content dl.citation>dd p,html.writer-html5 .rst-content dl.footnote>dd p{font-size:.9rem}html.writer-html5 .rst-content aside.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content div.citation{padding-left:1rem;padding-right:1rem;font-size:.9rem;line-height:1.2rem}html.writer-html5 .rst-content aside.citation p,html.writer-html5 .rst-content aside.footnote p,html.writer-html5 .rst-content div.citation p{font-size:.9rem;line-height:1.2rem;margin-bottom:12px}html.writer-html5 .rst-content aside.citation span.backrefs,html.writer-html5 .rst-content aside.footnote span.backrefs,html.writer-html5 .rst-content div.citation span.backrefs{text-align:left;font-style:italic;margin-left:.65rem;word-break:break-word;word-spacing:-.1rem;max-width:5rem}html.writer-html5 .rst-content aside.citation span.backrefs>a,html.writer-html5 .rst-content aside.footnote span.backrefs>a,html.writer-html5 .rst-content div.citation span.backrefs>a{word-break:keep-all}html.writer-html5 .rst-content aside.citation span.backrefs>a:not(:first-child):before,html.writer-html5 .rst-content aside.footnote span.backrefs>a:not(:first-child):before,html.writer-html5 .rst-content div.citation span.backrefs>a:not(:first-child):before{content:" "}html.writer-html5 .rst-content aside.citation span.label,html.writer-html5 .rst-content aside.footnote span.label,html.writer-html5 .rst-content div.citation span.label{line-height:1.2rem}html.writer-html5 .rst-content aside.citation-list,html.writer-html5 .rst-content aside.footnote-list,html.writer-html5 .rst-content div.citation-list{margin-bottom:24px}html.writer-html5 .rst-content dl.option-list kbd{font-size:.9rem}.rst-content table.docutils.footnote,html.writer-html4 .rst-content table.docutils.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content aside.footnote-list aside.footnote,html.writer-html5 .rst-content div.citation-list>div.citation,html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.footnote{color:grey}.rst-content table.docutils.footnote code,.rst-content table.docutils.footnote tt,html.writer-html4 .rst-content table.docutils.citation code,html.writer-html4 .rst-content table.docutils.citation tt,html.writer-html5 .rst-content aside.footnote-list aside.footnote code,html.writer-html5 .rst-content aside.footnote-list aside.footnote tt,html.writer-html5 .rst-content aside.footnote code,html.writer-html5 .rst-content aside.footnote tt,html.writer-html5 .rst-content div.citation-list>div.citation code,html.writer-html5 .rst-content div.citation-list>div.citation tt,html.writer-html5 .rst-content dl.citation code,html.writer-html5 .rst-content dl.citation tt,html.writer-html5 .rst-content dl.footnote code,html.writer-html5 .rst-content dl.footnote tt{color:#555}.rst-content .wy-table-responsive.citation,.rst-content .wy-table-responsive.footnote{margin-bottom:0}.rst-content .wy-table-responsive.citation+:not(.citation),.rst-content .wy-table-responsive.footnote+:not(.footnote){margin-top:24px}.rst-content .wy-table-responsive.citation:last-child,.rst-content 
.wy-table-responsive.footnote:last-child{margin-bottom:24px}.rst-content table.docutils th{border-color:#e1e4e5}html.writer-html5 .rst-content table.docutils th{border:1px solid #e1e4e5}html.writer-html5 .rst-content table.docutils td>p,html.writer-html5 .rst-content table.docutils th>p{line-height:1rem;margin-bottom:0;font-size:.9rem}.rst-content table.docutils td .last,.rst-content table.docutils td .last>:last-child{margin-bottom:0}.rst-content table.field-list,.rst-content table.field-list td{border:none}.rst-content table.field-list td p{line-height:inherit}.rst-content table.field-list td>strong{display:inline-block}.rst-content table.field-list .field-name{padding-right:10px;text-align:left;white-space:nowrap}.rst-content table.field-list .field-body{text-align:left}.rst-content code,.rst-content tt{color:#000;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;padding:2px 5px}.rst-content code big,.rst-content code em,.rst-content tt big,.rst-content tt em{font-size:100%!important;line-height:normal}.rst-content code.literal,.rst-content tt.literal{color:#e74c3c;white-space:normal}.rst-content code.xref,.rst-content tt.xref,a .rst-content code,a .rst-content tt{font-weight:700;color:#404040;overflow-wrap:normal}.rst-content kbd,.rst-content pre,.rst-content samp{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace}.rst-content a code,.rst-content a tt{color:#2980b9}.rst-content dl{margin-bottom:24px}.rst-content dl dt{font-weight:700;margin-bottom:12px}.rst-content dl ol,.rst-content dl p,.rst-content dl table,.rst-content dl ul{margin-bottom:12px}.rst-content dl dd{margin:0 0 12px 24px;line-height:24px}.rst-content dl dd>ol:last-child,.rst-content dl dd>p:last-child,.rst-content dl dd>table:last-child,.rst-content dl dd>ul:last-child{margin-bottom:0}html.writer-html4 .rst-content dl:not(.docutils),html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple){margin-bottom:24px}html.writer-html4 .rst-content dl:not(.docutils)>dt,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt{display:table;margin:6px 0;font-size:90%;line-height:normal;background:#e7f2fa;color:#2980b9;border-top:3px solid #6ab0de;padding:6px;position:relative}html.writer-html4 .rst-content dl:not(.docutils)>dt:before,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt:before{color:#6ab0de}html.writer-html4 .rst-content dl:not(.docutils)>dt .headerlink,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink{color:#404040;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt{margin-bottom:6px;border:none;border-left:3px solid #ccc;background:#f0f0f0;color:#555}html.writer-html4 .rst-content dl:not(.docutils) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink,html.writer-html5 .rst-content 
dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink{color:#404040;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils)>dt:first-child,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt:first-child{margin-top:0}html.writer-html4 .rst-content dl:not(.docutils) code.descclassname,html.writer-html4 .rst-content dl:not(.docutils) code.descname,html.writer-html4 .rst-content dl:not(.docutils) tt.descclassname,html.writer-html4 .rst-content dl:not(.docutils) tt.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descname{background-color:transparent;border:none;padding:0;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils) code.descname,html.writer-html4 .rst-content dl:not(.docutils) tt.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descname{font-weight:700}html.writer-html4 .rst-content dl:not(.docutils) .optional,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .optional{display:inline-block;padding:0 4px;color:#000;font-weight:700}html.writer-html4 .rst-content dl:not(.docutils) .property,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .property{display:inline-block;padding-right:8px;max-width:100%}html.writer-html4 .rst-content dl:not(.docutils) .k,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .k{font-style:italic}html.writer-html4 .rst-content dl:not(.docutils) .descclassname,html.writer-html4 .rst-content dl:not(.docutils) .descname,html.writer-html4 .rst-content dl:not(.docutils) .sig-name,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .sig-name{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;color:#000}.rst-content .viewcode-back,.rst-content .viewcode-link{display:inline-block;color:#27ae60;font-size:80%;padding-left:24px}.rst-content .viewcode-back{display:block;float:right}.rst-content p.rubric{margin-bottom:12px;font-weight:700}.rst-content 
code.download,.rst-content tt.download{background:inherit;padding:inherit;font-weight:400;font-family:inherit;font-size:inherit;color:inherit;border:inherit;white-space:inherit}.rst-content code.download span:first-child,.rst-content tt.download span:first-child{-webkit-font-smoothing:subpixel-antialiased}.rst-content code.download span:first-child:before,.rst-content tt.download span:first-child:before{margin-right:4px}.rst-content .guilabel,.rst-content .menuselection{font-size:80%;font-weight:700;border-radius:4px;padding:2.4px 6px;margin:auto 2px}.rst-content .guilabel,.rst-content .menuselection{border:1px solid #7fbbe3;background:#e7f2fa}.rst-content :not(dl.option-list)>:not(dt):not(kbd):not(.kbd)>.kbd,.rst-content :not(dl.option-list)>:not(dt):not(kbd):not(.kbd)>kbd{color:inherit;font-size:80%;background-color:#fff;border:1px solid #a6a6a6;border-radius:4px;box-shadow:0 2px grey;padding:2.4px 6px;margin:auto 0}.rst-content .versionmodified{font-style:italic}@media screen and (max-width:480px){.rst-content .sidebar{width:100%}}span[id*=MathJax-Span]{color:#404040}.math{text-align:center}@font-face{font-family:Lato;src:url(fonts/lato-normal.woff2?bd03a2cc277bbbc338d464e679fe9942) format("woff2"),url(fonts/lato-normal.woff?27bd77b9162d388cb8d4c4217c7c5e2a) format("woff");font-weight:400;font-style:normal;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-bold.woff2?cccb897485813c7c256901dbca54ecf2) format("woff2"),url(fonts/lato-bold.woff?d878b6c29b10beca227e9eef4246111b) format("woff");font-weight:700;font-style:normal;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-bold-italic.woff2?0b6bb6725576b072c5d0b02ecdd1900d) format("woff2"),url(fonts/lato-bold-italic.woff?9c7e4e9eb485b4a121c760e61bc3707c) format("woff");font-weight:700;font-style:italic;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-normal-italic.woff2?4eb103b4d12be57cb1d040ed5e162e9d) format("woff2"),url(fonts/lato-normal-italic.woff?f28f2d6482446544ef1ea1ccc6dd5892) format("woff");font-weight:400;font-style:italic;font-display:block}@font-face{font-family:Roboto Slab;font-style:normal;font-weight:400;src:url(fonts/Roboto-Slab-Regular.woff2?7abf5b8d04d26a2cafea937019bca958) format("woff2"),url(fonts/Roboto-Slab-Regular.woff?c1be9284088d487c5e3ff0a10a92e58c) format("woff");font-display:block}@font-face{font-family:Roboto Slab;font-style:normal;font-weight:700;src:url(fonts/Roboto-Slab-Bold.woff2?9984f4a9bda09be08e83f2506954adbe) format("woff2"),url(fonts/Roboto-Slab-Bold.woff?bed5564a116b05148e3b3bea6fb1162a) format("woff");font-display:block} \ No newline at end of file diff --git a/_static/custom.css b/_static/custom.css new file mode 100644 index 0000000..5209e16 --- /dev/null +++ b/_static/custom.css @@ -0,0 +1,39 @@ +/* Hide Warnings outputs from rendered Jupyter Notebooks */ +div.nboutput.container div.output_area.stderr { + display: none; + visibility: hidden; +} + +/* Increase the width of the content area (default 800px) */ +div.wy-nav-content { + max-width: 1000px; +} + +/* Show a `$` sign before bash code-blocks */ +div.highlight-bash pre::before, div.highlight-shell pre::before { + content: "$ "; +} + +/* Reduce margin from left sidebar titles */ +p.caption { + margin-top: 0.25em !important; +} + +/* Reduce padding from left sidebar items */ +a.reference.internal { + padding-top: 0; +} + +.wy-table-responsive table td, .wy-table-responsive table th { + white-space: normal; +} + +.wy-table-responsive { + margin-bottom: 24px; + max-width: 100%; + overflow: 
visible; +} + +.wy-table-responsive th p { + margin-bottom: unset; +} diff --git a/_static/doctools.js b/_static/doctools.js new file mode 100644 index 0000000..d06a71d --- /dev/null +++ b/_static/doctools.js @@ -0,0 +1,156 @@ +/* + * doctools.js + * ~~~~~~~~~~~ + * + * Base JavaScript utilities for all Sphinx HTML documentation. + * + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ +"use strict"; + +const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ + "TEXTAREA", + "INPUT", + "SELECT", + "BUTTON", +]); + +const _ready = (callback) => { + if (document.readyState !== "loading") { + callback(); + } else { + document.addEventListener("DOMContentLoaded", callback); + } +}; + +/** + * Small JavaScript module for the documentation. + */ +const Documentation = { + init: () => { + Documentation.initDomainIndexTable(); + Documentation.initOnKeyListeners(); + }, + + /** + * i18n support + */ + TRANSLATIONS: {}, + PLURAL_EXPR: (n) => (n === 1 ? 0 : 1), + LOCALE: "unknown", + + // gettext and ngettext don't access this so that the functions + // can safely bound to a different name (_ = Documentation.gettext) + gettext: (string) => { + const translated = Documentation.TRANSLATIONS[string]; + switch (typeof translated) { + case "undefined": + return string; // no translation + case "string": + return translated; // translation exists + default: + return translated[0]; // (singular, plural) translation tuple exists + } + }, + + ngettext: (singular, plural, n) => { + const translated = Documentation.TRANSLATIONS[singular]; + if (typeof translated !== "undefined") + return translated[Documentation.PLURAL_EXPR(n)]; + return n === 1 ? singular : plural; + }, + + addTranslations: (catalog) => { + Object.assign(Documentation.TRANSLATIONS, catalog.messages); + Documentation.PLURAL_EXPR = new Function( + "n", + `return (${catalog.plural_expr})` + ); + Documentation.LOCALE = catalog.locale; + }, + + /** + * helper function to focus on search bar + */ + focusSearchBar: () => { + document.querySelectorAll("input[name=q]")[0]?.focus(); + }, + + /** + * Initialise the domain index toggle buttons + */ + initDomainIndexTable: () => { + const toggler = (el) => { + const idNumber = el.id.substr(7); + const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); + if (el.src.substr(-9) === "minus.png") { + el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; + toggledRows.forEach((el) => (el.style.display = "none")); + } else { + el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; + toggledRows.forEach((el) => (el.style.display = "")); + } + }; + + const togglerElements = document.querySelectorAll("img.toggler"); + togglerElements.forEach((el) => + el.addEventListener("click", (event) => toggler(event.currentTarget)) + ); + togglerElements.forEach((el) => (el.style.display = "")); + if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); + }, + + initOnKeyListeners: () => { + // only install a listener if it is really needed + if ( + !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && + !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS + ) + return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.altKey || event.ctrlKey || event.metaKey) return; + + if (!event.shiftKey) { + switch (event.key) { + case "ArrowLeft": + if 
(!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const prevLink = document.querySelector('link[rel="prev"]'); + if (prevLink && prevLink.href) { + window.location.href = prevLink.href; + event.preventDefault(); + } + break; + case "ArrowRight": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const nextLink = document.querySelector('link[rel="next"]'); + if (nextLink && nextLink.href) { + window.location.href = nextLink.href; + event.preventDefault(); + } + break; + } + } + + // some keyboard layouts may need Shift to get / + switch (event.key) { + case "/": + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; + Documentation.focusSearchBar(); + event.preventDefault(); + } + }); + }, +}; + +// quick alias for translations +const _ = Documentation.gettext; + +_ready(Documentation.init); diff --git a/_static/documentation_options.js b/_static/documentation_options.js new file mode 100644 index 0000000..7e4c114 --- /dev/null +++ b/_static/documentation_options.js @@ -0,0 +1,13 @@ +const DOCUMENTATION_OPTIONS = { + VERSION: '', + LANGUAGE: 'en', + COLLAPSE_INDEX: false, + BUILDER: 'html', + FILE_SUFFIX: '.html', + LINK_SUFFIX: '.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt', + NAVIGATION_WITH_KEYS: false, + SHOW_SEARCH_SUMMARY: true, + ENABLE_SEARCH_SHORTCUTS: true, +}; \ No newline at end of file diff --git a/_static/file.png b/_static/file.png new file mode 100644 index 0000000..a858a41 Binary files /dev/null and b/_static/file.png differ diff --git a/_static/jquery.js b/_static/jquery.js new file mode 100644 index 0000000..c4c6022 --- /dev/null +++ b/_static/jquery.js @@ -0,0 +1,2 @@ +/*! jQuery v3.6.0 | (c) OpenJS Foundation and other contributors | jquery.org/license */ +!function(e,t){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error("jQuery requires a window with a document");return t(e)}:t(e)}("undefined"!=typeof window?window:this,function(C,e){"use strict";var t=[],r=Object.getPrototypeOf,s=t.slice,g=t.flat?function(e){return t.flat.call(e)}:function(e){return t.concat.apply([],e)},u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return"function"==typeof e&&"number"!=typeof e.nodeType&&"function"!=typeof e.item},x=function(e){return null!=e&&e===e.window},E=C.document,c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement("script");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?n[o.call(e)]||"object":typeof e}var f="3.6.0",S=function(e,t){return new S.fn.init(e,t)};function p(e){var t=!!e&&"length"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&("array"===n||0===t||"number"==typeof t&&0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new RegExp(F),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+F),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native 
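/* A minimal usage sketch for the i18n helpers that doctools.js defines above;
   the catalog values here are hypothetical (Sphinx injects a real catalog for
   the configured locale at build time): */
Documentation.addTranslations({
  messages: { "Search": "Suche" },  // hypothetical translation pair
  plural_expr: "n == 1 ? 0 : 1",    // compiled into PLURAL_EXPR via new Function("n", ...)
  locale: "de",
});
Documentation.gettext("Search");             // -> "Suche" (catalog hit)
Documentation.gettext("Index");              // -> "Index" (no entry, returns the input unchanged)
Documentation.ngettext("item", "items", 2);  // -> "items" (no entry for "item", so n !== 1 picks the plural)
/* With the DOCUMENTATION_OPTIONS above, pressing "/" focuses the search box
   (ENABLE_SEARCH_SHORTCUTS: true), while ArrowLeft/ArrowRight paging through
   link[rel="prev"]/link[rel="next"] stays disabled (NAVIGATION_WITH_KEYS: false). */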
\w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\[\\da-fA-F]{1,6}"+M+"?|\\\\([^\\r\\n\\f])","g"),ne=function(e,t){var n="0x"+e.slice(1)-65536;return t||(n<0?String.fromCharCode(n+65536):String.fromCharCode(n>>10|55296,1023&n|56320))},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(p.childNodes),p.childNodes),t[p.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&(T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!N[t+" "]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&(U.test(t)||z.test(t))){(f=ee.test(t)&&ye(e.parentNode)||e)===e&&d.scope||((s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=S)),o=(l=h(t)).length;while(o--)l[o]=(s?"#"+s:":scope")+" "+xe(l[o]);c=l.join(",")}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){N(t,!0)}finally{s===S&&e.removeAttribute("id")}}}return g(t.replace($,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[S]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e&&e.namespaceURI,n=e&&(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:p;return r!=C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),p!=C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.scope=ce(function(e){return a.appendChild(e).appendChild(C.createElement("div")),"undefined"!=typeof e.querySelectorAll&&!e.querySelectorAll(":scope fieldset 
div").length}),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=S,!C.getElementsByName||!C.getElementsByName(S).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){var t;a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+S+"-]").length||v.push("~="),(t=C.createElement("input")).setAttribute("name",""),e.appendChild(t),e.querySelectorAll("[name='']").length||v.push("\\["+M+"*name"+M+"*="+M+"*(?:''|\"\")"),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+S+"+*").length||v.push(".#.+[+~]"),e.querySelectorAll("\\\f"),v.push("[\\r\\n\\f]")}),ce(function(e){e.innerHTML="";var t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",F)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},j=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)==(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e==C||e.ownerDocument==p&&y(p,e)?-1:t==C||t.ownerDocument==p&&y(p,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var 
n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e==C?-1:t==C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]==p?-1:s[r]==p?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if(T(e),d.matchesSelector&&E&&!N[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){N(t,!0)}return 0":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=m[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&m(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function j(e,n,r){return m(n)?S.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?S.grep(e,function(e){return e===n!==r}):"string"!=typeof n?S.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(S.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||D,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:q.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof S?t[0]:t,S.merge(this,S.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),N.test(r[1])&&S.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(S):S.makeArray(e,this)}).prototype=S.fn,D=S(E);var L=/^(?:parents|prev(?:Until|All))/,H={children:!0,contents:!0,next:!0,prev:!0};function O(e,t){while((e=e[t])&&1!==e.nodeType);return e}S.fn.extend({has:function(e){var t=S(e,this),n=t.length;return this.filter(function(){for(var e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i;ce=E.createDocumentFragment().appendChild(E.createElement("div")),(fe=E.createElement("input")).setAttribute("type","radio"),fe.setAttribute("checked","checked"),fe.setAttribute("name","t"),ce.appendChild(fe),y.checkClone=ce.cloneNode(!0).cloneNode(!0).lastChild.checked,ce.innerHTML="",y.noCloneChecked=!!ce.cloneNode(!0).lastChild.defaultValue,ce.innerHTML="",y.option=!!ce.lastChild;var ge={thead:[1,"","
"],col:[2,"","
"],tr:[2,"","
"],td:[3,"","
"],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?S.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n",""]);var me=/<|&#?\w+;/;function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d\s*$/g;function je(e,t){return A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&S(e).children("tbody")[0]||e}function De(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function qe(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Le(e,t){var n,r,i,o,a,s;if(1===t.nodeType){if(Y.hasData(e)&&(s=Y.get(e).events))for(i in Y.remove(t,"handle events"),s)for(n=0,r=s[i].length;n").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var _t,zt=[],Ut=/(=)\?(?=&|$)|\?\?/;S.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=zt.pop()||S.expando+"_"+wt.guid++;return this[e]=!0,e}}),S.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Ut.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Ut.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Ut,"$1"+r):!1!==e.jsonp&&(e.url+=(Tt.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||S.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?S(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,zt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((_t=E.implementation.createHTMLDocument("").body).innerHTML="
",2===_t.childNodes.length),S.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=N.exec(e))?[t.createElement(i[1])]:(i=xe([e],t,o),o&&o.length&&S(o).remove(),S.merge([],i.childNodes)));var r,i,o},S.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(S.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},S.expr.pseudos.animated=function(t){return S.grep(S.timers,function(e){return t===e.elem}).length},S.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=S.css(e,"position"),c=S(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=S.css(e,"top"),u=S.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,S.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):c.css(f)}},S.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){S.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var e,t,n,r=this[0],i={top:0,left:0};if("fixed"===S.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===S.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=S(e).offset()).top+=S.css(e,"borderTopWidth",!0),i.left+=S.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-S.css(r,"marginTop",!0),left:t.left-i.left-S.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===S.css(e,"position"))e=e.offsetParent;return e||re})}}),S.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;S.fn[t]=function(e){return $(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),S.each(["top","left"],function(e,n){S.cssHooks[n]=Fe(y.pixelPosition,function(e,t){if(t)return t=We(e,n),Pe.test(t)?S(e).position()[n]+"px":t})}),S.each({Height:"height",Width:"width"},function(a,s){S.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){S.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return $(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?S.css(e,t,i):S.style(e,t,n,i)},s,n?e:void 0,n)}})}),S.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){S.fn[t]=function(e){return this.on(t,e)}}),S.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 1===arguments.length?this.off(e,"**"):this.off(t,e||"**",n)},hover:function(e,t){return 
this.mouseenter(e).mouseleave(t||e)}}),S.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){S.fn[n]=function(e,t){return 0",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof 
a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document); \ No newline at end of file diff --git a/_static/js/html5shiv.min.js b/_static/js/html5shiv.min.js new file mode 100644 index 0000000..cd1c674 --- /dev/null +++ b/_static/js/html5shiv.min.js @@ -0,0 +1,4 @@ +/** +* @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed +*/ +!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time 
video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document); \ No newline at end of file diff --git a/_static/js/theme.js b/_static/js/theme.js new file mode 100644 index 0000000..1fddb6e --- /dev/null +++ b/_static/js/theme.js @@ -0,0 +1 @@ +!function(n){var e={};function t(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return n[i].call(o.exports,o,o.exports,t),o.l=!0,o.exports}t.m=n,t.c=e,t.d=function(n,e,i){t.o(n,e)||Object.defineProperty(n,e,{enumerable:!0,get:i})},t.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},t.t=function(n,e){if(1&e&&(n=t(n)),8&e)return n;if(4&e&&"object"==typeof n&&n&&n.__esModule)return n;var i=Object.create(null);if(t.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:n}),2&e&&"string"!=typeof n)for(var o in n)t.d(i,o,function(e){return n[e]}.bind(null,o));return i},t.n=function(n){var e=n&&n.__esModule?function(){return n.default}:function(){return n};return t.d(e,"a",e),e},t.o=function(n,e){return Object.prototype.hasOwnProperty.call(n,e)},t.p="",t(t.s=0)}([function(n,e,t){t(1),n.exports=t(3)},function(n,e,t){(function(){var e="undefined"!=typeof window?window.jQuery:t(2);n.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(n){var t=this;void 0===n&&(n=!0),t.isRunning||(t.isRunning=!0,e((function(e){t.init(e),t.reset(),t.win.on("hashchange",t.reset),n&&t.win.on("scroll",(function(){t.linkScroll||t.winScroll||(t.winScroll=!0,requestAnimationFrame((function(){t.onScroll()})))})),t.win.on("resize",(function(){t.winResize||(t.winResize=!0,requestAnimationFrame((function(){t.onResize()})))})),t.onResize()})))},enableSticky:function(){this.enable(!0)},init:function(n){n(document);var e=this;this.navBar=n("div.wy-side-scroll:first"),this.win=n(window),n(document).on("click","[data-toggle='wy-nav-top']",(function(){n("[data-toggle='wy-nav-shift']").toggleClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift")})).on("click",".wy-menu-vertical .current ul li a",(function(){var t=n(this);n("[data-toggle='wy-nav-shift']").removeClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift"),e.toggleCurrent(t),e.hashChange()})).on("click","[data-toggle='rst-current-version']",(function(){n("[data-toggle='rst-versions']").toggleClass("shift-up")})),n("table.docutils:not(.field-list,.footnote,.citation)").wrap("
"),n("table.docutils.footnote").wrap("
"),n("table.docutils.citation").wrap("
"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n(''),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}if(t.length>0){$(".wy-menu-vertical .current").removeClass("current").attr("aria-expanded","false"),t.addClass("current").attr("aria-expanded","true"),t.closest("li.toctree-l1").parent().addClass("current").attr("aria-expanded","true");for(let n=1;n<=10;n++)t.closest("li.toctree-l"+n).addClass("current").attr("aria-expanded","true");t[0].scrollIntoView()}}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current").attr("aria-expanded","false"),e.siblings().find("li.current").removeClass("current").attr("aria-expanded","false");var t=e.find("> ul li");t.length&&(t.removeClass("current").attr("aria-expanded","false"),e.toggleClass("current").attr("aria-expanded",(function(n,e){return"true"==e?"false":"true"})))}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" 
+ v; // vowel in stem + + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; + + if (w.length < 3) + return w; + + var re; + var re2; + var re3; + var re4; + + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); + + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; + + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); + + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } + } + + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if (re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} + diff --git a/_static/minus.png b/_static/minus.png new file mode 100644 index 0000000..d96755f Binary files /dev/null and b/_static/minus.png differ diff --git a/_static/nbsphinx-broken-thumbnail.svg b/_static/nbsphinx-broken-thumbnail.svg new file mode 100644 index 0000000..4919ca8 --- /dev/null +++ b/_static/nbsphinx-broken-thumbnail.svg @@ -0,0 +1,9 @@ + + + + diff --git a/_static/nbsphinx-code-cells.css b/_static/nbsphinx-code-cells.css new file mode 100644 index 0000000..a3fb27c --- /dev/null +++ b/_static/nbsphinx-code-cells.css @@ -0,0 +1,259 @@ +/* remove conflicting styling from Sphinx themes */ +div.nbinput.container div.prompt *, +div.nboutput.container div.prompt *, +div.nbinput.container div.input_area pre, +div.nboutput.container div.output_area pre, +div.nbinput.container div.input_area .highlight, +div.nboutput.container div.output_area .highlight { + border: 
none; + padding: 0; + margin: 0; + box-shadow: none; +} + +div.nbinput.container > div[class*=highlight], +div.nboutput.container > div[class*=highlight] { + margin: 0; +} + +div.nbinput.container div.prompt *, +div.nboutput.container div.prompt * { + background: none; +} + +div.nboutput.container div.output_area .highlight, +div.nboutput.container div.output_area pre { + background: unset; +} + +div.nboutput.container div.output_area div.highlight { + color: unset; /* override Pygments text color */ +} + +/* avoid gaps between output lines */ +div.nboutput.container div[class*=highlight] pre { + line-height: normal; +} + +/* input/output containers */ +div.nbinput.container, +div.nboutput.container { + display: -webkit-flex; + display: flex; + align-items: flex-start; + margin: 0; + width: 100%; +} +@media (max-width: 540px) { + div.nbinput.container, + div.nboutput.container { + flex-direction: column; + } +} + +/* input container */ +div.nbinput.container { + padding-top: 5px; +} + +/* last container */ +div.nblast.container { + padding-bottom: 5px; +} + +/* input prompt */ +div.nbinput.container div.prompt pre, +/* for sphinx_immaterial theme: */ +div.nbinput.container div.prompt pre > code { + color: #307FC1; +} + +/* output prompt */ +div.nboutput.container div.prompt pre, +/* for sphinx_immaterial theme: */ +div.nboutput.container div.prompt pre > code { + color: #BF5B3D; +} + +/* all prompts */ +div.nbinput.container div.prompt, +div.nboutput.container div.prompt { + width: 4.5ex; + padding-top: 5px; + position: relative; + user-select: none; +} + +div.nbinput.container div.prompt > div, +div.nboutput.container div.prompt > div { + position: absolute; + right: 0; + margin-right: 0.3ex; +} + +@media (max-width: 540px) { + div.nbinput.container div.prompt, + div.nboutput.container div.prompt { + width: unset; + text-align: left; + padding: 0.4em; + } + div.nboutput.container div.prompt.empty { + padding: 0; + } + + div.nbinput.container div.prompt > div, + div.nboutput.container div.prompt > div { + position: unset; + } +} + +/* disable scrollbars and line breaks on prompts */ +div.nbinput.container div.prompt pre, +div.nboutput.container div.prompt pre { + overflow: hidden; + white-space: pre; +} + +/* input/output area */ +div.nbinput.container div.input_area, +div.nboutput.container div.output_area { + -webkit-flex: 1; + flex: 1; + overflow: auto; +} +@media (max-width: 540px) { + div.nbinput.container div.input_area, + div.nboutput.container div.output_area { + width: 100%; + } +} + +/* input area */ +div.nbinput.container div.input_area { + border: 1px solid #e0e0e0; + border-radius: 2px; + /*background: #f5f5f5;*/ +} + +/* override MathJax center alignment in output cells */ +div.nboutput.container div[class*=MathJax] { + text-align: left !important; +} + +/* override sphinx.ext.imgmath center alignment in output cells */ +div.nboutput.container div.math p { + text-align: left; +} + +/* standard error */ +div.nboutput.container div.output_area.stderr { + background: #fdd; +} + +/* ANSI colors */ +.ansi-black-fg { color: #3E424D; } +.ansi-black-bg { background-color: #3E424D; } +.ansi-black-intense-fg { color: #282C36; } +.ansi-black-intense-bg { background-color: #282C36; } +.ansi-red-fg { color: #E75C58; } +.ansi-red-bg { background-color: #E75C58; } +.ansi-red-intense-fg { color: #B22B31; } +.ansi-red-intense-bg { background-color: #B22B31; } +.ansi-green-fg { color: #00A250; } +.ansi-green-bg { background-color: #00A250; } +.ansi-green-intense-fg { color: #007427; } 
+.ansi-green-intense-bg { background-color: #007427; } +.ansi-yellow-fg { color: #DDB62B; } +.ansi-yellow-bg { background-color: #DDB62B; } +.ansi-yellow-intense-fg { color: #B27D12; } +.ansi-yellow-intense-bg { background-color: #B27D12; } +.ansi-blue-fg { color: #208FFB; } +.ansi-blue-bg { background-color: #208FFB; } +.ansi-blue-intense-fg { color: #0065CA; } +.ansi-blue-intense-bg { background-color: #0065CA; } +.ansi-magenta-fg { color: #D160C4; } +.ansi-magenta-bg { background-color: #D160C4; } +.ansi-magenta-intense-fg { color: #A03196; } +.ansi-magenta-intense-bg { background-color: #A03196; } +.ansi-cyan-fg { color: #60C6C8; } +.ansi-cyan-bg { background-color: #60C6C8; } +.ansi-cyan-intense-fg { color: #258F8F; } +.ansi-cyan-intense-bg { background-color: #258F8F; } +.ansi-white-fg { color: #C5C1B4; } +.ansi-white-bg { background-color: #C5C1B4; } +.ansi-white-intense-fg { color: #A1A6B2; } +.ansi-white-intense-bg { background-color: #A1A6B2; } + +.ansi-default-inverse-fg { color: #FFFFFF; } +.ansi-default-inverse-bg { background-color: #000000; } + +.ansi-bold { font-weight: bold; } +.ansi-underline { text-decoration: underline; } + + +div.nbinput.container div.input_area div[class*=highlight] > pre, +div.nboutput.container div.output_area div[class*=highlight] > pre, +div.nboutput.container div.output_area div[class*=highlight].math, +div.nboutput.container div.output_area.rendered_html, +div.nboutput.container div.output_area > div.output_javascript, +div.nboutput.container div.output_area:not(.rendered_html) > img{ + padding: 5px; + margin: 0; +} + +/* fix copybtn overflow problem in chromium (needed for 'sphinx_copybutton') */ +div.nbinput.container div.input_area > div[class^='highlight'], +div.nboutput.container div.output_area > div[class^='highlight']{ + overflow-y: hidden; +} + +/* hide copy button on prompts for 'sphinx_copybutton' extension ... */ +.prompt .copybtn, +/* ... 
and 'sphinx_immaterial' theme */ +.prompt .md-clipboard.md-icon { + display: none; +} + +/* Some additional styling taken form the Jupyter notebook CSS */ +.jp-RenderedHTMLCommon table, +div.rendered_html table { + border: none; + border-collapse: collapse; + border-spacing: 0; + color: black; + font-size: 12px; + table-layout: fixed; +} +.jp-RenderedHTMLCommon thead, +div.rendered_html thead { + border-bottom: 1px solid black; + vertical-align: bottom; +} +.jp-RenderedHTMLCommon tr, +.jp-RenderedHTMLCommon th, +.jp-RenderedHTMLCommon td, +div.rendered_html tr, +div.rendered_html th, +div.rendered_html td { + text-align: right; + vertical-align: middle; + padding: 0.5em 0.5em; + line-height: normal; + white-space: normal; + max-width: none; + border: none; +} +.jp-RenderedHTMLCommon th, +div.rendered_html th { + font-weight: bold; +} +.jp-RenderedHTMLCommon tbody tr:nth-child(odd), +div.rendered_html tbody tr:nth-child(odd) { + background: #f5f5f5; +} +.jp-RenderedHTMLCommon tbody tr:hover, +div.rendered_html tbody tr:hover { + background: rgba(66, 165, 245, 0.2); +} + diff --git a/_static/nbsphinx-gallery.css b/_static/nbsphinx-gallery.css new file mode 100644 index 0000000..365c27a --- /dev/null +++ b/_static/nbsphinx-gallery.css @@ -0,0 +1,31 @@ +.nbsphinx-gallery { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(160px, 1fr)); + gap: 5px; + margin-top: 1em; + margin-bottom: 1em; +} + +.nbsphinx-gallery > a { + padding: 5px; + border: 1px dotted currentColor; + border-radius: 2px; + text-align: center; +} + +.nbsphinx-gallery > a:hover { + border-style: solid; +} + +.nbsphinx-gallery img { + max-width: 100%; + max-height: 100%; +} + +.nbsphinx-gallery > a > div:first-child { + display: flex; + align-items: start; + justify-content: center; + height: 120px; + margin-bottom: 5px; +} diff --git a/_static/nbsphinx-no-thumbnail.svg b/_static/nbsphinx-no-thumbnail.svg new file mode 100644 index 0000000..9dca758 --- /dev/null +++ b/_static/nbsphinx-no-thumbnail.svg @@ -0,0 +1,9 @@ + + + + diff --git a/_static/plus.png b/_static/plus.png new file mode 100644 index 0000000..7107cec Binary files /dev/null and b/_static/plus.png differ diff --git a/_static/pygments.css b/_static/pygments.css new file mode 100644 index 0000000..84ab303 --- /dev/null +++ b/_static/pygments.css @@ -0,0 +1,75 @@ +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight .hll { background-color: #ffffcc } +.highlight { background: #f8f8f8; } +.highlight .c { color: #3D7B7B; font-style: italic } /* Comment */ +.highlight .err { border: 1px solid #FF0000 } /* Error */ +.highlight .k { color: #008000; font-weight: bold } /* Keyword */ +.highlight .o { color: #666666 } /* Operator */ +.highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */ +.highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #9C6500 } /* Comment.Preproc */ +.highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #3D7B7B; font-style: italic } 
/* Comment.Special */ +.highlight .gd { color: #A00000 } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +.highlight .gr { color: #E40000 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #008400 } /* Generic.Inserted */ +.highlight .go { color: #717171 } /* Generic.Output */ +.highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #0044DD } /* Generic.Traceback */ +.highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #008000 } /* Keyword.Pseudo */ +.highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #B00040 } /* Keyword.Type */ +.highlight .m { color: #666666 } /* Literal.Number */ +.highlight .s { color: #BA2121 } /* Literal.String */ +.highlight .na { color: #687822 } /* Name.Attribute */ +.highlight .nb { color: #008000 } /* Name.Builtin */ +.highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */ +.highlight .no { color: #880000 } /* Name.Constant */ +.highlight .nd { color: #AA22FF } /* Name.Decorator */ +.highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */ +.highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #0000FF } /* Name.Function */ +.highlight .nl { color: #767600 } /* Name.Label */ +.highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ +.highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #19177C } /* Name.Variable */ +.highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mb { color: #666666 } /* Literal.Number.Bin */ +.highlight .mf { color: #666666 } /* Literal.Number.Float */ +.highlight .mh { color: #666666 } /* Literal.Number.Hex */ +.highlight .mi { color: #666666 } /* Literal.Number.Integer */ +.highlight .mo { color: #666666 } /* Literal.Number.Oct */ +.highlight .sa { color: #BA2121 } /* Literal.String.Affix */ +.highlight .sb { color: #BA2121 } /* Literal.String.Backtick */ +.highlight .sc { color: #BA2121 } /* Literal.String.Char */ +.highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */ +.highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #BA2121 } /* Literal.String.Double */ +.highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */ +.highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */ +.highlight .sx { color: #008000 } /* Literal.String.Other */ +.highlight .sr { color: #A45A77 } /* Literal.String.Regex */ +.highlight .s1 { color: #BA2121 } /* Literal.String.Single */ +.highlight .ss { color: #19177C } /* Literal.String.Symbol */ +.highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #0000FF } /* Name.Function.Magic */ +.highlight .vc { color: #19177C } /* Name.Variable.Class */ +.highlight .vg { color: #19177C } /* 
Name.Variable.Global */ +.highlight .vi { color: #19177C } /* Name.Variable.Instance */ +.highlight .vm { color: #19177C } /* Name.Variable.Magic */ +.highlight .il { color: #666666 } /* Literal.Number.Integer.Long */ \ No newline at end of file diff --git a/_static/searchtools.js b/_static/searchtools.js new file mode 100644 index 0000000..7918c3f --- /dev/null +++ b/_static/searchtools.js @@ -0,0 +1,574 @@ +/* + * searchtools.js + * ~~~~~~~~~~~~~~~~ + * + * Sphinx JavaScript utilities for the full-text search. + * + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ +"use strict"; + +/** + * Simple result scoring code. + */ +if (typeof Scorer === "undefined") { + var Scorer = { + // Implement the following function to further tweak the score for each result + // The function takes a result array [docname, title, anchor, descr, score, filename] + // and returns the new score. + /* + score: result => { + const [docname, title, anchor, descr, score, filename] = result + return score + }, + */ + + // query matches the full name of an object + objNameMatch: 11, + // or matches in the last dotted part of the object name + objPartialMatch: 6, + // Additive scores depending on the priority of the object + objPrio: { + 0: 15, // used to be importantResults + 1: 5, // used to be objectResults + 2: -5, // used to be unimportantResults + }, + // Used when the priority is not in the mapping. + objPrioDefault: 0, + + // query found in title + title: 15, + partialTitle: 7, + // query found in terms + term: 5, + partialTerm: 2, + }; +} + +const _removeChildren = (element) => { + while (element && element.lastChild) element.removeChild(element.lastChild); +}; + +/** + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping + */ +const _escapeRegExp = (string) => + string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string + +const _displayItem = (item, searchTerms, highlightTerms) => { + const docBuilder = DOCUMENTATION_OPTIONS.BUILDER; + const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; + const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX; + const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY; + const contentRoot = document.documentElement.dataset.content_root; + + const [docName, title, anchor, descr, score, _filename] = item; + + let listItem = document.createElement("li"); + let requestUrl; + let linkUrl; + if (docBuilder === "dirhtml") { + // dirhtml builder + let dirname = docName + "/"; + if (dirname.match(/\/index\/$/)) + dirname = dirname.substring(0, dirname.length - 6); + else if (dirname === "index/") dirname = ""; + requestUrl = contentRoot + dirname; + linkUrl = requestUrl; + } else { + // normal html builders + requestUrl = contentRoot + docName + docFileSuffix; + linkUrl = docName + docLinkSuffix; + } + let linkEl = listItem.appendChild(document.createElement("a")); + linkEl.href = linkUrl + anchor; + linkEl.dataset.score = score; + linkEl.innerHTML = title; + if (descr) { + listItem.appendChild(document.createElement("span")).innerHTML = + " (" + descr + ")"; + // highlight search terms in the description + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + } + else if (showSearchSummary) + fetch(requestUrl) + .then((responseData) => responseData.text()) + .then((data) => { + if (data) + listItem.appendChild( + 
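+ /* Illustrative example, not part of upstream searchtools.js: a page could
+  * predefine Scorer (see the guard at the top of this file) to bias results,
+  * e.g. score: (r) => r[4] + (r[0].startsWith("guides/") ? 5 : 0), where r is
+  * the result array [docname, title, anchor, descr, score, filename] and the
+  * "guides/" prefix is just a hypothetical docname filter. */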
Search.makeSearchSummary(data, searchTerms) + ); + // highlight search terms in the summary + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + }); + Search.output.appendChild(listItem); +}; +const _finishSearch = (resultCount) => { + Search.stopPulse(); + Search.title.innerText = _("Search Results"); + if (!resultCount) + Search.status.innerText = Documentation.gettext( + "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories." + ); + else + Search.status.innerText = _( + `Search finished, found ${resultCount} page(s) matching the search query.` + ); +}; +const _displayNextItem = ( + results, + resultCount, + searchTerms, + highlightTerms, +) => { + // results left, load the summary and display it + // this is intended to be dynamic (don't sub resultsCount) + if (results.length) { + _displayItem(results.pop(), searchTerms, highlightTerms); + setTimeout( + () => _displayNextItem(results, resultCount, searchTerms, highlightTerms), + 5 + ); + } + // search finished, update title and status message + else _finishSearch(resultCount); +}; + +/** + * Default splitQuery function. Can be overridden in ``sphinx.search`` with a + * custom function per language. + * + * The regular expression works by splitting the string on consecutive characters + * that are not Unicode letters, numbers, underscores, or emoji characters. + * This is the same as ``\W+`` in Python, preserving the surrogate pair area. + */ +if (typeof splitQuery === "undefined") { + var splitQuery = (query) => query + .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) + .filter(term => term) // remove remaining empty strings +} + +/** + * Search Module + */ +const Search = { + _index: null, + _queued_query: null, + _pulse_status: -1, + + htmlToText: (htmlString) => { + const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); + htmlElement.querySelectorAll(".headerlink").forEach((el) => { el.remove() }); + const docContent = htmlElement.querySelector('[role="main"]'); + if (docContent !== undefined) return docContent.textContent; + console.warn( + "Content block not found. Sphinx search tries to obtain it via '[role=main]'. Could you check your theme or template." 
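+ /* Editorial note: querySelector returns null, never undefined, when no
+  * [role="main"] element exists, so the `docContent !== undefined` guard
+  * above cannot fail; a missing main block would throw on .textContent
+  * before this warning runs. A safer guard would be `if (docContent)`. */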
+ ); + return ""; + }, + + init: () => { + const query = new URLSearchParams(window.location.search).get("q"); + document + .querySelectorAll('input[name="q"]') + .forEach((el) => (el.value = query)); + if (query) Search.performSearch(query); + }, + + loadIndex: (url) => + (document.body.appendChild(document.createElement("script")).src = url), + + setIndex: (index) => { + Search._index = index; + if (Search._queued_query !== null) { + const query = Search._queued_query; + Search._queued_query = null; + Search.query(query); + } + }, + + hasIndex: () => Search._index !== null, + + deferQuery: (query) => (Search._queued_query = query), + + stopPulse: () => (Search._pulse_status = -1), + + startPulse: () => { + if (Search._pulse_status >= 0) return; + + const pulse = () => { + Search._pulse_status = (Search._pulse_status + 1) % 4; + Search.dots.innerText = ".".repeat(Search._pulse_status); + if (Search._pulse_status >= 0) window.setTimeout(pulse, 500); + }; + pulse(); + }, + + /** + * perform a search for something (or wait until index is loaded) + */ + performSearch: (query) => { + // create the required interface elements + const searchText = document.createElement("h2"); + searchText.textContent = _("Searching"); + const searchSummary = document.createElement("p"); + searchSummary.classList.add("search-summary"); + searchSummary.innerText = ""; + const searchList = document.createElement("ul"); + searchList.classList.add("search"); + + const out = document.getElementById("search-results"); + Search.title = out.appendChild(searchText); + Search.dots = Search.title.appendChild(document.createElement("span")); + Search.status = out.appendChild(searchSummary); + Search.output = out.appendChild(searchList); + + const searchProgress = document.getElementById("search-progress"); + // Some themes don't use the search progress node + if (searchProgress) { + searchProgress.innerText = _("Preparing search..."); + } + Search.startPulse(); + + // index already loaded, the browser was quick! 
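+ // (The generated searchindex.js is expected to call Search.setIndex() once
+ // loaded; a query issued earlier is parked by deferQuery and replayed there.)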
+ if (Search.hasIndex()) Search.query(query); + else Search.deferQuery(query); + }, + + /** + * execute search (requires search index to be loaded) + */ + query: (query) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + const allTitles = Search._index.alltitles; + const indexEntries = Search._index.indexentries; + + // stem the search terms and add them to the correct list + const stemmer = new Stemmer(); + const searchTerms = new Set(); + const excludedTerms = new Set(); + const highlightTerms = new Set(); + const objectTerms = new Set(splitQuery(query.toLowerCase().trim())); + splitQuery(query.trim()).forEach((queryTerm) => { + const queryTermLower = queryTerm.toLowerCase(); + + // maybe skip this "word" + // stopwords array is from language_data.js + if ( + stopwords.indexOf(queryTermLower) !== -1 || + queryTerm.match(/^\d+$/) + ) + return; + + // stem the word + let word = stemmer.stemWord(queryTermLower); + // select the correct list + if (word[0] === "-") excludedTerms.add(word.substr(1)); + else { + searchTerms.add(word); + highlightTerms.add(queryTermLower); + } + }); + + if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js + localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" ")) + } + + // console.debug("SEARCH: searching for:"); + // console.info("required: ", [...searchTerms]); + // console.info("excluded: ", [...excludedTerms]); + + // array of [docname, title, anchor, descr, score, filename] + let results = []; + _removeChildren(document.getElementById("search-progress")); + + const queryLower = query.toLowerCase(); + for (const [title, foundTitles] of Object.entries(allTitles)) { + if (title.toLowerCase().includes(queryLower) && (queryLower.length >= title.length/2)) { + for (const [file, id] of foundTitles) { + let score = Math.round(100 * queryLower.length / title.length) + results.push([ + docNames[file], + titles[file] !== title ? `${titles[file]} > ${title}` : title, + id !== null ? "#" + id : "", + null, + score, + filenames[file], + ]); + } + } + } + + // search for explicit entries in index directives + for (const [entry, foundEntries] of Object.entries(indexEntries)) { + if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) { + for (const [file, id] of foundEntries) { + let score = Math.round(100 * queryLower.length / entry.length) + results.push([ + docNames[file], + titles[file], + id ? "#" + id : "", + null, + score, + filenames[file], + ]); + } + } + } + + // lookup as object + objectTerms.forEach((term) => + results.push(...Search.performObjectSearch(term, objectTerms)) + ); + + // lookup as search terms in fulltext + results.push(...Search.performTermsSearch(searchTerms, excludedTerms)); + + // let the scorer override scores with a custom scoring function + if (Scorer.score) results.forEach((item) => (item[4] = Scorer.score(item))); + + // now sort the results by score (in opposite order of appearance, since the + // display function below uses pop() to retrieve items) and then + // alphabetically + results.sort((a, b) => { + const leftScore = a[4]; + const rightScore = b[4]; + if (leftScore === rightScore) { + // same score: sort alphabetically + const leftTitle = a[1].toLowerCase(); + const rightTitle = b[1].toLowerCase(); + if (leftTitle === rightTitle) return 0; + return leftTitle > rightTitle ? -1 : 1; // inverted is intentional + } + return leftScore > rightScore ? 
1 : -1; + }); + + // remove duplicate search results + // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept + let seen = new Set(); + results = results.reverse().reduce((acc, result) => { + let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(','); + if (!seen.has(resultStr)) { + acc.push(result); + seen.add(resultStr); + } + return acc; + }, []); + + results = results.reverse(); + + // for debugging + //Search.lastresults = results.slice(); // a copy + // console.info("search results:", Search.lastresults); + + // print the results + _displayNextItem(results, results.length, searchTerms, highlightTerms); + }, + + /** + * search for object names + */ + performObjectSearch: (object, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const objects = Search._index.objects; + const objNames = Search._index.objnames; + const titles = Search._index.titles; + + const results = []; + + const objectSearchCallback = (prefix, match) => { + const name = match[4] + const fullname = (prefix ? prefix + "." : "") + name; + const fullnameLower = fullname.toLowerCase(); + if (fullnameLower.indexOf(object) < 0) return; + + let score = 0; + const parts = fullnameLower.split("."); + + // check for different match types: exact matches of full name or + // "last name" (i.e. last dotted part) + if (fullnameLower === object || parts.slice(-1)[0] === object) + score += Scorer.objNameMatch; + else if (parts.slice(-1)[0].indexOf(object) > -1) + score += Scorer.objPartialMatch; // matches in last name + + const objName = objNames[match[1]][2]; + const title = titles[match[0]]; + + // If more than one term searched for, we require other words to be + // found in the name/title/description + const otherTerms = new Set(objectTerms); + otherTerms.delete(object); + if (otherTerms.size > 0) { + const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase(); + if ( + [...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0) + ) + return; + } + + let anchor = match[3]; + if (anchor === "") anchor = fullname; + else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname; + + const descr = objName + _(", in ") + title; + + // add custom score for some objects according to scorer + if (Scorer.objPrio.hasOwnProperty(match[2])) + score += Scorer.objPrio[match[2]]; + else score += Scorer.objPrioDefault; + + results.push([ + docNames[match[0]], + fullname, + "#" + anchor, + descr, + score, + filenames[match[0]], + ]); + }; + Object.keys(objects).forEach((prefix) => + objects[prefix].forEach((array) => + objectSearchCallback(prefix, array) + ) + ); + return results; + }, + + /** + * search for full-text terms in the index + */ + performTermsSearch: (searchTerms, excludedTerms) => { + // prepare search + const terms = Search._index.terms; + const titleTerms = Search._index.titleterms; + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + + const scoreMap = new Map(); + const fileMap = new Map(); + + // perform the search on the required terms + searchTerms.forEach((word) => { + const files = []; + const arr = [ + { files: terms[word], score: Scorer.term }, + { files: titleTerms[word], score: Scorer.title }, + ]; + // add support for partial matches + if (word.length > 2) { + const escapedWord = _escapeRegExp(word); + Object.keys(terms).forEach((term) => { + if (term.match(escapedWord) && 
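+ /* Editorial note: in the title branch below, `files: titleTerms[word]` is
+  * pushed under the condition `!titleTerms[word]`, so it is always undefined
+  * and later dropped by the `record.files === undefined` check; pushing
+  * `titleTerms[term]` instead would make partial title matches effective. */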
!terms[word]) + arr.push({ files: terms[term], score: Scorer.partialTerm }); + }); + Object.keys(titleTerms).forEach((term) => { + if (term.match(escapedWord) && !titleTerms[word]) + arr.push({ files: titleTerms[word], score: Scorer.partialTitle }); + }); + } + + // no match but word was a required one + if (arr.every((record) => record.files === undefined)) return; + + // found search word in contents + arr.forEach((record) => { + if (record.files === undefined) return; + + let recordFiles = record.files; + if (recordFiles.length === undefined) recordFiles = [recordFiles]; + files.push(...recordFiles); + + // set score for the word in each file + recordFiles.forEach((file) => { + if (!scoreMap.has(file)) scoreMap.set(file, {}); + scoreMap.get(file)[word] = record.score; + }); + }); + + // create the mapping + files.forEach((file) => { + if (fileMap.has(file) && fileMap.get(file).indexOf(word) === -1) + fileMap.get(file).push(word); + else fileMap.set(file, [word]); + }); + }); + + // now check if the files don't contain excluded terms + const results = []; + for (const [file, wordList] of fileMap) { + // check if all requirements are matched + + // as search terms with length < 3 are discarded + const filteredTermCount = [...searchTerms].filter( + (term) => term.length > 2 + ).length; + if ( + wordList.length !== searchTerms.size && + wordList.length !== filteredTermCount + ) + continue; + + // ensure that none of the excluded terms is in the search result + if ( + [...excludedTerms].some( + (term) => + terms[term] === file || + titleTerms[term] === file || + (terms[term] || []).includes(file) || + (titleTerms[term] || []).includes(file) + ) + ) + break; + + // select one (max) score for the file. + const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w])); + // add result to the result list + results.push([ + docNames[file], + titles[file], + "", + null, + score, + filenames[file], + ]); + } + return results; + }, + + /** + * helper function to return a node containing the + * search summary for a given text. keywords is a list + * of stemmed words. + */ + makeSearchSummary: (htmlText, keywords) => { + const text = Search.htmlToText(htmlText); + if (text === "") return null; + + const textLower = text.toLowerCase(); + const actualStartPosition = [...keywords] + .map((k) => textLower.indexOf(k.toLowerCase())) + .filter((i) => i > -1) + .slice(-1)[0]; + const startWithContext = Math.max(actualStartPosition - 120, 0); + + const top = startWithContext === 0 ? "" : "..."; + const tail = startWithContext + 240 < text.length ? "..." : ""; + + let summary = document.createElement("p"); + summary.classList.add("context"); + summary.textContent = top + text.substr(startWithContext, 240).trim() + tail; + + return summary; + }, +}; + +_ready(Search.init); diff --git a/_static/sphinx_highlight.js b/_static/sphinx_highlight.js new file mode 100644 index 0000000..8a96c69 --- /dev/null +++ b/_static/sphinx_highlight.js @@ -0,0 +1,154 @@ +/* Highlighting utilities for Sphinx HTML documentation. */ +"use strict"; + +const SPHINX_HIGHLIGHT_ENABLED = true + +/** + * highlight a given string on a node by wrapping it in + * span elements with the given class name. 
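+ * For example, with className "highlighted" (as used below), a text node
+ * "foo bar" highlighted for "foo" becomes <span class="highlighted">foo</span>
+ * plus a trailing text node " bar", and the function recurses on the
+ * remainder so repeated occurrences are wrapped as well.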
+ */ +const _highlight = (node, addItems, text, className) => { + if (node.nodeType === Node.TEXT_NODE) { + const val = node.nodeValue; + const parent = node.parentNode; + const pos = val.toLowerCase().indexOf(text); + if ( + pos >= 0 && + !parent.classList.contains(className) && + !parent.classList.contains("nohighlight") + ) { + let span; + + const closestNode = parent.closest("body, svg, foreignObject"); + const isInSVG = closestNode && closestNode.matches("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.classList.add(className); + } + + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + const rest = document.createTextNode(val.substr(pos + text.length)); + parent.insertBefore( + span, + parent.insertBefore( + rest, + node.nextSibling + ) + ); + node.nodeValue = val.substr(0, pos); + /* There may be more occurrences of search term in this node. So call this + * function recursively on the remaining fragment. + */ + _highlight(rest, addItems, text, className); + + if (isInSVG) { + const rect = document.createElementNS( + "http://www.w3.org/2000/svg", + "rect" + ); + const bbox = parent.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute("class", className); + addItems.push({ parent: parent, target: rect }); + } + } + } else if (node.matches && !node.matches("button, select, textarea")) { + node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); + } +}; +const _highlightText = (thisNode, text, className) => { + let addItems = []; + _highlight(thisNode, addItems, text, className); + addItems.forEach((obj) => + obj.parent.insertAdjacentElement("beforebegin", obj.target) + ); +}; + +/** + * Small JavaScript module for the documentation. + */ +const SphinxHighlight = { + + /** + * highlight the search words provided in localstorage in the text + */ + highlightSearchWords: () => { + if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight + + // get and clear terms from localstorage + const url = new URL(window.location); + const highlight = + localStorage.getItem("sphinx_highlight_terms") + || url.searchParams.get("highlight") + || ""; + localStorage.removeItem("sphinx_highlight_terms") + url.searchParams.delete("highlight"); + window.history.replaceState({}, "", url); + + // get individual terms from highlight string + const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); + if (terms.length === 0) return; // nothing to do + + // There should never be more than one element matching "div.body" + const divBody = document.querySelectorAll("div.body"); + const body = divBody.length ? 
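+ /* Handshake note: searchtools.js stores the active query words under the
+  * localStorage key "sphinx_highlight_terms" (see Search.query); this
+  * function reads and clears that key so highlights survive the navigation
+  * from the results page to a target document. */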
divBody[0] : document.querySelector("body"); + window.setTimeout(() => { + terms.forEach((term) => _highlightText(body, term, "highlighted")); + }, 10); + + const searchBox = document.getElementById("searchbox"); + if (searchBox === null) return; + searchBox.appendChild( + document + .createRange() + .createContextualFragment( + '" + ) + ); + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords: () => { + document + .querySelectorAll("#searchbox .highlight-link") + .forEach((el) => el.remove()); + document + .querySelectorAll("span.highlighted") + .forEach((el) => el.classList.remove("highlighted")); + localStorage.removeItem("sphinx_highlight_terms") + }, + + initEscapeListener: () => { + // only install a listener if it is really needed + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; + if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { + SphinxHighlight.hideSearchWords(); + event.preventDefault(); + } + }); + }, +}; + +_ready(() => { + /* Do not call highlightSearchWords() when we are on the search page. + * It will highlight words from the *previous* search query. + */ + if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); + SphinxHighlight.initEscapeListener(); +}); diff --git a/_static/tabs.css b/_static/tabs.css new file mode 100644 index 0000000..13042e5 --- /dev/null +++ b/_static/tabs.css @@ -0,0 +1,110 @@ +/* body[data-theme] { */ +:root { + --tabs--label-text: #4b5563; + --tabs--label-text--hover: #4b5563; + --tabs--label-text--active: #0ea5e9; + --tabs--label-text--active--hover: #0ea5e9; + --tabs--label-background: transparent; + --tabs--label-background--hover: transparent; + --tabs--label-background--active: transparent; + --tabs--label-background--active--hover: transparent; + --tabs--label-border: transparent; + --tabs--label-border--hover: #d1d5db; + --tabs--label-border--active: #0ea5e9; + --tabs--label-border--active--hover: #0ea5e9; + --tabs--padding-x: 1.25em; + --tabs--margin-x: 0; + --tabs--border: #e6e6e6; +} + +/* Hide radio buttons */ +.tab-set > input { + position: absolute; + opacity: 0; +} + +/* Tab set container */ +.tab-set { + border-radius: 2px; + display: flex; + flex-wrap: wrap; + margin: 0.75em 0; + position: relative; +} + +/* Tab label */ +.tab-set > label { + z-index: 1; + + width: auto; + border-bottom: 2px solid var(--tabs--label-border); + padding: 1em var(--tabs--padding-x) 0.5em; + margin-left: var(--tabs--margin-x); + + color: var(--tabs--label-text); + background: var(--tabs--label-background); + + transition: color 250ms; + + cursor: pointer; + + font-size: 0.875em; + font-weight: 700; +} +.tab-set > label:nth-child(2) { + margin-left: 0; +} + +/* Hovered label */ +.tab-set > label:hover { + color: var(--tabs--label-text--hover); + background: var(--tabs--label-background--hover); + border-color: var(--tabs--label-border--hover); +} + +/* Active tab label */ +.tab-set > input:checked + label { + color: var(--tabs--label-text--active); + background: var(--tabs--label-background--active); + border-color: var(--tabs--label-border--active); +} +.tab-set > input:checked + label:hover { + color: var(--tabs--label-text--active--hover); + background: 
var(--tabs--label-background--active--hover); + border-color: var(--tabs--label-border--active--hover); +} + +/* Tab content */ +.tab-content { + order: 99; + display: none; + width: 100%; + box-shadow: 0 -0.0625rem var(--tabs--border); +} + +/* Show content, when input is checked. */ +.tab-set > input:checked + label + .tab-content { + display: block; +} +.tab-content > p:first-child { + margin-top: 0.75rem; +} +/* Remove the top border on first code block */ +.tab-content > [class^="highlight-"]:first-child .highlight { + border-top: none; + border-top-left-radius: 0; + border-top-right-radius: 0; +} + +/* Remove margins on children */ +.tab-content > *:first-child { + margin-top: 0; +} +.tab-content > *:last-child { + margin-bottom: 0; +} + +/* Remove margins on nested tabs */ +.tab-content > .tab-set { + margin: 0; +} diff --git a/_static/tabs.js b/_static/tabs.js new file mode 100644 index 0000000..58d2cdd --- /dev/null +++ b/_static/tabs.js @@ -0,0 +1,30 @@ +var labels_by_text = {}; + +function ready() { + var li = document.getElementsByClassName("tab-label"); + const urlParams = new URLSearchParams(window.location.search); + const tabs = urlParams.getAll("tabs"); + + for (const label of li) { + label.onclick = onLabelClick; + const text = label.textContent; + if (!labels_by_text[text]) { + labels_by_text[text] = []; + } + labels_by_text[text].push(label); + } + + for (const tab of tabs) { + for (label of labels_by_text[tab]) { + label.previousSibling.checked = true; + } + } +} + +function onLabelClick() { + // Activate other labels with the same text. + for (label of labels_by_text[this.textContent]) { + label.previousSibling.checked = true; + } +} +document.addEventListener("DOMContentLoaded", ready, false); diff --git a/_static/togglebutton.css b/_static/togglebutton.css new file mode 100644 index 0000000..54a6787 --- /dev/null +++ b/_static/togglebutton.css @@ -0,0 +1,160 @@ +/** + * Admonition-based toggles + */ + +/* Visibility of the target */ +.admonition.toggle .admonition-title ~ * { + transition: opacity .3s, height .3s; +} + +/* Toggle buttons inside admonitions so we see the title */ +.admonition.toggle { + position: relative; +} + +/* Titles should cut off earlier to avoid overlapping w/ button */ +.admonition.toggle .admonition-title { + padding-right: 25%; + cursor: pointer; +} + +/* Hovering will cause a slight shift in color to make it feel interactive */ +.admonition.toggle .admonition-title:hover { + box-shadow: inset 0 0 0px 20px rgb(0 0 0 / 1%); +} + +/* Hovering will cause a slight shift in color to make it feel interactive */ +.admonition.toggle .admonition-title:active { + box-shadow: inset 0 0 0px 20px rgb(0 0 0 / 3%); +} + +/* Remove extra whitespace below the admonition title when hidden */ +.admonition.toggle-hidden { + padding-bottom: 0; +} + +.admonition.toggle-hidden .admonition-title { + margin-bottom: 0; +} + +/* hides all the content of a page until de-toggled */ +.admonition.toggle-hidden .admonition-title ~ * { + height: 0; + margin: 0; + opacity: 0; + visibility: hidden; +} + +/* General button style and position*/ +button.toggle-button { + /** + * Background and shape. 
By default there's no background + * but users can style as they wish + */ + background: none; + border: none; + outline: none; + + /* Positioning just inside the admonition title */ + position: absolute; + right: 0.5em; + padding: 0px; + border: none; + outline: none; +} + +/* Display the toggle hint on wide screens */ +@media (min-width: 768px) { + button.toggle-button.toggle-button-hidden:before { + content: attr(data-toggle-hint); /* This will be filled in by JS */ + font-size: .8em; + align-self: center; + } +} + +/* Icon behavior */ +.tb-icon { + transition: transform .2s ease-out; + height: 1.5em; + width: 1.5em; + stroke: currentColor; /* So that we inherit the color of other text */ +} + +/* The icon should point right when closed, down when open. */ +/* Open */ +.admonition.toggle button .tb-icon { + transform: rotate(90deg); +} + +/* Closed */ +.admonition.toggle button.toggle-button-hidden .tb-icon { + transform: rotate(0deg); +} + +/* With details toggles, we don't rotate the icon so it points right */ +details.toggle-details .tb-icon { + height: 1.4em; + width: 1.4em; + margin-top: 0.1em; /* To center the button vertically */ +} + + +/** + * Details-based toggles. + * In this case, we wrap elements with `.toggle` in a details block. + */ + +/* Details blocks */ +details.toggle-details { + margin: 1em 0; +} + + +details.toggle-details summary { + display: flex; + align-items: center; + cursor: pointer; + list-style: none; + border-radius: .2em; + border-left: 3px solid #1976d2; + background-color: rgb(204 204 204 / 10%); + padding: 0.2em 0.7em 0.3em 0.5em; /* Less padding on left because the SVG has left margin */ + font-size: 0.9em; +} + +details.toggle-details summary:hover { + background-color: rgb(204 204 204 / 20%); +} + +details.toggle-details summary:active { + background: rgb(204 204 204 / 28%); +} + +.toggle-details__summary-text { + margin-left: 0.2em; +} + +details.toggle-details[open] summary { + margin-bottom: .5em; +} + +details.toggle-details[open] summary .tb-icon { + transform: rotate(90deg); +} + +details.toggle-details[open] summary ~ * { + animation: toggle-fade-in .3s ease-out; +} + +@keyframes toggle-fade-in { + from {opacity: 0%;} + to {opacity: 100%;} +} + +/* Print rules - we hide all toggle button elements at print */ +@media print { + /* Always hide the summary so the button doesn't show up */ + details.toggle-details summary { + display: none; + } +} \ No newline at end of file diff --git a/_static/togglebutton.js b/_static/togglebutton.js new file mode 100644 index 0000000..215a7ee --- /dev/null +++ b/_static/togglebutton.js @@ -0,0 +1,187 @@ +/** + * Add Toggle Buttons to elements + */ + +let toggleChevron = ` + + + +`; + +var initToggleItems = () => { + var itemsToToggle = document.querySelectorAll(togglebuttonSelector); + console.log(`[togglebutton]: Adding toggle buttons to ${itemsToToggle.length} items`) + // Add the button to each admonition and hook up a callback to toggle visibility + itemsToToggle.forEach((item, index) => { + if (item.classList.contains("admonition")) { + // If it's an admonition block, then we'll add a button inside + // Generate unique IDs for this item + var toggleID = `toggle-${index}`; + var buttonID = `button-${toggleID}`; + + item.setAttribute('id', toggleID); + if (!item.classList.contains("toggle")){ + item.classList.add("toggle"); + } + // This is the button that will be added to each item to trigger the toggle + var collapseButton = ` + `; + + title = item.querySelector(".admonition-title") + 
title.insertAdjacentHTML("beforeend", collapseButton); + thisButton = document.getElementById(buttonID); + + // Add click handlers for the button + admonition title (if admonition) + admonitionTitle = document.querySelector(`#${toggleID} > .admonition-title`) + if (admonitionTitle) { + // If an admonition, then make the whole title block clickable + admonitionTitle.addEventListener('click', toggleClickHandler); + admonitionTitle.dataset.target = toggleID + admonitionTitle.dataset.button = buttonID + } else { + // If not an admonition then we'll listen for the button click + thisButton.addEventListener('click', toggleClickHandler); + } + + // Now hide the item for this toggle button unless explicitly noted to show + if (!item.classList.contains("toggle-shown")) { + toggleHidden(thisButton); + } + } else { + // If not an admonition, wrap the block in a
<details> block
+      // Define the structure of the details block and insert it as a sibling
+      var detailsBlock = `
+        <details class="toggle-details">
+          <summary class="toggle-details__summary">
+            ${toggleChevron}
+            <span class="toggle-details__summary-text">${toggleHintShow}</span>
+          </summary>
+        </details>
`; + item.insertAdjacentHTML("beforebegin", detailsBlock); + + // Now move the toggle-able content inside of the details block + details = item.previousElementSibling + details.appendChild(item) + item.classList.add("toggle-details__container") + + // Set up a click trigger to change the text as needed + details.addEventListener('click', (click) => { + let parent = click.target.parentElement; + if (parent.tagName.toLowerCase() == "details") { + summary = parent.querySelector("summary"); + details = parent; + } else { + summary = parent; + details = parent.parentElement; + } + // Update the inner text for the proper hint + if (details.open) { + summary.querySelector("span.toggle-details__summary-text").innerText = toggleHintShow; + } else { + summary.querySelector("span.toggle-details__summary-text").innerText = toggleHintHide; + } + + }); + + // If we have a toggle-shown class, open details block should be open + if (item.classList.contains("toggle-shown")) { + details.click(); + } + } + }) +}; + +// This should simply add / remove the collapsed class and change the button text +var toggleHidden = (button) => { + target = button.dataset['target'] + var itemToToggle = document.getElementById(target); + if (itemToToggle.classList.contains("toggle-hidden")) { + itemToToggle.classList.remove("toggle-hidden"); + button.classList.remove("toggle-button-hidden"); + } else { + itemToToggle.classList.add("toggle-hidden"); + button.classList.add("toggle-button-hidden"); + } +} + +var toggleClickHandler = (click) => { + // Be cause the admonition title is clickable and extends to the whole admonition + // We only look for a click event on this title to trigger the toggle. + + if (click.target.classList.contains("admonition-title")) { + button = click.target.querySelector(".toggle-button"); + } else if (click.target.classList.contains("tb-icon")) { + // We've clicked the icon and need to search up one parent for the button + button = click.target.parentElement; + } else if (click.target.tagName == "polyline") { + // We've clicked the SVG elements inside the button, need to up 2 layers + button = click.target.parentElement.parentElement; + } else if (click.target.classList.contains("toggle-button")) { + // We've clicked the button itself and so don't need to do anything + button = click.target; + } else { + console.log(`[togglebutton]: Couldn't find button for ${click.target}`) + } + target = document.getElementById(button.dataset['button']); + toggleHidden(target); +} + +// If we want to blanket-add toggle classes to certain cells +var addToggleToSelector = () => { + const selector = ""; + if (selector.length > 0) { + document.querySelectorAll(selector).forEach((item) => { + item.classList.add("toggle"); + }) + } +} + +// Helper function to run when the DOM is finished +const sphinxToggleRunWhenDOMLoaded = cb => { + if (document.readyState != 'loading') { + cb() + } else if (document.addEventListener) { + document.addEventListener('DOMContentLoaded', cb) + } else { + document.attachEvent('onreadystatechange', function() { + if (document.readyState == 'complete') cb() + }) + } +} +sphinxToggleRunWhenDOMLoaded(addToggleToSelector) +sphinxToggleRunWhenDOMLoaded(initToggleItems) + +/** Toggle details blocks to be open when printing */ +if (toggleOpenOnPrint == "true") { + window.addEventListener("beforeprint", () => { + // Open the details + document.querySelectorAll("details.toggle-details").forEach((el) => { + el.dataset["togglestatus"] = el.open; + el.open = true; + }); + + // Open the admonitions + 
document.querySelectorAll(".admonition.toggle.toggle-hidden").forEach((el) => { + console.log(el); + el.querySelector("button.toggle-button").click(); + el.dataset["toggle_after_print"] = "true"; + }); + }); + window.addEventListener("afterprint", () => { + // Re-close the details that were closed + document.querySelectorAll("details.toggle-details").forEach((el) => { + el.open = el.dataset["togglestatus"] == "true"; + delete el.dataset["togglestatus"]; + }); + + // Re-close the admonition toggle buttons + document.querySelectorAll(".admonition.toggle").forEach((el) => { + if (el.dataset["toggle_after_print"] == "true") { + el.querySelector("button.toggle-button").click(); + delete el.dataset["toggle_after_print"]; + } + }); + }); +} diff --git a/deployment/1_tensorrt_llm_deployment.html b/deployment/1_tensorrt_llm_deployment.html new file mode 100644 index 0000000..43529e5 --- /dev/null +++ b/deployment/1_tensorrt_llm_deployment.html @@ -0,0 +1,312 @@ + + + + + + + TensorRT-LLM Deployment — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

TensorRT-LLM Deployment

+
+

Note

+

Please read the TensorRT-LLM checkpoint workflow before going through this section.

+
+

The ModelOpt toolkit supports automatic conversion of a ModelOpt-exported LLM to a TensorRT-LLM checkpoint and TensorRT-LLM engines for accelerated inference.

+

This conversion is achieved by:

+
1. Converting Huggingface, NeMo and ModelOpt exported checkpoints to the TensorRT-LLM checkpoint.
2. Building the TensorRT-LLM engine from the TensorRT-LLM checkpoint.
+
+

Export Quantized Model

+

After the model is quantized, the quantized model can be exported to the TensorRT-LLM checkpoint format, which is stored as:

+
1. A single JSON file recording the model structure and metadata (config.json).
2. A group of safetensors files, each recording the local calibrated model on a single GPU rank (model weights, scaling factors per GPU).
+

The export API (export_tensorrt_llm_checkpoint) can be used as follows:

+
import torch
+
+from modelopt.torch.export import export_tensorrt_llm_checkpoint
+
+with torch.inference_mode():
+    export_tensorrt_llm_checkpoint(
+        model,  # The quantized model.
+        decoder_type,  # The type of the model as str, e.g., gptj, llama or gptnext.
+        dtype,  # The weight data type for the exported unquantized layers.
+        export_dir,  # The directory where the exported files will be stored.
+        inference_tensor_parallel,  # The number of GPUs used for tensor parallelism at inference time.
+        inference_pipeline_parallel,  # The number of GPUs used for pipeline parallelism at inference time.
+    )
+
+
+

If the export_tensorrt_llm_checkpoint call is successful, the TensorRT-LLM checkpoint will be saved. Otherwise (e.g., if the decoder_type is not supported), a torch state_dict checkpoint will be saved instead.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Model support matrix for the TensorRT-LLM checkpoint export

| Model / Quantization | FP16 / BF16 | FP8 | INT8_SQ | INT4_AWQ |
|----------------------|-------------|-----|---------|----------|
| GPT2                 | Yes         | Yes | Yes     | No       |
| GPTJ                 | Yes         | Yes | Yes     | Yes      |
| LLAMA 2              | Yes         | Yes | Yes     | Yes      |
| LLAMA 3              | Yes         | Yes | No      | Yes      |
| Mistral              | Yes         | Yes | Yes     | Yes      |
| Mixtral 8x7B         | Yes         | Yes | No      | Yes      |
| Falcon 40B, 180B     | Yes         | Yes | Yes     | Yes      |
| Falcon 7B            | Yes         | Yes | Yes     | No       |
| Falcon RW 1B, 7B     | Yes         | Yes | Yes     | Yes      |
| MPT 7B, 30B          | Yes         | Yes | Yes     | Yes      |
| Baichuan 1, 2        | Yes         | Yes | Yes     | Yes      |
| Qwen 7B, 14B         | Yes         | Yes | Yes     | Yes      |
| ChatGLM2, 3 6B       | Yes         | Yes | Yes     | Yes      |
| Bloom                | Yes         | Yes | Yes     | Yes      |
| Phi-1, 2, 3          | Yes         | Yes | Yes     | Yes      |
| Nemotron 8           | Yes         | Yes | No      | Yes      |
| Gemma 2B, 7B         | Yes         | Yes | No      | Yes      |

+
+
+

Convert to TensorRT-LLM

+

Once the TensorRT-LLM checkpoint is available, please follow the TensorRT-LLM build API to build and deploy the quantized LLM; a rough sketch follows below.

+
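As a rough illustration, the exported checkpoint directory can typically be compiled into an engine with TensorRT-LLM's trtllm-build command-line tool, invoked here from Python. This is a minimal sketch, not part of the official guide: the paths are placeholders, and the exact flags depend on your TensorRT-LLM version.
+
+import subprocess
+
+# Hypothetical sketch: compile the ModelOpt-exported checkpoint into a
+# TensorRT-LLM engine. Paths are placeholders; flags vary across versions.
+subprocess.run(
+    [
+        "trtllm-build",
+        "--checkpoint_dir", "/tmp/modelopt_ckpt",  # the export_dir used above
+        "--output_dir", "/tmp/trtllm_engine",
+    ],
+    check=True,
+)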
+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/examples/0_all_examples.html b/examples/0_all_examples.html new file mode 100644 index 0000000..ba97a20 --- /dev/null +++ b/examples/0_all_examples.html @@ -0,0 +1,157 @@ + + + + + + + All ModelOpt Examples — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

All ModelOpt Examples

+

Please visit the TensorRT-Model-Optimizer GitHub repository +for all ModelOpt examples.

+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/genindex.html b/genindex.html new file mode 100644 index 0000000..c83e8db --- /dev/null +++ b/genindex.html @@ -0,0 +1,2682 @@ + + + + + + Index — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ + +

Index

+ +
+ _ + | A + | B + | C + | D + | E + | F + | G + | H + | I + | K + | L + | M + | N + | O + | P + | Q + | R + | S + | T + | U + | V + | W + | Z + +
+

_

+ + +
+ +

A

+ + + +
+ +

B

+ + + +
+ +

C

+ + + +
+ +

D

+ + + +
+ +

E

+ + + +
+ +

F

+ + + +
+ +

G

+ + + +
+ +

H

+ + + +
+ +

I

+ + + +
+ +

K

+ + + +
+ +

L

+ + + +
+ +

M

+ + + +
+ +

N

+ + + +
+ +

O

+ + + +
+ +

P

+ + + +
+ +

Q

+ + + +
+ +

R

+ + + +
+ +

S

+ + + +
+ +

T

+ + + +
+ +

U

+ + + +
+ +

V

+ + + +
+ +

W

+ + + +
+ +

Z

+ + +
+ + + +
+
+
+ +
+ +
+

© Copyright 2023-2024, NVIDIA Corporation.

+
+ + Built with Sphinx using a + theme + provided by Read the Docs. + + +
+
+
+
+
+ + + + diff --git a/getting_started/1_overview.html b/getting_started/1_overview.html new file mode 100644 index 0000000..1a3cb7b --- /dev/null +++ b/getting_started/1_overview.html @@ -0,0 +1,202 @@ + + + + + + + Overview — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Overview

+
+

NVIDIA TensorRT Model Optimizer

+

Minimizing inference costs presents a significant challenge as generative AI models continue to grow in complexity and size. +The NVIDIA TensorRT Model Optimizer (referred to as Model Optimizer, or ModelOpt) +is a library comprising state-of-the-art model optimization techniques, including quantization and sparsity, to compress models. +It accepts a torch or ONNX model as input and provides Python APIs for users to easily stack different model optimization +techniques to produce a quantized checkpoint. Seamlessly integrated within the NVIDIA AI software ecosystem, the quantized +checkpoint generated by Model Optimizer is ready for deployment in downstream inference frameworks like +TensorRT-LLM or TensorRT. +Further integrations are planned for NVIDIA NeMo and Megatron-LM +for training-in-the-loop optimization techniques. For enterprise users, 8-bit quantization with Stable Diffusion is also available on +NVIDIA NIM.

+

Model Optimizer is available for free to all developers on NVIDIA PyPI. +Visit the NVIDIA/TensorRT-Model-Optimizer repository for end-to-end +example scripts and recipes optimized for NVIDIA GPUs.

+
+

Techniques

+
+

Quantization

+

Quantization is an effective model optimization technique for large models. Quantization with Model Optimizer can compress +model size by 2x-4x, speeding up inference while preserving model quality. Model Optimizer enables highly performant +quantization formats including FP8, INT8, INT4, etc., and supports advanced algorithms such as SmoothQuant, AWQ, and +Double Quantization with easy-to-use Python APIs. Both post-training quantization (PTQ) and quantization-aware training (QAT) +are supported. Visit the Quantization Format page +for the list of supported formats.

+
+
+

Sparsity

+

Sparsity is a technique to further reduce the memory footprint of deep learning models and accelerate inference. +Model Optimizer provides the Python API mts.sparsify() to apply +weight sparsity to a given model. The mts.sparsify() API supports the NVIDIA 2:4 +sparsity pattern and various sparsification methods, such as NVIDIA ASP +and SparseGPT. It supports both post-training sparsity and sparsity with fine-tuning; +the latter workflow is recommended to minimize accuracy degradation.

+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/getting_started/2_installation.html b/getting_started/2_installation.html new file mode 100644 index 0000000..e4feb71 --- /dev/null +++ b/getting_started/2_installation.html @@ -0,0 +1,275 @@ + + + + + + + Installation — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Installation

+
+

System requirements

+

Model Optimizer (nvidia-modelopt) currently has the following system requirements:

+ + + + + + + + + + + + + + + + + + +

OS:            Linux, Windows
Architecture:  x86_64, aarch64, win_amd64
Python:        >=3.8,<3.12
PyTorch:       >=1.11
CUDA:          >=11.8 (Recommended)

+
+
+

Install Model Optimizer

+

ModelOpt, including its dependencies, can be installed via pip. Please review the +license terms of ModelOpt and any dependencies before use.

+
+
+
+
+

Setting up a virtual environment

+

We recommend setting up a virtual environment if you don’t have one already. Run the following +command to set up and activate a conda virtual environment named modelopt with Python 3.11:

+
conda create -n modelopt python=3.11 pip
+
+
+
conda activate modelopt
+
+
+

(Optional) Install desired PyTorch version

+

By default, the latest PyTorch version (torch>=1.11) available on pip will +be installed. If you want to install a specific PyTorch version for a specific CUDA version, please first +follow the instructions to install your desired PyTorch version. +For example, to install the latest torch>=1.11 with CUDA 11.8, run:

+
pip install torch --extra-index-url https://download.pytorch.org/whl/cu118
+
+
+

Identify correct partial dependencies

+

Note that when installing nvidia-modelopt without optional dependencies, only the bare-bones +requirements are installed, and none of the modules will work without the appropriate optional +dependencies (or the [all] optional dependencies). Below is a list of the optional dependencies that +need to be installed to correctly use the corresponding modules:

+ ++++ + + + + + + + + + + + + + + + + + + + +

| Module                 | Optional dependencies |
|------------------------|-----------------------|
| modelopt.deploy        | [deploy]              |
| modelopt.onnx          | [onnx]                |
| modelopt.torch         | [torch]               |
| modelopt.torch._deploy | [torch, deploy]       |

+

Additionally, we support the following 3rd-party plugins:

+ ++++ + + + + + + + + + + +

| Third-party package        | Optional dependencies |
|----------------------------|-----------------------|
| transformers (Huggingface) | [hf]                  |

+
+
+

Install Model Optimizer (nvidia-modelopt)

+
pip install "nvidia-modelopt[all]" --no-cache-dir --extra-index-url https://pypi.nvidia.com
+
+
+
+
+

Check installation

+
+

Tip

+

When you use ModelOpt’s PyTorch quantization APIs for the first time, it will compile the fast quantization kernels +using your installed torch and CUDA if available. +This may take a few minutes but subsequent quantization calls will be much faster. +To invoke the compilation now and check if it is successful, run the following command:

+
python -c "import modelopt.torch.quantization.extensions as ext; print(ext.cuda_ext); print(ext.cuda_ext_fp8)"
+
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/getting_started/3_quantization.html b/getting_started/3_quantization.html new file mode 100644 index 0000000..4f5f043 --- /dev/null +++ b/getting_started/3_quantization.html @@ -0,0 +1,221 @@ + + + + + + + Quick Start: Quantization — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Quick Start: Quantization

+
+

Quantization

+

Quantization is an effective technique to reduce the memory footprint of deep learning models and to +accelerate inference.

+

ModelOpt’s mtq.quantize() API enables +users to quantize a model with advanced algorithms like SmoothQuant, AWQ etc. ModelOpt supports both +Post Training Quantization (PTQ) and Quantization Aware Training (QAT).

+
+

Tip

+

Please refer to Quantization Formats for details on the ModelOpt supported quantization +formats and their use-cases.

+
+
+
+

PTQ for PyTorch models

+

mtq.quantize requires the model, +the appropriate quantization configuration and a forward loop as inputs. Here is a quick example of +quantizing a model with int8 SmoothQuant using +mtq.quantize:

+
import modelopt.torch.quantization as mtq
+
+# Setup the model
+model = get_model()
+
+# The quantization algorithm requires calibration data. Below we show a rough example of how to
+# set up a calibration data loader with the desired calib_size
+data_loader = get_dataloader(num_samples=calib_size)
+
+
+# Define the forward_loop function with the model as input. The data loader should be wrapped
+# inside the function.
+def forward_loop(model):
+    for batch in data_loader:
+        model(batch)
+
+
+# Quantize the model and perform calibration (PTQ)
+model = mtq.quantize(model, mtq.INT8_SMOOTHQUANT_CFG, forward_loop)
+
+
+

Refer to Quantization Configs for the quantization configurations available from ModelOpt.

+
+
+

Deployment

+

The quantized model is just like a regular PyTorch model and is ready for evaluation or deployment.

+

Huggingface or Nemo LLM models can be exported to TensorRT-LLM using ModelOpt. +Please see TensorRT-LLM Deployment guide for more +details.

+

The model can also be exported to ONNX using +torch.onnx.export, as sketched below.

+
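For instance, a minimal export sketch (the sample input shape and output filename are placeholders for illustration; adapt them to your model):
+
+import torch
+
+# Minimal sketch: export the quantized model with the standard PyTorch ONNX flow.
+# `model` is the quantized model from above.
+sample_input = torch.randn(1, 3, 224, 224)
+torch.onnx.export(model, sample_input, "quantized_model.onnx")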
+
+
Next Steps
• Learn more about quantization and advanced usage of Model Optimizer quantization in the +Quantization guide.
• Check out the end-to-end examples on GitHub for PTQ and QAT +here.
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/getting_started/6_sparsity.html b/getting_started/6_sparsity.html new file mode 100644 index 0000000..d5a768c --- /dev/null +++ b/getting_started/6_sparsity.html @@ -0,0 +1,213 @@ + + + + + + + Quick Start: Sparsity — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Quick Start: Sparsity

+
+

Sparsity

+

ModelOpt's sparsity feature is an effective technique to reduce the +memory footprint of deep learning models and accelerate inference. ModelOpt provides an +easy-to-use API, mts.sparsify(), to apply +weight sparsity to a given model. +mts.sparsify() supports +the NVIDIA 2:4 sparsity pattern and various sparsification +methods, such as NVIDIA ASP +and SparseGPT.

+

This guide provides a quick start to apply weight sparsity to a PyTorch model using ModelOpt.

+
+
+

Post-Training Sparsification (PTS) for PyTorch models

+

mts.sparsify() requires the model, +the appropriate sparsity configuration, and a forward loop as inputs. +Here is a quick example of sparsifying a model to the 2:4 sparsity pattern with the SparseGPT method using +mts.sparsify().

+
import modelopt.torch.sparsity as mts
+
+# Setup the model
+model = get_model()
+
+# Setup the data loaders. An example usage:
+data_loader = get_train_dataloader(num_samples=calib_size)
+
+# Define the sparsity configuration
+sparsity_config = {"data_loader": data_loader, "collect_func": lambda x: x}
+
+# Sparsify the model and perform calibration (PTS)
+model = mts.sparsify(model, mode="sparsegpt", config=sparsity_config)
+
+
+
+

Note

+

data_loader is only required in the case of data-driven sparsity, e.g., SparseGPT, for calibration. +sparse_magnitude does not require data_loader as it is purely based on the weights of the model.

+
+
+

Note

+

data_loader and collect_func can be substituted with a forward_loop that iterates the model through the +calibration dataset, as in the sketch below.

+
+
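For example, a minimal sketch of the forward_loop variant (assuming the data_loader from above; the "forward_loop" config key is an assumption based on the note, not confirmed by this page):
+
+# Hypothetical sketch: calibrate SparseGPT via a forward_loop instead of
+# data_loader/collect_func. The "forward_loop" key is assumed from the note.
+def forward_loop(model):
+    for batch in data_loader:
+        model(batch)
+
+model = mts.sparsify(model, mode="sparsegpt", config={"forward_loop": forward_loop})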
+
+
Next Steps
• Learn more about sparsity and advanced usage of ModelOpt sparsity in the +Sparsity guide.
• Check out the end-to-end examples on GitHub +here.
+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/guides/1_quantization.html b/guides/1_quantization.html new file mode 100644 index 0000000..fbfc259 --- /dev/null +++ b/guides/1_quantization.html @@ -0,0 +1,178 @@ + + + + + + + Quantization — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Quantization

+

The ModelOpt quantization toolkit supports quantization for NVIDIA's hardware and software stack. +Currently, ModelOpt supports quantization for PyTorch and ONNX models.

+

ModelOpt is based on simulated quantization in the original precision, which lets you simulate, test, and optimize +for the best trade-off between model accuracy and different low-precision formats. To +achieve actual speedups and memory savings, the model with simulated quantization can be exported to +deployment frameworks like TensorRT or TensorRT-LLM. Please refer to the +TensorRT-Model-Optimizer GitHub repository +for more details and examples.

+

Below, you can find the documentation for the quantization toolkit in ModelOpt:

+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/guides/5_sparsity.html b/guides/5_sparsity.html new file mode 100644 index 0000000..3b00a57 --- /dev/null +++ b/guides/5_sparsity.html @@ -0,0 +1,304 @@ + + + + + + + Sparsity — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Sparsity

+
+

Introduction

+

ModelOpt’s Sparsity module (modelopt.torch.sparsity) enables +you to sparsify the weights of your model. This can be useful for reducing the memory footprint of +your model, and can also be used to speed up inference.

+

Follow the steps described below to obtain a model with sparse weights using ModelOpt’s Sparsity +module modelopt.torch.sparsity:

+
1. Training: You can either train your model using the existing training pipeline or load a +pre-trained checkpoint for your model.
2. Sparsification: Sparsify the model using the provided +mts.sparsify API.
3. Checkpoint and re-load: Save the model via mto.save +and restore via mto.restore.
+

To find out more about Sparsity and related concepts, please refer to the section below +Sparsity Concepts.

+
+
+

Post-Training Sparsification

+

Post-training sparsification is the process of converting a dense model to a sparse model without +retraining. The simplest way to sparsify a model is to use +the mts.sparsify API.

+

The mts.sparsify API takes a sparsity +config and a sparsity format as input and returns a sparse model. The sparsity config is a +dictionary specifying the layers to sparsify and the optional dataloader for +calibration in data-driven sparsity, e.g., SparseGPT.

+

mts.sparsify() supports the NVIDIA ASP and SparseGPT methods for magnitude-based +and data-driven sparsity, respectively.

+

Example usage:

+
import torch
+from transformers import AutoModelForCausalLM
+import modelopt.torch.sparsity as mts
+
+# User-defined model
+model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6b")
+
+# Configure and convert for sparsity
+sparsity_config = {
+    # data_loader is required for sparsity calibration
+    "data_loader": calib_dataloader,
+    "collect_func": lambda x: x,
+}
+sparse_model = mts.sparsify(
+    model,
+    "sparsegpt",  # or "sparse_magnitude"
+    config=sparsity_config,
+)
+
+
+
+

Note

+

data_loader is only required in case of data-driven sparsity, e.g., for calibration in +sparsegpt. sparse_magnitude does not require data_loader as it uses magnitude-based +method for thresholding.

+
+
+

Save and restore the sparse model

+

To store the sparse model for future usage, call +mto.save():

+
import modelopt.torch.opt as mto
+
+mto.save(sparse_model, "modelopt_sparse_model.pth")
+
+
+
+

Note

+

mto.save() will save the model state_dict, +along with the sparse masks and metadata to correctly re-create the sparse model later.

+
+

To restore the saved sparse model you can use +mto.restore():

+
import modelopt.torch.opt as mto
+
+# Re-initialize the original, unmodified model
+model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6b")
+
+# Restore the sparse model and metadata.
+sparse_model = mto.restore(model, "modelopt_sparse_model.pth")
+
+
+
+

Note

+

mto.restore() will restore the model state_dict, +along with the sparse masks and metadata of each sparse module. The plain PyTorch module will be +converted to a sparse module. The sparsity mask will be automatically enforced when the model +weight is accessed.

+
+
+

Note

+

mts.export() will export the sparse +model to a plain PyTorch model, as in the one-line sketch below. The sparse masks will be applied to the model weights and all the +sparse metadata will be removed. After exporting, sparsity will no longer be enforced during +subsequent fine-tuning. If you want to continue fine-tuning, do not export the model.

+
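For example (a one-line sketch; only export once fine-tuning is finished):
+
+# Permanently apply the sparsity masks to the weights and strip sparse metadata.
+model = mts.export(sparse_model)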
+
+
+
+

Sparsity Concepts

+

Below, we will provide an overview of ModelOpt’s sparsity feature as well as its basic +concepts and terminology.

+
+

Structured and Unstructured Sparsity

+

Weight sparsity is a model optimization technique where a fraction of the weights in a model are set +to zero. Model sparsity can be broadly categorized as structured and unstructured sparsity. +Unstructured sparsity refers to the case where the zero weights are randomly distributed across the +weight matrix. Unstructured sparsity is more flexible but can lead to poor utilization on +highly-parallelized hardware architectures like GPUs. Structured sparsity, on the other hand, is +more efficient in terms of memory access and can be exploited to achieve higher math throughput. +Structured sparsity can usually be achieved by enforcing a specific sparsity pattern on the weights.

+
+
+

N:M Sparsity

+

N:M sparsity refers to a special type of fine-grained structured pattern, where in each block of M +contiguous elements, at most N are nonzero. Due to its regularity, N:M sparsity can be efficiently +implemented on GPU architectures and provides the following benefits:

+
+
• Reduced memory bandwidth requirement: N:M sparsity patterns have a smaller memory bandwidth +requirement than both dense weights and weights with unstructured sparsity patterns.
• Higher math throughput: Sparse Tensor Cores deliver higher math throughput for +matrix-multiply operations when the first argument is a compressed N:M sparse matrix. +For example, the 2:4 sparsity pattern allows for 2x higher math throughput on sparse Tensor Cores.
+
+

On current NVIDIA architectures (Ampere or later), 2:4 sparsity, where in each block of four +contiguous elements two are nonzero, is supported for accelerated inference on sparse Tensor Cores; the sketch below shows what the pattern means in practice.

+
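To make the 2:4 constraint concrete, here is a small illustrative check (not a ModelOpt API) that a weight tensor satisfies it:
+
+import torch
+
+def satisfies_2_to_4(weight: torch.Tensor) -> bool:
+    # Group elements into contiguous blocks of 4 and require <= 2 nonzeros each.
+    blocks = weight.reshape(-1, 4)
+    return bool(((blocks != 0).sum(dim=1) <= 2).all())
+
+w = torch.tensor([[0.5, 0.0, -1.2, 0.0], [0.0, 0.3, 0.0, 0.7]])
+print(satisfies_2_to_4(w))  # True: each block of four has at most two nonzeros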
+
+

Sparsification algorithm

+

There are many ways to achieve weight sparsity. A commonly used approach is magnitude-based sparsity, +where in each block of M elements, the N largest elements are retained and the rest are set to +zero. Magnitude-based sparsity is simple and easy to implement, but may not retain the accuracy of +the original model as well. Other methods, such as data-driven sparsity (e.g., Optimal Brain Surgeon), +usually deliver better accuracy. ModelOpt supports both magnitude-based (NVIDIA ASP) and +data-driven sparsity (SparseGPT).

+
+
+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/guides/_basic_quantization.html b/guides/_basic_quantization.html new file mode 100644 index 0000000..797ada4 --- /dev/null +++ b/guides/_basic_quantization.html @@ -0,0 +1,227 @@ + + + + + + + Basic Concepts — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Basic Concepts

+

A quantization format consists of the precision format, the block format, and the calibration +algorithm. +The detailed list of available quantization formats can be found in Quantization Formats. +Below we provide an overview of the important topics:

+
+

Precision format

+

The precision format defines the bit-width of the quantized values. Generally, there are integer +formats (sign bit + mantissa bits) and floating-point formats (sign bit + exponent bits + mantissa +bits); the short sketch below contrasts their dynamic ranges. FP8 FORMATS FOR DEEP LEARNING provides a detailed +explanation of the floating-point formats.

+
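For intuition, a short sketch contrasting the int8 and FP8 (E4M3) ranges (requires a PyTorch build with FP8 dtypes, e.g., torch >= 2.1):
+
+import torch
+
+# Integer format: fixed range of representable integers.
+print(torch.iinfo(torch.int8).min, torch.iinfo(torch.int8).max)  # -128 127
+# Floating-point format: much wider dynamic range at the same bit-width.
+print(torch.finfo(torch.float8_e4m3fn).max)                      # 448.0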
+
+

Scaling factor

+

The scaling factor is a floating-point value used to scale and unscale the values before and +after the quantized operation, respectively. It maps the range of the +original values to the range of the quantized values, is shared across the +quantized values in the same block, and is calculated during the calibration process. A worked int8 example follows below.

+
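As a worked example, a minimal per-tensor int8 sketch of this scale/unscale round trip (illustration only, not ModelOpt's internal code):
+
+import torch
+
+x = torch.randn(4, 8)
+amax = x.abs().max()   # global maximum observed during calibration
+scale = amax / 127.0   # maps [-amax, amax] onto the int8 grid [-127, 127]
+x_q = torch.clamp(torch.round(x / scale), -127, 127)  # scale and round
+x_dq = x_q * scale     # unscale after the quantized operation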
+
+

Block format

+

The block format defines the way the tensor is divided into blocks for sharing the scaling factors. +The most common block format is per-tensor quantization, where the whole tensor is quantized as a +single block with one global scaling factor. Other block formats include per-channel quantization, +where each channel is quantized separately (see the per-channel sketch below), and the fine-grained per-block quantization, where the +tensor is divided into fixed-size blocks along the channel dimension. For low-bit quantization (e.g. +4-bit), per-block quantization is typically needed to preserve the accuracy.

+
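The per-channel variant of the same sketch simply computes one scaling factor per output channel (again, illustration only):
+
+import torch
+
+# One amax, and hence one scaling factor, per output channel (dim 0).
+w = torch.randn(16, 64)
+scale = w.abs().amax(dim=1, keepdim=True) / 127.0  # shape (16, 1)
+w_q = torch.clamp(torch.round(w / scale), -127, 127)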

Weight and activation may share different precision and block formats. For example, in GPTQ and AWQ, +the weight is quantized to 4-bit while activation stays in high precision. Weight-only quantization +is helpful for bandwidth-constrained scenarios, while weight and activation quantization can reduce +both bandwidth and computation cost.

+
+
+

Calibration algorithm

+

The calibration algorithm calculates scaling factors and potentially adjusts weights to maximize +accuracy post quantization. The simplest calibration algorithm is “max calibration”, in which the +scaling factor is calculated from the global maximum of the tensor, and the weights are left unchanged and +rounded to the nearest quantized value. Examples of more advanced calibration algorithms are +Entropy Calibration, +SmoothQuant, and AWQ.

+
+
+

Quantization-aware training (QAT)

+

QAT can be viewed as regular PTQ followed by fine-tuning during which the original, unquantized +weights are updated to minimize the loss. Compared to regular fine-tuning, we must model the effect +of quantization on the forward and backward passes. Commonly used QAT techniques like +Straight-Through Estimator (STE) or STE with clipping have +fixed scaling factors and tune the weights during training to minimize the loss. ModelOpt implements +STE with clipping for QAT; a minimal sketch of the idea follows below.

+
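A minimal sketch of STE with clipping as a custom autograd function (for intuition only; ModelOpt's actual implementation differs):
+
+import torch
+
+class FakeQuantSTE(torch.autograd.Function):
+    @staticmethod
+    def forward(ctx, x, scale):
+        ctx.save_for_backward(x, scale)
+        # Fake-quantize: round onto the int8 grid, then dequantize.
+        return torch.clamp(torch.round(x / scale), -127, 127) * scale
+
+    @staticmethod
+    def backward(ctx, grad_out):
+        x, scale = ctx.saved_tensors
+        # Straight-through gradient, zeroed outside the representable range.
+        mask = (x.abs() <= 127 * scale).to(grad_out.dtype)
+        return grad_out * mask, None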
+
+

More Readings

+ +
+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/guides/_choosing_quant_methods.html b/guides/_choosing_quant_methods.html new file mode 100644 index 0000000..820a6c0 --- /dev/null +++ b/guides/_choosing_quant_methods.html @@ -0,0 +1,252 @@ + + + + + + + Best practices to choose the right quantization methods — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Best practices to choose the right quantization methods

+

A quantization method comprises three primary components:

+
1. Weight precision format
2. Activation precision format
3. Calibration algorithms
+

Typically, in small-batch inference scenarios (batch size ≤ 4), inference is often ‘memory-bound’: the throughput is limited by the weight loading time from GPU memory to GPU cache, i.e., inference is memory-bandwidth limited. +In this regime of operation, weight-only quantization methods such as INT4 AWQ or INT4-FP8 AWQ give superior performance improvements.

+

Conversely, for large-batch inference scenarios, such as serving scenarios (batch size ≥ 16), both memory bandwidth and computation density become crucial factors. +Consequently, it’s recommended to opt for a quantization method that has both weight & activation quantization as well as lower-precision computation kernels. For batch size ≥ 16, the choice of quantization method can be model-specific.

+

We suggest prioritizing FP8 first, as FP8 causes very little accuracy degradation and gives strong performance; a config-selection sketch follows below. +If FP8 performance does not meet your requirements, you could try INT4-FP8 AWQ. +If your deployment is on Ampere GPUs or earlier, we recommend using INT4 AWQ or INT8 SQ.

+
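In ModelOpt's PyTorch API, choosing a method amounts to choosing a config. A sketch, assuming the config names from the documented config list and placeholder model/forward_loop:
+
+import modelopt.torch.quantization as mtq
+
+config = mtq.FP8_DEFAULT_CFG          # start with FP8
+# config = mtq.INT4_AWQ_CFG           # weight-only AWQ for small-batch or Ampere
+# config = mtq.INT8_SMOOTHQUANT_CFG   # INT8 SQ alternative
+model = mtq.quantize(model, config, forward_loop)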

Based on specific use cases, users might have different tolerances for accuracy degradation and calibration time. The summary below lists the tradeoffs* to consider when choosing a quantization method.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Quantization Methods: Performance (small-batch) / Performance (large-batch) / Accuracy degradation / Details

FP8: Medium / Medium / Very Low
  • FP8 per-tensor weight & activation quantization with min-max calibration.
  • Compresses FP16/BF16 model to 50% of original size.
  • Calibration time: minutes**.
  • Deploy via TensorRT, TensorRT-LLM. Supported GPUs: Ada, Hopper and later.

INT8 SmoothQuant: Medium / Medium / Medium
  • 8-bit integer quantization with a variant of SmoothQuant calibration.
  • Per-channel weight quantization, per-tensor activation quantization.
  • Compresses FP16/BF16 model to 50% of original size.
  • Calibration time: minutes**.
  • Deploy using TensorRT, TensorRT-LLM. Supported on most GPUs.

INT4 Weights only AWQ (W4A16): High / Low / Low
  • 4-bit integer group-wise/block-wise weight only quantization with AWQ calibration.
  • Compresses FP16/BF16 model to 25% of original size.
  • Calibration time: tens of minutes**.
  • Deploy via TensorRT-LLM. Supported GPUs: Ampere and later.

INT4-FP8 AWQ (W4A8): High / Medium / Low
  • 4-bit integer group-wise/block-wise weight quantization, FP8 per-tensor activation quantization & AWQ calibration.
  • Compresses FP16/BF16 model to 25% of original size.
  • Calibration time: tens of minutes**.
  • Deploy via TensorRT-LLM. Supported GPUs: Ada, Hopper and later.
+
+
* The performance and impact are measured on 10+ popular LLMs. We’ll follow up with more data points.
+
** Calibration time is subject to the actual model size.
+
+
+
Please see how to apply these quantization methods below:
+
+
+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/guides/_onnx_quantization.html b/guides/_onnx_quantization.html new file mode 100644 index 0000000..abbea04 --- /dev/null +++ b/guides/_onnx_quantization.html @@ -0,0 +1,254 @@ + + + + + + + ONNX Quantization (Beta) — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

ONNX Quantization (Beta)

+

ModelOpt provides ONNX quantization that works together with TensorRT Explicit Quantization (EQ). The key advantages offered by ModelOpt’s ONNX quantization are:

+
1. Easy to use for non-expert users.
2. White-box design allowing expert users to customize the quantization process.
3. Better support for vision transformers.
+

Currently ONNX quantization only supports INT8 quantization.

+
+

Note

+

ModelOpt ONNX quantization generates new ONNX models with QDQ nodes following TensorRT rules. +For a real speedup, the generated ONNX model should be compiled into a TensorRT engine.

+
+
+

Requirements

+
1. TensorRT >= 8.6 (>= 9.1 preferred). Please refer to the TensorRT 9.1 download link.
+
+
+

Apply Post Training Quantization (PTQ)

+

PTQ should be done with a calibration dataset. If a calibration dataset is not provided, ModelOpt will use random scales for the QDQ nodes.

+
+

Prepare calibration dataset

+

ModelOpt supports two types of calibration data format: image directory or numpy file.

+

The image-directory format only works for single-input ONNX models.

+

Numpy file works for both single-input and multi-input ONNX models. In the case of multi-input ONNX models, the numpy file should be a dictionary with keys as input names and values as numpy arrays.

+
import numpy as np
+
+# Example numpy file for single-input ONNX
+calib_data = np.random.randn(batch_size, channels, h, w)
+np.save("calib_data.npy", calib_data)
+
+# Example numpy file for single/multi-input ONNX
+# Dict key should match the input names of ONNX
+calib_data = {
+    "input_name": np.random.randn(*shape),
+    "input_name2": np.random.randn(*shape2),
+}
+np.savez("calib_data.npz", **calib_data)  # unpack the dict so keys match the ONNX input names
+
+
+
+
+

Call PTQ function

+
import modelopt.onnx.quantization as moq
+
+calibration_data = np.load(calibration_data_path)
+
+moq.quantize(
+    onnx_path=onnx_path,
+    calibration_data=calibration_data,
+    output_path="quant.onnx",
+    quantize_mode="int8",
+)
+
+
+

Alternatively, you can call the PTQ function from the command line:

+
python -m modelopt.onnx.quantization \
+    --calibration_data_path /calibration/data/in/npz/npy/format \
+    --output_path /path/to/the/quantized/onnx/output \
+    --quantize_mode int8
+
+
+

By default, after running the calibration, the quantization tool inserts the QDQ nodes following a TensorRT-friendly QDQ insertion algorithm. Users can change the default quantization behavior by tweaking API params like op_types_to_quantize and op_types_to_exclude. See modelopt.onnx.quantization.quantize() for details.

+
+
+
+

Deploy Quantized ONNX Model

+

trtexec is a command-line tool provided by TensorRT. Typically, it’s located in the /usr/src/tensorrt/bin/ directory. Below is a simple command to compile the quantized ONNX model generated by the previous step into a TensorRT engine file.

+
trtexec --onnx=quant.onnx --saveEngine=quant.engine --best
+
+
+
+
+

Compare the performance

+

The following command builds an engine from the original model using fp16 precision. After building, check the reported “Latency” and “Throughput” fields and compare them against those of the quantized engine built above.

+
trtexec --onnx=original.onnx --saveEngine=fp16.engine --fp16
+
+
+
+

Note

+

If you replace the --fp16 flag with the --best flag, this command will create an int8 engine with TensorRT’s implicit quantization.

+
+
+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/guides/_pytorch_quantization.html b/guides/_pytorch_quantization.html new file mode 100644 index 0000000..8719540 --- /dev/null +++ b/guides/_pytorch_quantization.html @@ -0,0 +1,406 @@ + + + + + + + PyTorch Quantization — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

PyTorch Quantization

+

ModelOpt PyTorch quantization is refactored from and based on pytorch_quantization.

+

Key advantages offered by ModelOpt’s PyTorch quantization:

+
1. Support for advanced quantization formats, e.g., block-wise INT4 and FP8.
2. Native support for LLM models in Hugging Face and NeMo.
3. Advanced quantization algorithms, e.g., SmoothQuant and AWQ.
4. Deployment support for ONNX and NVIDIA TensorRT.
+
+

Note

+

ModelOpt quantization is fake quantization, which means it only simulates the low-precision computation in PyTorch. +Real speedup and memory saving should be achieved by exporting the model to deployment frameworks.

+
+
+

Tip

+

This guide covers the usage of ModelOpt quantization. For details on the quantization formats and recommended use cases, +please refer to Quantization Formats.

+
+
+

Apply Post Training Quantization (PTQ)

+

PTQ can be achieved with simple calibration on a small set of training or evaluation data (typically 128-512 samples) after converting a regular PyTorch model to a quantized model. +The simplest way to quantize a model using ModelOpt is to use mtq.quantize().

+

mtq.quantize() takes a model, a quantization config and a forward loop callable as input. The quantization config specifies the layers to quantize, their quantization formats as well as the algorithm to use for calibration. Please +refer to Quantization Configs for the list of quantization configs supported by default. You may also define your own quantization config as +described in customizing quantizer config.

+

ModelOpt supports algorithms such as AWQ, SmoothQuant or max for calibration. Please refer to mtq.calibrate +for more details.

+

The forward loop is used to pass data through the model in order to collect statistics for calibration. +It should wrap around the calibration dataloader and the model.

+

Here is an example of performing PTQ using ModelOpt:

+
import modelopt.torch.quantization as mtq
+
+# Setup the model
+model = get_model()
+
+# Select quantization config
+config = mtq.INT8_SMOOTHQUANT_CFG
+
+# Quantization needs calibration data. Set up the calibration data loader.
+# An example of creating a calibration data loader looks like the following:
+data_loader = get_dataloader(num_samples=calib_size)
+
+
+# Define forward_loop. Please wrap the data loader in the forward_loop
+def forward_loop(model):
+    for batch in data_loader:
+        model(batch)
+
+
+# Quantize the model and perform calibration (PTQ)
+model = mtq.quantize(model, config, forward_loop)
+
+
+

To verify that the quantizer nodes are placed correctly in the model, let’s print the quantized model summary as shown below:

+
# Print quantization summary after successfully quantizing the model with mtq.quantize
+# This will show the quantizers inserted in the model and their configurations
+mtq.print_quantization_summary(model)
+
+
+

After PTQ, the model can be exported to ONNX with the normal PyTorch ONNX export flow.

+
torch.onnx.export(model, sample_input, onnx_file)
+
+
+

ModelOpt also supports direct export of Huggingface or Nemo LLM models to TensorRT-LLM for deployment. +Please see TensorRT-LLM Deployment for more details.

+
+
+

Quantization-aware Training (QAT)

+

QAT is the technique of fine-tuning a quantized model to recover model quality degradation due to quantization. +While QAT requires much more compute resources than PTQ, it is highly effective in recovering model quality.

+

A model quantized using mtq.quantize() could be directly fine-tuned with QAT. +Typically during QAT, the quantizer states are frozen and the model weights are fine-tuned.

+

Here is an example of performing QAT:

+
import modelopt.torch.quantization as mtq
+
+# Select quantization config
+config = mtq.INT8_DEFAULT_CFG
+
+
+# Define forward loop for calibration
+def forward_loop(model):
+    for data in calib_set:
+        model(data)
+
+
+# QAT after replacement of regular modules to quantized modules
+model = mtq.quantize(model, config, forward_loop)
+
+# Fine-tune with original training pipeline
+# Adjust learning rate and training duration
+train(model, train_loader, optimizer, scheduler, ...)
+
+
+
+

Tip

+

We recommend QAT for 10% of the original training epochs. For LLMs, we find that QAT fine-tuning for even +less than 1% of the original pre-training duration is often sufficient to recover the model quality.

+
+
+
+

Storing and loading quantized model

+

The model weights and quantizer states need to be saved for future use or to resume training. +The quantizer states of the model should be saved and loaded separately from the model weights.

+

mto.modelopt_state() provides the quantizer states of the model. +The quantizer states can be saved with torch.save. For example:

+
import modelopt.torch.opt as mto
+
+# Save quantizer states
+torch.save(mto.modelopt_state(model), "modelopt_state.pt")
+
+# Save model weights using torch.save or custom check-pointing function
+# trainer.save_model("model.pt")
+torch.save(model.state_dict(), "model.pt")
+
+
+

To restore a quantized model, first restore the quantizer states using +mto.restore_from_modelopt_state. +After quantizer states are restored, load the model weights. For example:

+
import modelopt.torch.opt as mto
+
+# Initialize the un-quantized model
+model = ...
+
+# Load quantizer states
+model = mto.restore_from_modelopt_state(model, torch.load("modelopt_state.pt"))
+
+# Load model weights using torch.load or custom check-pointing function
+# model.from_pretrained("model.pt")
+model.load_state_dict(torch.load("model.pt"))
+
+
+
+
+

Advanced Topics

+
+

TensorQuantizer

+

Under the hood, ModelOpt’s mtq.quantize() inserts +TensorQuantizer +(quantizer modules) into model layers such as linear and conv layers, and patches their forward method to perform quantization.

+

To create a TensorQuantizer instance, you need to specify a QuantDescriptor, which +describes quantization parameters such as the number of bits, the axis, etc.

+

Here is an example of creating a quantizer module:

+
from modelopt.torch.quantization.tensor_quant import QuantDescriptor
+from modelopt.torch.quantization.nn import TensorQuantizer
+
+# Create quantizer descriptor
+quant_desc = QuantDescriptor(num_bits=8, axis=(-1,), unsigned=True)
+
+# Create quantizer module
+quantizer = TensorQuantizer(quant_desc)
+
+quant_x = quantizer(x)  # Quantize input x
+
+
+
+
+

Customize quantizer config

+

ModelOpt inserts input quantizer, weight quantizer and output quantizer into common layers, but by default disables the output quantizer. +Expert users who want to customize the default quantizer configuration can update the config dictionary provided to mtq.quantize using wildcard or filter function match.

+

Here is an example of specifying a custom quantizer configuration to mtq.quantize:

+
# Select quantization config
+config = mtq.INT8_DEFAULT_CFG.copy()
+config["quant_cfg"]["*.bmm.output_quantizer"] = {
+    "enable": True
+}  # Enable output quantizer for bmm layer
+
+# Perform PTQ/QAT;
+model = mtq.quantize(model, config, forward_loop)
+
+
+
+
+

Custom quantized module and quantizer placement

+

modelopt.torch.quantization has a default set of quantized modules (see modelopt.torch.quantization.nn.modules for a detailed list) and quantizer placement rules (input, output and weight quantizers). However, there might be cases where you want to define a custom quantized module and/or customize the quantizer placement.

+

ModelOpt provides a way to define custom quantized modules and register them with the quantization framework. This allows you to:

+
1. Handle unsupported modules, e.g., a subclassed Linear layer that requires quantization.
2. Customize the quantizer placement, e.g., placing the quantizer in special places like the KV Cache of an Attention layer.
+

Here is an example of defining a custom quantized LayerNorm module:

+
import torch.nn as nn
+import torch.nn.functional as F
+
+from modelopt.torch.quantization.nn import TensorQuantizer
+
+
+class QuantLayerNorm(nn.LayerNorm):
+    def __init__(self, normalized_shape):
+        super().__init__(normalized_shape)
+        self._setup()
+
+    def _setup(self):
+        # Method to setup the quantizers
+        self.input_quantizer = TensorQuantizer()
+        self.weight_quantizer = TensorQuantizer()
+
+    def forward(self, input):
+        # You can customize the quantizer placement anywhere in the forward method
+        input = self.input_quantizer(input)
+        weight = self.weight_quantizer(self.weight)
+        return F.layer_norm(input, self.normalized_shape, weight, self.bias, self.eps)
+
+
+

After defining the custom quantized module, you need to register this module so mtq.quantize API will automatically replace the original module with the quantized version. +Note that the custom QuantLayerNorm must have a _setup method which instantiates the quantizer attributes that are called in the forward method. +Here is the code to register the custom quantized module:

+
import modelopt.torch.quantization as mtq
+
+# Register the custom quantized module
+mtq.register(original_cls=nn.LayerNorm, quantized_cls=QuantLayerNorm)
+
+# Perform PTQ
+# nn.LayerNorm modules in the model will be replaced with the QuantLayerNorm module
+model = mtq.quantize(model, config, forward_loop)
+
+
+

The quantization config might need to be customized if you define a custom quantized module. Please see +customizing quantizer config for more details.

+
+
+

Fast evaluation

+

Weight folding avoids repeated quantization of the weights during each inference forward pass and speeds up evaluation. This can be done with the following code:

+
# Fold quantizer together with weight tensor
+mtq.fold_weight(quantized_model)
+
+# Run model evaluation
+user_evaluate_func(quantized_model)
+
+
+
+

Note

+

After weight folding, the model can no longer be exported to ONNX or fine-tuned with QAT.

+
+
+
+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/index.html b/index.html new file mode 100644 index 0000000..12a754a --- /dev/null +++ b/index.html @@ -0,0 +1,195 @@ + + + + + + + Welcome to Model Optimizer (ModelOpt) documentation! — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Welcome to Model Optimizer (ModelOpt) documentation!

+ +
+

Optimization Guides

+ +
+
+

Deployment

+ +
+
+

Examples

+ +
+ +
+

Support

+ +
+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/objects.inv b/objects.inv new file mode 100644 index 0000000..61bab42 Binary files /dev/null and b/objects.inv differ diff --git a/py-modindex.html b/py-modindex.html new file mode 100644 index 0000000..35a1e3a --- /dev/null +++ b/py-modindex.html @@ -0,0 +1,606 @@ + + + + + + Python Module Index — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ + +

Python Module Index

m

modelopt
    modelopt.deploy
    modelopt.deploy.llm
    modelopt.deploy.llm.generate
    modelopt.deploy.llm.model_config_trt
    modelopt.deploy.llm.nemo_utils
    modelopt.onnx
    modelopt.onnx.op_types
    modelopt.onnx.quantization
    modelopt.onnx.quantization.calib_utils
    modelopt.onnx.quantization.graph_utils
    modelopt.onnx.quantization.gs_patching
    modelopt.onnx.quantization.int4
    modelopt.onnx.quantization.operators
    modelopt.onnx.quantization.ort_patching
    modelopt.onnx.quantization.ort_utils
    modelopt.onnx.quantization.partitioning
    modelopt.onnx.quantization.qdq_utils
    modelopt.onnx.quantization.quant_utils
    modelopt.onnx.quantization.quantize
    modelopt.onnx.utils
    modelopt.torch
    modelopt.torch.export
    modelopt.torch.export.distribute
    modelopt.torch.export.layer_utils
    modelopt.torch.export.model_config
    modelopt.torch.export.model_config_export
    modelopt.torch.export.model_config_utils
    modelopt.torch.export.postprocess
    modelopt.torch.export.scaling_factor_utils
    modelopt.torch.export.tensorrt_llm_utils
    modelopt.torch.export.transformer_engine
    modelopt.torch.opt
    modelopt.torch.opt.config
    modelopt.torch.opt.conversion
    modelopt.torch.opt.dynamic
    modelopt.torch.opt.hparam
    modelopt.torch.opt.mode
    modelopt.torch.opt.plugins
    modelopt.torch.opt.searcher
    modelopt.torch.opt.utils
    modelopt.torch.quantization
    modelopt.torch.quantization.calib
    modelopt.torch.quantization.calib.calibrator
    modelopt.torch.quantization.calib.histogram
    modelopt.torch.quantization.calib.max
    modelopt.torch.quantization.config
    modelopt.torch.quantization.conversion
    modelopt.torch.quantization.extensions
    modelopt.torch.quantization.mode
    modelopt.torch.quantization.model_calib
    modelopt.torch.quantization.model_quant
    modelopt.torch.quantization.nn
    modelopt.torch.quantization.nn.functional
    modelopt.torch.quantization.nn.modules
    modelopt.torch.quantization.nn.modules.clip
    modelopt.torch.quantization.nn.modules.quant_activations
    modelopt.torch.quantization.nn.modules.quant_batchnorm
    modelopt.torch.quantization.nn.modules.quant_conv
    modelopt.torch.quantization.nn.modules.quant_instancenorm
    modelopt.torch.quantization.nn.modules.quant_linear
    modelopt.torch.quantization.nn.modules.quant_module
    modelopt.torch.quantization.nn.modules.quant_pooling
    modelopt.torch.quantization.nn.modules.tensor_quantizer
    modelopt.torch.quantization.optim
    modelopt.torch.quantization.plugins
    modelopt.torch.quantization.quant_modules
    modelopt.torch.quantization.tensor_quant
    modelopt.torch.quantization.utils
    modelopt.torch.sparsity
    modelopt.torch.sparsity.config
    modelopt.torch.sparsity.magnitude
    modelopt.torch.sparsity.mode
    modelopt.torch.sparsity.module
    modelopt.torch.sparsity.plugins
    modelopt.torch.sparsity.searcher
    modelopt.torch.sparsity.sparsegpt
    modelopt.torch.sparsity.sparsification
    modelopt.torch.utils
    modelopt.torch.utils.cpp_extension
    modelopt.torch.utils.dataset_utils
    modelopt.torch.utils.distributed
    modelopt.torch.utils.graph
    modelopt.torch.utils.list
    modelopt.torch.utils.logging
    modelopt.torch.utils.network
    modelopt.torch.utils.perf
    modelopt.torch.utils.random
    modelopt.torch.utils.tensor

Model Optimizer Changelog

+
+

0.11 (2024-05-07)

+

Backward Breaking Changes

  • [!!!] The package was renamed from ammo to modelopt. The new full product name is NVIDIA TensorRT Model Optimizer. PLEASE CHANGE ALL YOUR REFERENCES FROM ammo to modelopt, including any paths and links!

  • Default installation pip install nvidia-modelopt will now only install minimal core dependencies. The following optional dependencies are available depending on the features that are being used: [deploy], [onnx], [torch], [hf]. To install all dependencies, use pip install "nvidia-modelopt[all]".

  • Deprecated the inference_gpus arg in modelopt.torch.export.model_config_export.torch_to_tensorrt_llm_checkpoint. Users should use inference_tensor_parallel instead.

  • Experimental modelopt.torch.deploy module is now available as modelopt.torch._deploy.

New Features

  • modelopt.torch.sparsity now supports sparsity-aware training (SAT). Both SAT and post-training sparsification support chaining with other modes, e.g. SAT + QAT.

  • modelopt.torch.quantization natively supports distributed data and tensor parallelism while estimating quantization parameters. The data and tensor parallel groups need to be registered with the modelopt.torch.utils.distributed.set_data_parallel_group and modelopt.torch.utils.distributed.set_tensor_parallel_group APIs (see the sketch after this list). By default, the data parallel group is set as the default distributed group and the tensor parallel group is disabled.

  • modelopt.torch.opt now supports chaining multiple optimization techniques that each require modifications to the same model, e.g., you can now sparsify and quantize a model at the same time.

  • modelopt.onnx.quantization supports the FLOAT8 quantization format with the Distribution calibration algorithm.

  • Native support of modelopt.torch.opt with FSDP (Fully Sharded Data Parallel) for torch>=2.1. This includes sparsity, quantization, and any other model modification & optimization.

  • Added FP8 ONNX quantization support in modelopt.onnx.quantization.

  • Added Windows (win_amd64) support for ModelOpt released wheels. Currently supported for the modelopt.onnx submodule only.
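A minimal sketch of registering the parallel groups named above before calibration; data_parallel_group and tensor_parallel_group are placeholders for torch.distributed process groups created by the user:

import modelopt.torch.utils.distributed as dist

# Register the groups before calibration so that quantization parameters are
# synchronized across the intended ranks. Both arguments below are placeholder
# torch.distributed.ProcessGroup objects created elsewhere by the user.
dist.set_data_parallel_group(data_parallel_group)
dist.set_tensor_parallel_group(tensor_parallel_group)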

Bug Fixes

  • Fixed the compatibility issue of modelopt.torch.sparsity with FSDP.

  • Fixed an issue in dynamic dim handling in modelopt.onnx.quantization with random calibration data.

  • Fixed a graph node naming issue after the opset conversion operation.

  • Fixed an issue in negative dim handling, like dynamic dim, in modelopt.onnx.quantization with random calibration data.

  • Fixed input handling so that .pb files are accepted as the input file.

  • Fixed an issue with copying extra data to a tmp folder for ONNX PTQ.

modelopt API


deploy

Model Optimizer's deployment package.

onnx

Model optimization subpackage for onnx.

torch

Model optimization and deployment subpackage for torch.


deploy

+

Modules


modelopt.deploy.llm

LLM deployment package with tensorrt_llm.

+

Model Optimizer’s deployment package.


generate

+

A wrapper over the TensorRT-LLM high level API runner.

+

Classes


LLM

A wrapper over the tensorrt_llm.hlapi.llm.LLM for LLM profiling and validation.

+
+
+class LLM
+

Bases: LLM

+

A wrapper over the tensorrt_llm.hlapi.llm.LLM for LLM profiling and validation.

+
+
+__init__(engine_dir, tokenizer, kv_cache_config={})
+

Initializes the LLM runner class.

+
+
Parameters:
+
+
+
+
+ +
+
+generate_text(prompts, max_new_tokens, temperature=1.0, keep_input_prompt=True)
+

Generates the text based on the input prompts.

+
+
Parameters:
+
    +
  • prompts (Iterable[str] | Iterable[List[int]]) – The input prompts. Could be a list of strings or token lists.

  • +
  • max_new_tokens (int) – The max output token length.

  • +
  • temperature (float) – The sampling temperature.

  • keep_input_prompt (bool) – Set to include the input prompts in the outputs.

  • +
+
+
Returns:
+

a list of output text strings if max_beam_width is 1 or a 2D list with shape [batch, beam].

+
+
Return type:
+

List[str] | List[List[str]]

+
+
+
+ +
+
+property max_beam_width
+

Get the max beam width from the LLM instance.

+
+ +
+
+property max_input_len
+

Get the max input length from the LLM instance.
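A minimal usage sketch of this wrapper; the engine directory and tokenizer below are placeholders for an engine built from a ModelOpt-exported checkpoint and its matching tokenizer:

from modelopt.deploy.llm.generate import LLM

# engine_dir and tokenizer are placeholders supplied by the user.
llm = LLM(engine_dir="/path/to/trt_llm_engine", tokenizer=tokenizer)
outputs = llm.generate_text(["Hello, my name is"], max_new_tokens=32)
print(outputs)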


llm

+

Modules


modelopt.deploy.llm.generate

A wrapper over the TensorRT-LLM high level API runner.

modelopt.deploy.llm.model_config_trt

The API to convert the TensorRT-LLM checkpoint to the engines.

modelopt.deploy.llm.nemo_utils

The utils to support Nemo models.

+

LLM deployment package with tensorrt_llm.

+

Model Optimizer supports automatic conversion of a Model Optimizer-exported LLM to TensorRT-LLM engines for accelerated inference.

+

Convert to TensorRT-LLM:

+

Model Optimizer offers a single API to build the exported model from the quantization stage on top of the TensorRT-LLM build API.

+
from modelopt.deploy.llm import build_tensorrt_llm

build_tensorrt_llm(
    pretrained_config=pretrained_config_json_path,
    engine_dir=engine_dir,
    max_input_len=max_input_len,
    max_output_len=max_output_len,
    max_batch_size=max_batch_size,
    max_beam_width=max_num_beams,
    num_build_workers=num_build_workers,
)

Batched Inference with TensorRT-LLM:

+

Model Optimizer offers an easy-to-use Python API to run batched offline inference to test the TensorRT-LLM engine(s) built.

+

For example:

+
from modelopt.deploy.llm import generate, load

# The host_context loading (called once).
host_context = load(tokenizer=tokenizer, engine_dir=engine_dir, num_beams=num_beams)
# generate can be called multiple times as long as the host_context is present.
outputs = generate(input_texts, max_output_len, host_context)
print(outputs)

model_config_trt

+

The API to convert the TensorRT-LLM checkpoint to the engines.

+

Functions


build_tensorrt_llm

The API to convert the TensorRT-LLM checkpoint to engines.

build_tensorrt_llm_rank

The API to convert the TensorRT-LLM checkpoint to the engine for a single rank.

+
+
+build_tensorrt_llm(pretrained_config, engine_dir, max_input_len=200, max_output_len=200, max_batch_size=1, max_beam_width=1, max_num_tokens=None, num_build_workers=1, enable_sparsity=False, max_prompt_embedding_table_size=0)
+

The API to convert the TensorRT-LLM checkpoint to engines.

+
+
Parameters:
+
    +
  • pretrained_config (str | Path) – The pretrained_config (file path) exported by modelopt.torch.export.export_tensorrt_llm_checkpoint.

  • +
  • engine_dir (str | Path) – The target output directory to save the built tensorrt_llm engines.

  • +
  • max_input_len (int) – The max input sequence length.

  • +
  • max_output_len (int) – The max output sequence length.

  • +
  • max_batch_size (int) – The max batch size.

  • +
  • max_beam_width (int) – The max beam search width.

  • +
  • max_num_tokens (int | None) – The max number of tokens that can be processed at the same time. For the context phase, max_num_tokens counts the full sequence length. For the generation phase, max_num_tokens counts only the tokens under generation, as the input sequence has already been processed and cached. max_num_tokens should fall between [max_batch_size * max_beam_width, max_batch_size * max_input_len] when inflight batching is enabled. A higher max_num_tokens means more GPU memory will be used for resource allocation. If not specified, max_num_tokens will be set to the max bound. Details: https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/perf_best_practices.md

  • +
  • num_build_workers (int) – The number of workers to use for the building process. If build time is a concern, you can increase this worker count up to the number of GPUs, at the cost of a higher CPU memory usage footprint. If CPU memory is limited, num_build_workers should be set to 1 to conserve memory.

  • +
  • enable_sparsity (bool) – The switch to enable sparsity for the TRT compiler. With this flag, the TRT compiler will search tactics of sparse kernels for each node whose weight tensors are sparsified. This increases engine building time significantly.

  • +
  • max_prompt_embedding_table_size (int) – Length of the prepended/concatenated embeddings (either multimodal feature embeddings or prompt tuning embeddings) to the LLM input embeddings.

  • +
+
+
+
+ +
+
+build_tensorrt_llm_rank(pretrained_config, weights, rank, engine_dir, max_input_len=200, max_output_len=200, max_batch_size=1, max_beam_width=1, max_num_tokens=None, enable_sparsity=False, max_prompt_embedding_table_size=0)
+

The API to convert the TensorRT-LLM checkpoint to the engine for a single rank.

+
+
Parameters:
+
    +
  • pretrained_config (Dict[str, Any]) – The pretrained_config (dict) exported by modelopt.torch.export.torch_to_tensorrt_llm_checkpoint.

  • +
  • weights (Dict[str, Tensor]) – A dict of model weights and scaling factors. If not provided, the weights will be loaded from the directory of the pretrained_config.

  • +
  • rank (int) – the GPU rank of the engine to build.

  • +
  • engine_dir (str | Path) – The target output directory to save the built tensorrt_llm engines.

  • +
  • max_input_len (int) – The max input sequence length.

  • +
  • max_output_len (int) – The max output sequence length.

  • +
  • max_batch_size (int) – The max batch size.

  • +
  • max_beam_width (int) – The max beam search width.

  • +
  • max_num_tokens (int | None) – The max number of tokens that can be processed at the same time. For the context phase, max_num_tokens counts the full sequence length. For the generation phase, max_num_tokens counts only the tokens under generation, as the input sequence has already been processed and cached. max_num_tokens should fall between [max_batch_size * max_beam_width, max_batch_size * max_input_len] when inflight batching is enabled. A higher max_num_tokens means more GPU memory will be used for resource allocation. If not specified, max_num_tokens will be set to the max bound. Details: https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/perf_best_practices.md

  • +
  • enable_sparsity (bool) – The switch to enable sparsity for the TRT compiler. With this flag, the TRT compiler will search tactics of sparse kernels for each node whose weight tensors are sparsified. This increases engine building time significantly.

  • +
  • max_prompt_embedding_table_size (int) – Length of the prepended/concatenated embeddings (either multimodal feature embeddings or prompt tuning embeddings) to the LLM input embeddings.

  • +
+
+
+
+ +

nemo_utils

+

The utils to support Nemo models.

+

Classes


CustomSentencePieceTokenizer

Custom tokenizer based on Nemo SentencePieceTokenizer.

+

Functions


get_nemo_tokenizer

Build tokenizer from Nemo tokenizer config.

get_tokenzier

Loads the tokenizer from the decoded NEMO weights dir.

+
+
+class CustomSentencePieceTokenizer
+

Bases: PreTrainedTokenizer

+

Custom tokenizer based on Nemo SentencePieceTokenizer.

+

This extension of SentencePieceTokenizer is to make the API consistent with HuggingFace tokenizers in order to run evaluation tools in the examples/tensorrt_llm/scripts/nemo_example.sh script.

+
+
+__init__(*args, **kwargs)
+

Constructor method with extra check for non-legacy SentencePieceTokenizer variant.

+
+ +
+
+batch_decode(ids, **kwargs)
+

Method introduced for HF tokenizers API consistency for evaluation scripts.

+
+ +
+
+batch_encode_plus(texts, **kwargs)
+

Method introduced for HF tokenizers API consistency for evaluation scripts.

+

Note: kwargs are ignored.

+
+ +
+
+decode(ids, **kwargs)
+

Method introduced for HF tokenizers API consistency for evaluation scripts.

+

Note: kwargs are ignored.

+
+ +
+
+encode(text, return_tensors=None, max_length=None, **kwargs)
+

Method introduced for HF tokenizers API consistency for evaluation scripts.

+

Note: kwargs other than return_tensors and max_length are ignored.

+
+ +
+
+property eos_token
+

eos_token.

+
+ +
+
+property eos_token_id
+

eos_token_id.

+
+ +
+
+property pad_token
+

pad_token.

+
+ +
+
+property pad_token_id
+

pad_token_id.

+
+ +
+ +
+
+get_nemo_tokenizer(tokenizer_cfg_path)
+

Build tokenizer from Nemo tokenizer config.

+

Refer to the logic of the get_nmt_tokenizer function on how to instantiate tokenizers in Nemo; see https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/nlp/modules/common/tokenizer_utils.py.

+
+
Parameters:
+

tokenizer_cfg_path (str) –

+
+
+
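A brief usage sketch; the config path below is a placeholder:

from modelopt.deploy.llm.nemo_utils import get_nemo_tokenizer

# Path to a Nemo tokenizer config file; a placeholder for illustration.
tokenizer = get_nemo_tokenizer("nemo_model/tokenizer_config.yaml")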
+ +
+
+get_tokenzier(tokenizer_dir_or_path)
+

Loads the tokenizer from the decoded NEMO weights dir.

+
+
Parameters:
+

tokenizer_dir_or_path (Path) –

+
+
Return type:
+

PreTrainedTokenizer

+
+
+
+ +

onnx

+

Modules


modelopt.onnx.op_types

Utility functions to categorize onnx ops.

modelopt.onnx.quantization

Model optimization subpackage for onnx quantization.

modelopt.onnx.utils

Utility functions related to onnx.

+

Model optimization subpackage for onnx.


op_types

+

Utility functions to categorize onnx ops.

+

Functions


get_quantizable_op_types

Returns a set of quantizable op types.

is_binary_op

Returns whether the given op is a binary operator or not.

is_control_flow_op

Returns whether the given op type is of Control Flow category or not.

is_conversion_op

Returns whether the given op type is of Conversion category or not.

is_copy_op

Returns whether the given op is a copy operator or not.

is_default_quantizable_op_by_ort

Returns if ort quantizes the op type by default.

is_fusible_reduction_op

Returns whether the given op type is of reduction category and fusible by Myelin.

is_generator_op

Returns whether the given op type is of Generator category or not.

is_irregular_mem_access_op

Returns whether the given op type is of Irregular mem access category or not.

is_linear_op

Returns whether the given op type is of Linear category or not.

is_modifier_op

Returns whether the given op type is of Modifier category or not.

is_multiclass_op

Returns whether the given op type is of Multiclass category or not.

is_non_reshape_copy_op

Returns whether the given op is a non-reshape copy op or not.

is_normalization_op

Returns whether the given op type is of Normalization category or not.

is_pointwise_or_elementwise_op

Returns whether the given op type is of Pointwise or Elementwise category or not.

is_pooling_or_window_op

Returns whether the given op type is of Pooling/Window category or not.

is_recurrent_op

Returns whether the given op type is of Recurrent category or not.

is_selection_op

Returns whether the given op type is of Selection category or not.

is_sequence_op

Returns whether the given op type is of Sequence category or not.

is_shape_op

Returns whether the given op type is of Shape category or not.

is_unary_op

Returns whether the given op is a unary operator or not.

+
+
+get_quantizable_op_types(op_types_to_quantize)
+

Returns a set of quantizable op types.

+

Note. This function should be called after quantize._configure_ort() is called once. This returns quantizable op types either from the user-supplied parameter or from modelopt.onnx's default quantizable ops setting.

+
+
Parameters:
+

op_types_to_quantize (List[str]) –

+
+
Return type:
+

List[str]

+
+
+
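A brief usage sketch; per the note above, this assumes quantize._configure_ort() has already run once:

from modelopt.onnx.op_types import get_quantizable_op_types

# op_types_to_quantize here is the user-supplied list described above.
op_types = get_quantizable_op_types(["Conv", "MatMul"])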
+ +
+
+is_binary_op(op_type)
+

Returns whether the given op is a binary operator or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_control_flow_op(op_type)
+

Returns whether the given op type is of Control Flow category or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_conversion_op(op_type)
+

Returns whether the given op type is of Conversion category or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_copy_op(op_type)
+

Returns whether the given op is a copy operator or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_default_quantizable_op_by_ort(op_type)
+

Returns if ort quantizes the op type by default.

+

Note. Subject to change with different ORT versions. Note. Users can use the nodes_to_quantize and/or op_types_to_quantize arguments to quantize non-default operations. Reference: https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/python/tools/quantization/registry.py

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_fusible_reduction_op(op_type)
+

Returns whether the given op type is of reduction category and fusible by Myelin.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_generator_op(op_type)
+

Returns whether the given op type is of Generator category or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_irregular_mem_access_op(op_type)
+

Returns whether the given op type is of Irregular mem access category or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_linear_op(op_type)
+

Returns whether the given op type is of Linear category or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_modifier_op(op_type)
+

Returns whether the given op type is of Modifier category or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_multiclass_op(op_type)
+

Returns whether the given op type is of Multiclass category or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_non_reshape_copy_op(op_type)
+

Returns whether the given op is a non-reshape copy op or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_normalization_op(op_type)
+

Returns whether the given op type is of Normalization category or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_pointwise_or_elementwise_op(op_type)
+

Returns whether the given op type is of Pointwise or Elementwise category or not.

+

This considers only the fusible types.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_pooling_or_window_op(op_type)
+

Returns whether the given op type is of Pooling/Window category or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_recurrent_op(op_type)
+

Returns whether the given op type is of Recurrent category or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_selection_op(op_type)
+

Returns whether the given op type is of Selection category or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_sequence_op(op_type)
+

Returns whether the given op type is of Sequence category or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_shape_op(op_type)
+

Returns whether the given op type is of Shape category or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +
+
+is_unary_op(op_type)
+

Returns whether the given op is a unary operator or not.

+
+
Parameters:
+

op_type (str) –

+
+
+
+ +

calib_utils

+

Provides basic calibration utils.

+

Classes


CalibrationDataProvider

Calibration data provider class.

RandomDataProvider

Calibration data reader class with random data provider.

+
+
+class CalibrationDataProvider
+

Bases: CalibrationDataReader

+

Calibration data provider class.

+
+
+__init__(onnx_path, calibration_data)
+

Initializes the data provider class with the calibration data iterator.

+
+
Parameters:
+
    +
  • onnx_path (str) – Path to the ONNX model.

  • +
  • calibration_data (ndarray | Dict[str, ndarray]) – Numpy data to calibrate the model. Ex. If a model has input shapes like {“sample”: (2, 4, 64, 64), “timestep”: (1,), “encoder_hidden_states”: (2, 16, 768)}, the calibration data should have a dictionary of tensors with shapes like {“sample”: (1024, 4, 64, 64), “timestep”: (512,), “encoder_hidden_states”: (1024, 16, 768)} to calibrate with 512 samples.

  • +
+
+
+
+ +
+
+get_next()
+

Returns the next available calibration input from the reader.

+
+ +
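A brief usage sketch mirroring the shape convention above; the model path and array shape are placeholders:

import numpy as np

from modelopt.onnx.quantization.calib_utils import CalibrationDataProvider

# A single input named "sample" is assumed here for illustration.
calib_data = {"sample": np.zeros((1024, 4, 64, 64), dtype=np.float32)}
provider = CalibrationDataProvider("model.onnx", calib_data)
first_batch = provider.get_next()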
+ +
+
+class RandomDataProvider
+

Bases: CalibrationDataReader

+

Calibration data reader class with random data provider.

+
+
+__init__(onnx_path)
+

Initializes the data reader class with random calibration data.

+
+
Parameters:
+

onnx_path (str) –

+
+
+
+ +
+
+get_next()
+

Returns the next available calibration input from the reader.

+
+ +
+ +

graph_utils

+

Provides ONNX graph related utils for QDQ placement.

+

Functions


build_non_residual_input_map

Builds a map of non-residual Add input name to the Add node name from the given graph.

classify_partition_nodes

We should partially quantize the partition nodes with inputs outside of the partition.

filter_quantizable_kgen_heads

Returns the list of kgen head names if it follows a CASK partition.

get_fusible_backbone

Returns the linear backbone node for a given node if it matches the pattern.

has_const_input

Returns whether the given node has any constant input.

has_path_type

Checks if the given node is start/end of a given forward/backward path type.

is_const_input

Returns whether the given tensor is an initializer or produced by const-foldable nodes.

print_stat

Collect and print stats of the quantized model.

remove_partial_input_qdq

Modifies the onnx model by removing QDQ nodes from the marked inputs, ex.

+
+
+build_non_residual_input_map(graph)
+

Builds a map of non-residual Add input name to the Add node name from the given graph.

+

This assumes that the Add layer only has 2 inputs.

+

We will refer to a subgraph which has a Convolution node with a single output that is summed (element-wise) with another non-constant input-tensor as a “residual-add” subgraph, because it occurs in modern convnets that use residual connections.

+
+
Parameters:
+

graph (Graph) – Onnx model graph.

+
+
Returns:
+

Dictionary of Add node names vs their non-residual input name.

+
+
Return type:
+

Dict[str, str]

+
+
+
+ +
+
+classify_partition_nodes(partitions)
+

We should partially quantize the partition nodes with inputs outside of the partition.

+
+
Parameters:
+

partitions (List[List[Node]]) – Partitions created by modelopt ptq algo.

+
+
Returns:
+

List of non-quantizable nodes. List of quantizable nodes. List of partially-quantizable inputs with non-quantizable input info as (src, dst, input_name).

+
+
Return type:
+

Tuple[List[Node], List[Node], List[Tuple[Node, Node, str]]]

+
+
+
+ +
+
+filter_quantizable_kgen_heads(cask_fusible_partitions, kgen_partitions, quantizable_op_types)
+

Returns the list of kgen head names if it follows a CASK partition.

+
+
Parameters:
+
    +
  • cask_fusible_partitions (List[List[Node]]) –

  • +
  • kgen_partitions (List[List[Node]]) –

  • +
  • quantizable_op_types (List[str]) –

  • +
+
+
Return type:
+

Tuple[List[Node], List[Tuple[Node, Node, str]]]

+
+
+
+ +
+
+get_fusible_backbone(node, graph)
+

Returns the linear backbone node for a given node if it matches the pattern.

+

TensorRT fuses convolution with BN, Relu etc. when in some specific pattern. This rule tries to match some of those patterns. Note. BiasAdd and ConstMul are optional in path types.

+
+
Parameters:
+
    +
  • node (Node) – Start node of the pattern.

  • +
  • graph (Graph) – ONNX model graph.

  • +
+
+
Returns:
+

Backbone node of the given node, None if not found.

+
+
Return type:
+

Node | None

+
+
+
+ +
+
+has_const_input(node)
+

Returns whether the given node has any constant input.

+
+
Parameters:
+

node (Node) –

+
+
Return type:
+

bool

+
+
+
+ +
+
+has_path_type(node, graph, path_type, is_forward, wild_card_types=[], path_nodes=[])
+

Checks if the given node is start/end of a given forward/backward path type.

+

Note: a path can be forward or backward w.r.t. a node depending on the next-level nodes. Additionally, this method can work with optional nodes and collect the traversed path.

+
+
Parameters:
+
    +
  • node (Node) – Start node of the path.

  • +
  • graph (Graph) – ONNX model graph.

  • +
  • path_type (List[str]) – Path types to match from the given node.

  • +
  • is_forward (bool) – Whether to match forward or backward path.

  • +
  • wild_card_types (List[str]) – Wild card types, these type of nodes are skipped and not matched with the path_type.

  • +
  • path_nodes (List[Node]) – Accumulated nodes in the matched path.

  • +
+
+
Returns:
+

Bool, whether the given node is start/end of the given forward/backward path type.

+
+
Return type:
+

bool

+
+
+
+ +
+
+is_const_input(tensor)
+

Returns whether the given tensor is an initializer or produced by const-foldable nodes.

+
+
Parameters:
+

tensor (Tensor) –

+
+
Return type:
+

bool

+
+
+
+ +
+
+print_stat(graph, verbose)
+

Collect and print stats of the quantized model.

+
+
Parameters:
+
    +
  • graph (Graph) –

  • +
  • verbose (bool) –

  • +
+
+
Return type:
+

None

+
+
+
+ +
+
+remove_partial_input_qdq(graph, no_quantize_inputs)
+

Modifies the onnx model by removing QDQ nodes from the marked inputs, ex. non-residual inputs etc.

+
+
Parameters:
+
    +
  • graph (Graph) – Onnx model graph.

  • +
  • no_quantize_inputs (List[Tuple[Node, Node, str]]) – List non-quantizable input info as (src, dst, input_name)

  • +
+
+
Return type:
+

None

+
+
+
+ +

gs_patching

+

Patch onnx_graphsurgeon to support explicitly setting a dtype.

+

Functions


patch_gs_modules

Dynamically patch graphsurgeon modules.

+
+
+patch_gs_modules()
+

Dynamically patch graphsurgeon modules.

+
+ +

quantization

+

Modules


modelopt.onnx.quantization.calib_utils

Provides basic calibration utils.

modelopt.onnx.quantization.graph_utils

Provides ONNX graph related utils for QDQ placement.

modelopt.onnx.quantization.gs_patching

Patch onnx_graphsurgeon to support explicitly setting a dtype.

modelopt.onnx.quantization.int4

Perform INT4 WoQ on an ONNX model, and write it back to disk.

modelopt.onnx.quantization.operators

Additional or modified QDQ operators on top of ORT quantized operators.

modelopt.onnx.quantization.ort_patching

This module contains all the patched functions from ORT.

modelopt.onnx.quantization.ort_utils

Provides basic ORT inference utils; should be replaced by modelopt.torch.ort_client.

modelopt.onnx.quantization.partitioning

Utilities related to partitioning the ONNX model to place QDQ nodes.

modelopt.onnx.quantization.qdq_utils

Various utils to support inserting Q/DQ nodes.

modelopt.onnx.quantization.quant_utils

Provides some basic utilities that can be used in quantize() methods.

modelopt.onnx.quantization.quantize

Convert ONNX model without QDQ nodes + calib data into ONNX model with QDQ nodes.

+

Model optimization subpackage for onnx quantization.

+

int4

+

Perform INT4 WoQ on an ONNX model, and write it back to disk.

+

Classes


AWQClipHelper

AWQ calibration helper class.

+

Functions


dq_tensor

Dequantizes w with scale factors s.

find_scales

Find scale factors for w via s = max(w.block(block_size)) / 7.

quant_tensor

Quantize a tensor using alpha etc.

quantize_int4

Applies INT4 WoQ (Weight-Only-Quantization) to an ONNX file.

quantize_int4_awq_clip

Quantizes onnx_model using Activation-aware Quantization, a.k.a. the AWQ algorithm.

quantize_int4_rtn

Quantizes onnx_model using the RTN (Round-to-Nearest) algorithm.

rtn

Quantizes w with scale factors s via Round-to-Nearest.

+
+
+class AWQClipHelper
+

Bases: object

+

AWQ calibration helper class.

+
+
+__init__(w, block_size)
+

Initializes AWQClipHelper with a module weight.

+
+
Parameters:
+

block_size (int) –

+
+
+
+ +
+
+alpha_step = 0.05
+
+ +
+
+alphas = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0]
+
+ +
+
+min_alpha = 0.5
+
+ +
+
+update_best_params()
+

Updates the loss dictionary.

+
+ +
+ +
+
+dq_tensor(w, s, block_size)
+

Dequantizes w with scale factors s.

+
+
Parameters:
+
    +
  • w (ndarray) –

  • +
  • s (ndarray) –

  • +
  • block_size (int) –

  • +
+
+
Return type:
+

ndarray

+
+
+
+ +
+
+find_scales(w, block_size, alpha=1.0)
+

Find scale factors for w via s = max(w.block(block_size)) / 7.

+
+
Parameters:
+
    +
  • w (ndarray) –

  • +
  • block_size (int) –

  • +
  • alpha (float) –

  • +
+
+
Return type:
+

ndarray

+
+
+
+ +
+
+quant_tensor(w, block_size, alpha=1.0)
+

Quantize a tensor using alpha etc. and return the quantized tensor.

+
+
Parameters:
+
    +
  • w (ndarray) –

  • +
  • block_size (int) –

  • +
  • alpha (float) –

  • +
+
+
+
+ +
+
+quantize_int4(quantize_mode, onnx_model, calibration_data_reader=None, use_external_data_format=True, gemm_io_type=1)
+

Applies INT4 WoQ (Weight-Only-Quantization) to an ONNX file.

+

Currently only GEMM quantization is supported.

+
+
Parameters:
+
    +
  • quantize_mode (str) –

  • +
  • onnx_model (ModelProto) –

  • +
  • calibration_data_reader (CalibrationDataReader) –

  • +
  • use_external_data_format (bool) –

  • +
  • gemm_io_type (onnx.TensorProto data type) –

  • +
+
+
Return type:
+

ModelProto

+
+
+
+ +
+
+quantize_int4_awq_clip(onnx_model, data_reader, use_external_data_format, gemm_io_type)
+

Quantizes onnx_model using Activation-aware Quantization, a.k.a. the AWQ algorithm.

+
+
Parameters:
+
    +
  • onnx_model (ModelProto) –

  • +
  • data_reader (CalibrationDataReader) –

  • +
  • use_external_data_format (bool) –

  • +
  • gemm_io_type (onnx.TensorProto data type) –

  • +
+
+
Return type:
+

ModelProto

+
+
+
+ +
+
+quantize_int4_rtn(onnx_model, gemm_io_type, dq_only=False)
+

Quantizes onnx_model using the RTN (Round-to-Nearest) algorithm.

+

This algorithm computes scale factors by computing s = max(abs(block)) / 8, for each block. The quantized weights are computed via Q(w) = round_to_even(w / s), where round_to_even denotes rounding ties to the nearest even integer (i.e. 1.5, 2.5 both round to 2).

+

Always selects the first dimension (0) to block over. This is because we must batch over the Cin dimension, and in ONNX, weights are always plugged into the RHS (i.e. y = x @ W).

+
+
Parameters:
+
    +
  • onnx_model (ModelProto) –

  • +
  • gemm_io_type (onnx.TensorProto data type) –

  • +
  • dq_only (bool) –

  • +
+
+
Return type:
+

ModelProto

+
+
+
+ +
+
+rtn(w, s, block_size)
+

Quantizes w with scale factors s via Round-to-Nearest.

+

Ties are broken by rounding to the nearest even number.

+
+
Parameters:
+
    +
  • w (ndarray) –

  • +
  • s (ndarray) –

  • +
  • block_size (int) –

  • +
+
+
Return type:
+

ndarray

+
+
+
+ +
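A minimal numpy sketch of the blockwise RTN scheme described above, under the stated assumptions (block over dimension 0, s = max(abs(block)) / 8 per block, round-half-to-even); this is an illustration, not the library's implementation:

import numpy as np

def rtn_sketch(w: np.ndarray, block_size: int) -> tuple[np.ndarray, np.ndarray]:
    # Block over the first dimension (Cin), per the note above.
    # Assumes Cin is divisible by block_size.
    cin, cout = w.shape
    blocks = w.reshape(cin // block_size, block_size, cout)
    s = np.abs(blocks).max(axis=1, keepdims=True) / 8.0
    q = np.rint(blocks / s)  # np.rint rounds ties to the nearest even integer
    q = np.clip(q, -8, 7)    # keep values in the signed 4-bit range
    return q.reshape(w.shape), s.squeeze(1)

wq, scales = rtn_sketch(np.random.randn(64, 32).astype(np.float32), block_size=16)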

operators

+

Additional or modified QDQ operators on top of ORT quantized operators.

+

Classes


QDQConvTranspose

QDQ for ConvTranspose operator.

QDQNormalization

By default, ORT does not quantize Normalization ops.

+
+
+class QDQConvTranspose
+

Bases: QDQOperatorBase

+

QDQ for ConvTranspose operator.

+
+
+__init__(onnx_quantizer, onnx_node)
+

ConvTranspose quantizer init.

+
+ +
+
+quantize()
+

Main function to quantize the ConvTranspose ops.

+
+ +
+ +
+
+class QDQNormalization
+

Bases: QDQOperatorBase

+

By default, ORT does not quantize Normalization ops. This module is intended to help with that.

+

Note. QDQOperatorBase is not sufficient for dynamic input only quantization.

+
+
+__init__(onnx_quantizer, onnx_node)
+

Normalization quantizer init.

+
+ +
+
+quantize()
+

Main function to quantize the Normalization ops.

+
+ +
+ +

ort_patching

+

This module contains all the patched functions from ORT.

+

Functions


patch_ort_modules

Patches the ORT modules.

+
+
+patch_ort_modules()
+

Patches the ORT modules.

+
+ +

ort_utils

+

Provides basic ORT inference utils; should be replaced by modelopt.torch.ort_client.

+

Functions


create_inference_session

Create an OnnxRuntime InferenceSession.

+
+
+create_inference_session(onnx_path)
+

Create an OnnxRuntime InferenceSession.

+
+
Parameters:
+

onnx_path (str) –

+
+
+
+ +

partitioning

+

Utilities related to partitioning the ONNX model to place QDQ nodes.

+

Functions


find_fusible_partitions

Traverses the graph and collects all cask/kgen fusible partitions.

find_hardcoded_patterns

Finds some non-quantizable pre-defined patterns.

find_layer_norm_partitions

Finds the layer norm patterns in the graph.

find_mha_partitions

Finds the MHA patterns in the graph that should not be quantized.

find_non_quantizable_partitions_from_patterns

Finds fusible partition from fixed patterns.

find_quantizable_nodes

Return the graph ops which are quantizable but not partitioned yet.

get_skiped_output_layers

Returns the names of the non-quantizable output layers.

+
+
+find_fusible_partitions(graph, partitioned_nodes, non_residual_inputs)
+

Traverses the graph and collects all cask/kgen fusible partitions.

+
+
Parameters:
+
    +
  • graph (Graph) – Onnx model graph.

  • +
  • partitioned_nodes (Set[str]) – Set of already partitioned nodes.

  • +
  • non_residual_inputs (Dict[str, str]) – Non-residual input map.

  • +
+
+
Returns:
+

List of partitions that are fusible by CASK with Conv/MatMul backbone. +List of KGEN partitions with pointwise ops only.

+
+
Return type:
+

Tuple[List[List[Node]], List[List[Node]]]

+
+
+
+ +
+
+find_hardcoded_patterns(graph)
+

Finds some non-quantizable pre-defined patterns.

+

Note. Matching this tail pattern causes MTL_v1 -5.5%: [“ReduceSum”, “Add”, “Div”, “Mul”, “ReduceSum”, “Sub”, “Pow”, “Mul”, “ReduceSum”, “Sqrt”]

+
+
Parameters:
+

graph (Graph) –

+
+
Return type:
+

List[List[Node]]

+
+
+
+ +
+
+find_layer_norm_partitions(graph)
+

Finds the layer norm patterns in the graph.

+
+
Parameters:
+

graph (Graph) –

+
+
Return type:
+

List[List[Node]]

+
+
+
+ +
+
+find_mha_partitions(graph)
+

Finds the MHA patterns in the graph that should not be quantized.

+

A common MHA implementation looks like this: t -> MatMul -> (optional) Pointwise ops (such as Add, Mul, Sub) -> Softmax -> MatMul -> output. Patterns that do not look like that should not be quantized (at least for now).

+
+
Parameters:
+

graph (Graph) –

+
+
Return type:
+

List[List[Node]]

+
+
+
+ +
+
+find_non_quantizable_partitions_from_patterns(graph)
+

Finds fusible partition from fixed patterns.

+

A fused kernel's counterpart is often a subgraph of native ops in onnx. Those patterns are identified here and quantized to match compiler expectations.

+
+
Parameters:
+

graph (Graph) –

+
+
Return type:
+

List[List[str]]

+
+
+
+ +
+
+find_quantizable_nodes(graph, nodes_to_quantize, partitioned_nodes, quantizable_op_types)
+

Return the graph ops which are quantizable but not partitioned yet.

+
+
Parameters:
+
    +
  • graph (Graph) –

  • +
  • nodes_to_quantize (List[Node]) –

  • +
  • partitioned_nodes (Set[str]) –

  • +
  • quantizable_op_types (List[str]) –

  • +
+
+
Return type:
+

List[Node]

+
+
+
+ +
+
+get_skiped_output_layers(graph, paritially_quantizable_nodes)
+

Returns the names of the non-quantizable output layers.

+
+
Parameters:
+
    +
  • graph (Graph) –

  • +
  • paritially_quantizable_nodes (List[Node]) –

  • +
+
+
Return type:
+

List[str]

+
+
+
+ +

qdq_utils

+

Various utils to support inserting Q/DQ nodes.

+

Functions


insert_dq_nodes

Insert new initializers and DQ nodes into graph.

insert_qdq_nodes

Insert scales and QDQ nodes into graph.

make_gs_dequantize_node

Create a GraphSurgeon Dequantize node.

make_gs_dequantize_output

Create a GraphSurgeon variable representing the output of a quantize node.

make_gs_quantize_node

Create a GraphSurgeon Quantize node.

make_gs_quantize_output

Create a GraphSurgeon variable representing the output of a quantize node.

make_gs_quantized_weight

Create a GraphSurgeon tensor from a quantized weight tensor.

make_gs_scale

Create a GraphSurgeon scale tensor from the given numpy array.

make_gs_zp

Create a GraphSurgeon zero-point tensor of all zeroes with the given shape.

use_trt_qdq_ops

Globally set node names to TRT custom names.

+
+
+insert_dq_nodes(graph, scales, quantized_weights)
+

Insert new initializers and DQ nodes into graph.

+
+
Parameters:
+
    +
  • graph (Graph) – The graph to modify.

  • +
  • weights – A map from ONNX initializer name to tensor.

  • +
  • scales (Dict[str, ndarray]) – A map from ONNX initializer name to desired scale factor for that initializer.

  • +
  • dq_only – Whether to only insert dq nodes.

  • +
  • quantized_weights (Dict[str, ndarray]) –

  • +
+
+
+
+ +
+
+insert_qdq_nodes(graph, scales, weight_map)
+

Insert scales and QDQ nodes into graph.

+
+
Parameters:
+
    +
  • graph (Graph) – The graph to modify.

  • +
  • scales (Dict[str, ndarray]) – A map from ONNX initializer name to desired scale factor for that initializer.

  • +
  • weight_map (Dict[str, Tensor]) – A map from ONNX initializer name to graphsurgeon tensor.

  • +
+
+
+
+ +
+
+make_gs_dequantize_node(name, inputs, outputs)
+

Create a GraphSurgeon Dequantize node.

+

name is the desired _basename_ of the node.

+
+
Parameters:
+
    +
  • name (str) –

  • +
  • inputs (Sequence[Tensor]) –

  • +
  • outputs (Sequence[Tensor]) –

  • +
+
+
Return type:
+

Node

+
+
+
+ +
+
+make_gs_dequantize_output(name, shape, dtype)
+

Create a GraphSurgeon variable representing the output of a quantize node.

+

name is the desired _basename_ of the node.

+
+
Parameters:
+
    +
  • name (str) –

  • +
  • shape (Sequence[int]) –

  • +
  • dtype (dtype) –

  • +
+
+
Return type:
+

Variable

+
+
+
+ +
+
+make_gs_quantize_node(name, inputs, outputs)
+

Create a GraphSurgeon Quantize node.

+

name is the desired _basename_ of the node.

+
+
Parameters:
+
    +
  • name (str) –

  • +
  • inputs (Sequence[Tensor]) –

  • +
  • outputs (Sequence[Tensor]) –

  • +
+
+
Return type:
+

Node

+
+
+
+ +
+
+make_gs_quantize_output(name, shape, dtype)
+

Create a GraphSurgeon variable representing the output of a quantize node.

+

name is the desired _basename_ of the node.

+
+
Parameters:
+
    +
  • name (str) –

  • +
  • shape (Sequence[int]) –

  • +
  • dtype (onnx.TensorProto data type) –

  • +
+
+
Return type:
+

Variable

+
+
+
+ +
+
+make_gs_quantized_weight(name, wq, dtype)
+

Create a GraphSurgeon tensor from a quantized weight tensor.

+

name is the desired _basename_ of the tensor.

+
+
Parameters:
+
    +
  • name (str) –

  • +
  • wq (ndarray) –

  • +
+
+
Return type:
+

Constant

+
+
+
+ +
+
+make_gs_scale(name, scale)
+

Create a GraphSurgeon scale tensor from the given numpy array.

+

name is the desired _basename_ of the tensor.

+
+
Parameters:
+
    +
  • name (str) –

  • +
  • scale (ndarray) –

  • +
+
+
Return type:
+

Constant

+
+
+
+ +
+
+make_gs_zp(name, shape, dtype)
+

Create a GraphSurgeon zero-point tensor of all zeroes with the given shape.

+

name is the desired _basename_ of the tensor.

+
+
Parameters:
+
    +
  • name (str) –

  • +
  • shape (Sequence[int]) –

  • +
+
+
Return type:
+

Constant

+
+
+
+ +
+
+use_trt_qdq_ops()
+

Globally set node names to TRT custom names.

+
+ +

quant_utils

+

Provides some basic utilities that can be used in quantize() methods.

+

Functions


pack_float32_to_4bit_optimized

Convert an array of float32 values to a 4-bit data type and pack every two consecutive elements into a byte.

+
+
+pack_float32_to_4bit_optimized(array, signed)
+

Convert an array of float32 values to a 4-bit data type and pack every two consecutive elements into a byte.

+

This is the optimized version of the pack_float32_to_4bit() utility in the ONNX helper file. The basic optimizations done here mainly rely on moving some common code out of the per-element function calls or loops, thereby making them per-input-array instead of per-input-element. The remaining logic should largely remain as is.

+
+
Parameters:
+
    +
  • array (ndarray | Sequence) – array of float to convert and pack

  • +
  • signed (bool) – Whether the 4 bit variant is signed or unsigned

  • +
+
+
Returns:
+

Packed array with size ceil(array.size/2) (single dimension).

+
+
Return type:
+

ndarray

+
+
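A brief usage sketch; the values below are arbitrary:

import numpy as np

from modelopt.onnx.quantization.quant_utils import pack_float32_to_4bit_optimized

# Four float values pack into two bytes of signed 4-bit data.
packed = pack_float32_to_4bit_optimized(
    np.array([1.0, -2.0, 3.0, -4.0], dtype=np.float32), signed=True
)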
+
+ +

quantize

+

Convert ONNX model without QDQ nodes + calib data into ONNX model with QDQ nodes.

+

Typically, quantizing linear operations like Conv, MatMul etc. gives most of the performance boost. But there are many other ops that are quantizable (i.e., low-precision kernels are available) and provide optimal performance with a lower accuracy drop. The default op types that this ONNX PTQ tool quantizes in different quantization modes are: INT8: [‘Add’, ‘AveragePool’, ‘BatchNormalization’, ‘Clip’, ‘Conv’, ‘ConvTranspose’, ‘Gemm’, ‘GlobalAveragePool’, ‘MatMul’, ‘MaxPool’, ‘Mul’], INT4: [‘MatMul’], FP8: [‘MatMul’]. The tool inserts QDQ nodes following compiler-friendly patterns and generates an explicit ONNX model.

+

Functions


quantize

Quantize the given onnx model.

+
+
+quantize(onnx_path, calibration_data=None, calibration_method='entropy', op_types_to_quantize=None, op_types_to_exclude=None, nodes_to_quantize=None, nodes_to_exclude=None, use_external_data_format=False, keep_intermediate_files=False, output_path=None, verbose=False, quantize_mode='int8')
+

Quantize the given onnx model.

+
+
Parameters:
+
    +
  • onnx_path (str) – Path to the input onnx model.

  • +
  • calibration_data (ndarray | Dict[str, ndarray]) – Calibration data, either a numpy array or a list/dict of numpy arrays.

  • +
  • calibration_method (str) – Calibration method. Options={entropy (default), minmax}.

  • +
  • op_types_to_quantize (List[str]) – List of types of operators to quantize. When this list is not None, only the types in this list are quantized. Example: [‘Conv’] indicates that only ops of type ‘Conv’ should be quantized. If this list is None (default), all supported operators are quantized. This flag does not support regular expression.

  • +
  • op_types_to_exclude (List[str]) – List of types of operators to exclude from quantization. This flag does not support regular expression.

  • +
  • nodes_to_quantize (List[str]) – List of node names to quantize. When this list is not None, only the nodes in this list are quantized. Example: [‘Conv__224’, ‘Conv__252’]. If this list is None (default), all supported nodes are quantized. This flag does not support regular expression.

  • +
  • nodes_to_exclude (List[str]) – List of node names to exclude. The nodes in this list will be excluded from quantization when it is not None. This flag supports regular expression.

  • +
  • use_external_data_format (bool) – If True, the weights of the quantized model are stored in a separate external data file.

  • +
  • keep_intermediate_files (bool) – If False, only save the converted ONNX file for the user. Otherwise, keep all intermediate files generated during the ONNX model’s conversion/calibration.

  • +
  • output_path (str) – Output filename to save the converted ONNX model. If None, save in the same directory as the original ONNX model with a .quant suffix.

  • +
  • verbose (bool) – Prints details of node partition, selection etc. throughout the quantization process.

  • +
  • quantize_mode (str) – Quantization mode. One of [‘int8’, ‘int4_rtn’, ‘int4_rtn_dq’, ‘int4_rtn_trt’, ‘int4_rtn_trt_dq’, ‘int4_awq_clip’, ‘int4_awq_clip_trt’, ‘fp8’]. ‘int8’ by default. Any INT4-based mode is Gemm/MatMul weight-only quantization, and FP8 mode quantizes Conv, Gemm, and MatMul only.

  • +
+
+
Returns:
+

 None; writes the quantized onnx model in the same directory with a filename like “<model_name>.quant.onnx”.

+
+
Return type:
+

None

+
+
+
+ +
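 A minimal usage sketch (the model path, input name, and calibration shape below are illustrative placeholders, and it assumes quantize is importable from modelopt.onnx.quantization):
+import numpy as np
+from modelopt.onnx.quantization import quantize
+
+# Hypothetical input name/shape -- adjust to your model.
+calib = {"input": np.random.rand(32, 3, 224, 224).astype(np.float32)}
+
+# Writes model.quant.onnx next to the input model by default.
+quantize("model.onnx", calibration_data=calib, calibration_method="minmax", quantize_mode="int8")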
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.onnx.utils.html b/reference/generated/modelopt.onnx.utils.html new file mode 100644 index 0000000..671befd --- /dev/null +++ b/reference/generated/modelopt.onnx.utils.html @@ -0,0 +1,711 @@ + + + + + + + utils — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

utils

+

Utility functions related to onnx.

+

Functions

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

duplicate_shared_linear_weights

Duplicate weights of linear operators if they are shared.

find_lowest_common_ancestor

Function to find the lowest common ancestor of two nodes.

gen_random_inputs

This function generates random inputs for an onnx model.

get_all_input_names

 This function returns the input names of the given onnx model.

get_batch_size

Returns the batch size of the given onnx model.

get_batch_size_from_bytes

Returns the batch size of the given onnx model.

get_child_nodes

Returns list of output consumer nodes for the given node.

get_input_names

 This function returns the external input names of the given onnx model.

get_input_names_from_bytes

 This function returns the input names of the given onnx model in bytes.

get_input_shapes

 This function returns the input shapes for the given onnx model.

get_input_shapes_from_bytes

This function returns the input shapes of the given onnx model in bytes.

get_node_names

This function returns all node names from the given onnx model.

get_node_names_from_bytes

This function returns all node names from the given onnx model in bytes.

get_output_names

This function returns the output names of the given onnx model.

get_output_names_from_bytes

This function returns the output names of the given onnx model in bytes.

get_output_shapes

This function returns the output shapes for the given onnx model.

get_parent_nodes

Returns list of input producer nodes for the given node.

get_variable_inputs

Returns the variable inputs of the given Node.

is_valid_onnx_model

Checks if the given file is a valid ONNX model.

name_onnx_nodes

 Assigns names to the onnx nodes if not present and returns the modified status.

randomize_weights

Assigns random values to the onnx model weights.

randomize_weights_onnx_bytes

Assigns random values to the onnx model weights.

remove_weights_data

Removes raw weight data from the onnx model.

save_onnx

Save an ONNX model to given path.

save_onnx_bytes_to_dir

Saves the onnx bytes to a directory with specified file name.

validate_batch_size

 Returns True if all the model inputs have a batch dimension equal to batch_size.

validate_onnx

Returns True if the onnx_bytes is valid, else False.

+
+
+duplicate_shared_linear_weights(graph)
+

Duplicate weights of linear operators if they are shared.

+
+
Parameters:
+

graph (GraphProto) –

+
+
Return type:
+

bool

+
+
+
+ +
+
+find_lowest_common_ancestor(node1, node2)
+

Function to find the lowest common ancestor of two nodes.

+
+
Parameters:
+
    +
  • node1 (Node) – First node name.

  • +
  • node2 (Node) – Second node name.

  • +
+
+
Returns:
+

LCA node. +Distance from first node. +Distance from second node.

+
+
Return type:
+

Tuple[str | None, int, int]

+
+
+
+ +
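 The returned tuple can be unpacked directly; a small sketch (node_a and node_b are placeholders for two nodes of the same loaded graph):
+from modelopt.onnx.utils import find_lowest_common_ancestor
+
+lca_name, dist_a, dist_b = find_lowest_common_ancestor(node_a, node_b)
+if lca_name is not None:
+    print(f"LCA {lca_name} is {dist_a} and {dist_b} hops from the two nodes")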
+
+gen_random_inputs(model)
+

This function generates random inputs for an onnx model.

+
+
Parameters:
+

model (ModelProto) – Loaded in-memory onnx ModelProto.

+
+
Returns:
+

Dictionary of numpy tensors.

+
+
Return type:
+

Dict[str, ndarray]

+
+
+
+ +
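 A sketch of smoke-testing a model with the generated random inputs (assumes onnxruntime is installed; the file name is illustrative):
+import onnx
+import onnxruntime as ort
+from modelopt.onnx.utils import gen_random_inputs
+
+model = onnx.load("model.onnx")
+feeds = gen_random_inputs(model)  # dict: input name -> random numpy tensor
+outputs = ort.InferenceSession(model.SerializeToString()).run(None, feeds)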
+
+get_all_input_names(model)
+

 This function returns the input names of the given onnx model.

+
+
Parameters:
+

model (ModelProto) –

+
+
Return type:
+

List[str]

+
+
+
+ +
+
+get_batch_size(model)
+

Returns the batch size of the given onnx model.

+

 An assertion will fail if the batch size is not the same across all the inputs.

+
+
Parameters:
+

model (ModelProto) –

+
+
Return type:
+

int

+
+
+
+ +
+
+get_batch_size_from_bytes(onnx_bytes)
+

Returns the batch size of the given onnx model.

+

 An assertion will fail if the batch size is not the same across all the inputs.

+
+
Parameters:
+

onnx_bytes (bytes) –

+
+
Return type:
+

int

+
+
+
+ +
+
+get_child_nodes(node)
+

Returns list of output consumer nodes for the given node.

+
+
Parameters:
+

node (Node) –

+
+
Return type:
+

List[Node]

+
+
+
+ +
+
+get_input_names(model, external_inputs_only=True)
+

 This function returns the external input names of the given onnx model.

+

Note: external_input_names = input_names - initializer_names

+
+
Parameters:
+
    +
  • model (ModelProto) – Loaded in-memory onnx ModelProto.

  • +
  • external_inputs_only (bool) –

  • +
+
+
Returns:
+

List of external input names of the model.

+
+
Return type:
+

List[str]

+
+
+
+ +
+
+get_input_names_from_bytes(model_bytes, external_inputs_only=True)
+

 This function returns the input names of the given onnx model in bytes.

+
+
Parameters:
+
    +
  • model_bytes (bytes) – Onnx model in bytes.

  • +
  • external_inputs_only (bool) –

  • +
+
+
Returns:
+

List of input names of the model.

+
+
Return type:
+

List[str]

+
+
+
+ +
+
+get_input_shapes(model, external_inputs_only=True)
+

 This function returns the input shapes for the given onnx model.

+
+
Parameters:
+
    +
  • model (ModelProto) –

  • +
  • external_inputs_only (bool) –

  • +
+
+
Return type:
+

Dict[str, List[int]]

+
+
+
+ +
+
+get_input_shapes_from_bytes(model_bytes)
+

This function returns the input shapes of the given onnx model in bytes.

+
+
Parameters:
+

model_bytes (bytes) – Onnx model in bytes.

+
+
Returns:
+

 Dictionary of input names and shapes.

+
+
Return type:
+

Dict[str, List[int]]

+
+
+
+ +
+
+get_node_names(model)
+

This function returns all node names from the given onnx model.

+
+
Parameters:
+

model (ModelProto) – Loaded in-memory onnx ModelProto.

+
+
Returns:
+

List of node names of the model.

+
+
Return type:
+

List[str]

+
+
+
+ +
+
+get_node_names_from_bytes(model_bytes)
+

This function returns all node names from the given onnx model in bytes.

+
+
Parameters:
+
    +
  • model_bytes (bytes) – Onnx model in bytes.


  • +
+
+
Returns:
+

List of node names of the model.

+
+
Return type:
+

List[str]

+
+
+
+ +
+
+get_output_names(model)
+

This function returns the output names of the given onnx model.

+
+
Parameters:
+

model (ModelProto) – Loaded in-memory onnx ModelProto.

+
+
Returns:
+

List of output names of the model.

+
+
Return type:
+

List[str]

+
+
+
+ +
+
+get_output_names_from_bytes(model_bytes)
+

This function returns the output names of the given onnx model in bytes.

+
+
Parameters:
+

model_bytes (bytes) – Onnx model in bytes.

+
+
Returns:
+

List of output names of the model.

+
+
Return type:
+

List[str]

+
+
+
+ +
+
+get_output_shapes(model)
+

This function returns the output shapes for the given onnx model.

+
+
Parameters:
+

model (ModelProto) –

+
+
Return type:
+

Dict[str, List[int]]

+
+
+
+ +
+
+get_parent_nodes(node)
+

Returns list of input producer nodes for the given node.

+
+
Parameters:
+

node (Node) –

+
+
Return type:
+

List[Node]

+
+
+
+ +
+
+get_variable_inputs(node)
+

Returns the variable inputs of the given Node.

+
+
Parameters:
+

node (Node) –

+
+
Return type:
+

List[Variable]

+
+
+
+ +
+
+is_valid_onnx_model(file_path)
+

Checks if the given file is a valid ONNX model.

+
+ +
+
+name_onnx_nodes(graph)
+

 Assigns names to the onnx nodes if not present and returns the modified status.

+
+
Parameters:
+

graph (GraphProto) –

+
+
Return type:
+

bool

+
+
+
+ +
+
+randomize_weights(onnx_path)
+

Assigns random values to the onnx model weights.

+
+
Parameters:
+

onnx_path (str) –

+
+
Return type:
+

None

+
+
+
+ +
+
+randomize_weights_onnx_bytes(onnx_bytes, seed=0)
+

Assigns random values to the onnx model weights.

+
+
Parameters:
+
    +
  • onnx_bytes (bytes) –

  • +
  • seed (int) –

  • +
+
+
Return type:
+

bytes

+
+
+
+ +
+
+remove_weights_data(onnx_bytes)
+

Removes raw weight data from the onnx model.

+
+
Parameters:
+

onnx_bytes (bytes) –

+
+
Return type:
+

bytes

+
+
+
+ +
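 These byte-level helpers compose naturally, e.g. to strip the original weights and substitute reproducible random ones before sharing a model for debugging (a sketch; the file name is illustrative):
+from modelopt.onnx.utils import randomize_weights_onnx_bytes, remove_weights_data
+
+with open("model.onnx", "rb") as f:
+    onnx_bytes = f.read()
+
+stripped = remove_weights_data(onnx_bytes)                     # raw weight data removed
+randomized = randomize_weights_onnx_bytes(onnx_bytes, seed=0)  # deterministic fake weights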
+
+save_onnx(onnx_model, onnx_path, save_as_external_data=False)
+

Save an ONNX model to given path.

+
+
Parameters:
+
    +
  • onnx_model (ModelProto) –

  • +
  • onnx_path (str) –

  • +
  • save_as_external_data (bool) –

  • +
+
+
+
+ +
+
+save_onnx_bytes_to_dir(onnx_bytes, onnx_dir, onnx_name)
+

Saves the onnx bytes to a directory with specified file name.

+
+
Parameters:
+
    +
  • onnx_bytes (bytes) –

  • +
  • onnx_dir (str) –

  • +
  • onnx_name (str) –

  • +
+
+
Return type:
+

None

+
+
+
+ +
+
+validate_batch_size(onnx_bytes, batch_size)
+

 Returns True if all the model inputs have a batch dimension equal to batch_size.

+
+
Parameters:
+
    +
  • onnx_bytes (bytes) –

  • +
  • batch_size (int) –

  • +
+
+
Return type:
+

bool

+
+
+
+ +
+
+validate_onnx(onnx_bytes)
+

Returns True if the onnx_bytes is valid, else False.

+
+
Parameters:
+

onnx_bytes (bytes) –

+
+
Return type:
+

bool

+
+
+
+ +
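 A quick sketch of the bytes-oriented validation helpers (the file name is illustrative):
+from modelopt.onnx.utils import get_node_names_from_bytes, validate_onnx
+
+with open("model.onnx", "rb") as f:
+    onnx_bytes = f.read()
+
+if validate_onnx(onnx_bytes):
+    print(len(get_node_names_from_bytes(onnx_bytes)), "nodes")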
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.export.distribute.html b/reference/generated/modelopt.torch.export.distribute.html new file mode 100644 index 0000000..2ca0694 --- /dev/null +++ b/reference/generated/modelopt.torch.export.distribute.html @@ -0,0 +1,363 @@ + + + + + + + distribute — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

distribute

+

torch.distribute utils.

+

Classes

+ + + + + + +

NFSWorkspace

 A shared workspace implementation using the Network File System (NFS).

+

Functions

+ + + + + + + + + + + + + + + + + + + + + +

barrier

Set a parallel barrier.

get_configs_parallel

Gathers the layer config across distributed processes using shm or NFS.

get_group

Returns the process group if torch.distributed.is_initialized().

get_rank

Safe method to get local rank.

get_tensors_parallel

Gathers the tensors across distributed processes using shm.

get_world_size

Safe method to get world size.

+
+
+class NFSWorkspace
+

Bases: object

+

 A shared workspace implementation using the Network File System (NFS).

+
+
+
NOTE: all reads/writes/modifications to the NFS dir involve no collective


communication or barrier. It is the users’ responsibility to synchronize +all ranks (local and remote processes).

+
+
+

This implementation uses torch.save and torch.load for serialization.

+
+
Parameters:
+

workspace_path – the path to the NFS directory for postprocess cross rank communication. +If not provided, SharedMemory will be used instead.

+
+
+
+
+__init__(workspace_path=None)
+

 Create the NFS work dir and clean up existing state files.

+
+
Parameters:
+

workspace_path (Path | str | None) –

+
+
+
+ +
+
+property is_initialized
+

 Whether the workspace is initialized.

+
+ +
+
+read_configs_and_weights_from_rank(target_rank)
+

All ranks read the target_rank state file.

+
+
Parameters:
+

target_rank (int) – the target rank

+
+
Returns:
+

the model/module config and the weights

+
+
Return type:
+

Tuple[Dict[str, Any] | None, Dict[str, Any] | None]

+
+
+
+ +
+
+write_configs_and_weights(config_json, weights)
+

All ranks write the state file to the shared NFS dir.

+
+
Parameters:
+
    +
  • config_json (Dict[str, Any]) – model or module config in json

  • +
  • weights (Dict[str, Any]) – module weights in torch’s state_dict format

  • +
+
+
+
+ +
+ +
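 A manual-use sketch on a shared filesystem (the path is illustrative, config_json/weights are placeholders, and synchronizing ranks between the write and the read is left to the caller):
+from modelopt.torch.export.distribute import NFSWorkspace
+
+ws = NFSWorkspace("/mnt/shared/modelopt_ws")
+ws.write_configs_and_weights(config_json, weights)  # each rank writes its own state
+# ... synchronize all ranks here, e.g. torch.distributed.barrier() ...
+config0, weights0 = ws.read_configs_and_weights_from_rank(0)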
+
+barrier(group=None)
+

Set a parallel barrier.

+
+ +
+
+get_configs_parallel(config, ranks, group, workspace_path=None)
+

Gathers the layer config across distributed processes using shm or NFS.

+
+
Parameters:
+
    +
  • config – the config (nullable) that each rank wants to pass to the first rank.

  • +
  • ranks (List[int]) – the list of the ranks

  • +
  • group – the barrier sync group.

  • +
  • workspace_path (Path | str | None) – the path to the NFS directory for postprocess cross rank communication.

  • +
+
+
Yields:
+

 the first rank in the ranks gets full access to the configs across all the ranks; +the other ranks yield an empty list

+
+
+

When workspace_path is provided, an NFSWorkspace object is created to perform communication +across ranks. Otherwise, SharedMemory is used for local multi-process communication. +The shm will be destroyed after consumption.

+
+ +
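 A usage sketch, assuming the function is consumed as a context manager that yields the gathered configs (my_config is a placeholder; only the first rank sees the full list):
+from modelopt.torch.export.distribute import get_configs_parallel, get_rank
+
+with get_configs_parallel(my_config, ranks=[0, 1], group=None) as configs:
+    if get_rank() == 0:
+        print("gathered", len(configs), "configs")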
+
+get_group(ranks)
+

Returns the process group if torch.distributed.is_initialized().

+
+
Parameters:
+

ranks (List[int]) –

+
+
+
+ +
+
+get_rank()
+

Safe method to get local rank.

+
+
Return type:
+

int

+
+
+
+ +
+
+get_tensors_parallel(tensor, ranks, group=None)
+

Gathers the tensors across distributed processes using shm.

+
+
Parameters:
+
    +
  • tensor (Tensor) – the tensor that each rank wants to pass to the first rank. +The tensors across the ranks need to have the same size.

  • +
  • ranks (List[int]) – the list of the ranks

  • +
  • group – the barrier sync group.

  • +
+
+
Yields:
+

 the first rank in the ranks gets full access to the tensors across all the ranks; +the other ranks yield an empty list

+
+
+

The shm will be destroyed after consumption.

+
+ +
+
+get_world_size()
+

Safe method to get world size.

+
+
Return type:
+

int

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.export.html b/reference/generated/modelopt.torch.export.html new file mode 100644 index 0000000..03fc645 --- /dev/null +++ b/reference/generated/modelopt.torch.export.html @@ -0,0 +1,213 @@ + + + + + + + export — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

export

+

Modules

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

modelopt.torch.export.distribute

torch.distribute utils.

modelopt.torch.export.layer_utils

Utils for model_config export.

modelopt.torch.export.model_config

This module defines the model_config format.

modelopt.torch.export.model_config_export

 Code that exports optimized models to the TensorRT-LLM checkpoint.

modelopt.torch.export.model_config_utils

Common utils for the ModelConfig.

modelopt.torch.export.postprocess

Utils to load and process model_config.

modelopt.torch.export.scaling_factor_utils

Utils for scaling factors adjustments.

modelopt.torch.export.tensorrt_llm_utils

Utils for TensorRT-LLM checkpoint export.

modelopt.torch.export.transformer_engine

Convert the Model Optimizer quantized model to the transformer_engine.

+

 Export package. So far it only supports selected NeMo and Hugging Face LLMs.

+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.export.layer_utils.html b/reference/generated/modelopt.torch.export.layer_utils.html new file mode 100644 index 0000000..2ee3a84 --- /dev/null +++ b/reference/generated/modelopt.torch.export.layer_utils.html @@ -0,0 +1,669 @@ + + + + + + + layer_utils — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

layer_utils

+

Utils for model_config export.

+

 Some of the logic in this file is empirical and needs constant updates as exceptions occur.

+

Functions

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

build_attention_config

Builds the attention config from the module.

build_decoder_config

Builds the full decoder config from the module.

build_embedding_config

Builds the embedding config from the module.

build_layernorm_config

Builds the layernorm config from the module.

build_linear_config

Builds the linear config for the module.

build_mlp_config

Builds the MLP config for the module.

build_moe_config

Builds the MOE config for the module.

build_qkv

Converts the qkv modules to the config.

build_stacked_experts

Builds the experts_weight_1 and experts_weight_2 configs for the experts.

check_model_compatibility

Returns whether the list of modules is compatible with the export logic.

get_activation_scaling_factor

Returns the activation scaling factor.

get_kv_cache_dtype

Returns the kv_cache dtype.

get_kv_cache_scaling_factor

Returns the kv_cache scaling factor if output quantizer is set.

get_prequant_scaling_factor

Returns the prequant scaling factor.

get_scaling_factor

Returns scaling factor from the quantizer as torch.Tensor.

get_transformer_layers

Returns the root module of the transformer model.

get_weight_block_size

Returns the weight block size.

get_weight_scaling_factor

Returns the weight scaling factor.

get_weight_scaling_factor_2

Returns the secondary weight scaling factor.

is_attention

Returns whether the module is an attention layer.

is_decoder_list

Returns whether the module is a decoder list.

is_embedding

Returns whether the module is an embedding layer.

is_layernorm

Returns whether the module is a layernorm layer.

is_linear

Returns whether the module is a linear layer.

is_mlp

Returns whether the module is an MLP layer.

is_moe

Returns whether the module is an MOE layer.

+
+
+build_attention_config(module, model_metadata_config, dtype, ext_config=None)
+

Builds the attention config from the module.

+
+
Parameters:
+
+
+
Return type:
+

AttentionConfig

+
+
+
+ +
+
+build_decoder_config(module, model_metadata_config, decoder_type, dtype)
+

Builds the full decoder config from the module.

+
+
Parameters:
+
    +
  • module (Module) –

  • +
  • decoder_type (str) –

  • +
  • dtype (dtype) –

  • +
+
+
Return type:
+

DecoderLayerConfig

+
+
+
+ +
+
+build_embedding_config(module, dtype, normalization_constant=1)
+

Builds the embedding config from the module.

+
+
Parameters:
+
    +
  • module (Module) –

  • +
  • dtype (dtype) –

  • +
  • normalization_constant (float) –

  • +
+
+
Return type:
+

EmbeddingConfig

+
+
+
+ +
+
+build_layernorm_config(module, dtype)
+

Builds the layernorm config from the module.

+
+
Parameters:
+
    +
  • module (Module) –

  • +
  • dtype (dtype) –

  • +
+
+
Return type:
+

LayernormConfig

+
+
+
+ +
+
+build_linear_config(module, linear_type, dtype)
+

Builds the linear config for the module.

+
+
Parameters:
+
    +
  • module (Module) –

  • +
  • linear_type (str) –

  • +
  • dtype (dtype) –

  • +
+
+
Return type:
+

LinearConfig

+
+
+
+ +
+
+build_mlp_config(module, decoder_type, dtype)
+

Builds the MLP config for the module.

+
+
Parameters:
+
    +
  • module (Module) –

  • +
  • dtype (dtype) –

  • +
+
+
Return type:
+

MLPConfig

+
+
+
+ +
+
+build_moe_config(module, decoder_type, dtype)
+

Builds the MOE config for the module.

+
+
Parameters:
+
    +
  • module (Module) –

  • +
  • dtype (dtype) –

  • +
+
+
Return type:
+

MOEConfig

+
+
+
+ +
+
+build_qkv(qkv_modules, model_metadata_config, dtype, ext_config=None)
+

Converts the qkv modules to the config.

+
+
Parameters:
+
    +
  • qkv_modules (List[Module]) –

  • +
  • dtype (dtype) –

  • +
  • ext_config (DecoderLayerConfig) –

  • +
+
+
Return type:
+

QKVConfig

+
+
+
+ +
+
+build_stacked_experts(experts, dtype)
+

Builds the experts_weight_1 and experts_weight_2 configs for the experts.

+
+
Parameters:
+
    +
  • experts (Module) –

  • +
  • dtype (dtype) –

  • +
+
+
+
+ +
+
+check_model_compatibility(module_list)
+

Returns whether the list of modules is compatible with the export logic.

+

 It also reports whether a positional embedding and an embedding layernorm exist.

+

 We assume the model is assembled from one or two embedding layers, +a ModuleList of transformer decoders, +and a final layernorm with an optional embedding layernorm. +Otherwise it is not supported.

+
+
Parameters:
+

module_list (List[Module]) –

+
+
Return type:
+

Tuple[bool, bool, bool]

+
+
+
+ +
+
+get_activation_scaling_factor(module)
+

Returns the activation scaling factor.

+
+
Parameters:
+

module (Module) –

+
+
Return type:
+

Tensor

+
+
+
+ +
+
+get_kv_cache_dtype(qkv_modules)
+

Returns the kv_cache dtype.

+

If num_bits of output_quantizer is (4, 3) then returns FP8; if it is 8, returns int8, +otherwise returns None.

+
+
Parameters:
+

qkv_modules (List[Module]) –

+
+
Return type:
+

str

+
+
+
+ +
+
+get_kv_cache_scaling_factor(qkv_modules)
+

 Returns the kv_cache scaling factor if the output quantizer is set; otherwise returns None.

+
+
Parameters:
+

qkv_modules (List[Module]) –

+
+
Return type:
+

Tensor

+
+
+
+ +
+
+get_prequant_scaling_factor(module, dtype)
+

Returns the prequant scaling factor.

+
+
Parameters:
+
    +
  • module (Module) –

  • +
  • dtype (dtype) –

  • +
+
+
Return type:
+

Tensor

+
+
+
+ +
+
+get_scaling_factor(quantizer)
+

Returns scaling factor from the quantizer as torch.Tensor.

+
+
Parameters:
+

quantizer (TensorQuantizer) –

+
+
Return type:
+

Tensor

+
+
+
+ +
+
+get_transformer_layers(model)
+

Returns the root module of the transformer model.

+
+
Parameters:
+

model (Module) –

+
+
Return type:
+

List[Module]

+
+
+
+ +
+
+get_weight_block_size(module)
+

Returns the weight block size.

+
+
Parameters:
+

module (Module) –

+
+
Return type:
+

int

+
+
+
+ +
+
+get_weight_scaling_factor(module)
+

Returns the weight scaling factor.

+
+
Parameters:
+

module (Module) –

+
+
Return type:
+

Tensor

+
+
+
+ +
+
+get_weight_scaling_factor_2(module)
+

Returns the secondary weight scaling factor.

+
+
Parameters:
+

module (Module) –

+
+
Return type:
+

Tensor

+
+
+
+ +
+
+is_attention(module)
+

Returns whether the module is an attention layer.

+
+
Parameters:
+

module (Module) –

+
+
Return type:
+

bool

+
+
+
+ +
+
+is_decoder_list(module)
+

Returns whether the module is a decoder list.

+
+
Parameters:
+

module (Module) –

+
+
Return type:
+

bool

+
+
+
+ +
+
+is_embedding(module)
+

Returns whether the module is an embedding layer.

+
+
Parameters:
+

module (Module) –

+
+
Return type:
+

bool

+
+
+
+ +
+
+is_layernorm(module)
+

Returns whether the module is a layernorm layer.

+
+
Parameters:
+

module (Module) –

+
+
Return type:
+

bool

+
+
+
+ +
+
+is_linear(module)
+

Returns whether the module is a linear layer.

+
+
Parameters:
+

module (Module) –

+
+
Return type:
+

bool

+
+
+
+ +
+
+is_mlp(module)
+

Returns whether the module is an MLP layer.

+
+
Parameters:
+

module (Module) –

+
+
Return type:
+

bool

+
+
+
+ +
+
+is_moe(module)
+

Returns whether the module is an MOE layer.

+
+
Parameters:
+

module (Module) –

+
+
Return type:
+

bool

+
+
+
+ +
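 Together these predicates make it easy to take a quick inventory of a model’s structure; a sketch (model is a placeholder for any torch.nn.Module):
+from modelopt.torch.export.layer_utils import is_attention, is_layernorm, is_linear, is_mlp
+
+for name, module in model.named_modules():
+    for label, pred in [("attention", is_attention), ("mlp", is_mlp),
+                        ("layernorm", is_layernorm), ("linear", is_linear)]:
+        if pred(module):
+            print(name, label)
+            break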
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.export.model_config.html b/reference/generated/modelopt.torch.export.model_config.html new file mode 100644 index 0000000..b8d7c06 --- /dev/null +++ b/reference/generated/modelopt.torch.export.model_config.html @@ -0,0 +1,994 @@ + + + + + + + model_config — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

model_config

+

This module defines the model_config format.

+

 This format can be converted from a huggingface, nemo, or modelopt-quantized model, +and we will build the tensorrt_llm engine from the context saved in this format.

+

Classes

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

AttentionConfig

The attention layer config.

DecoderLayerConfig

The decoder layer config.

EmbeddingConfig

The embedding layer config.

ExpertConfig

The Expert config.

LayernormConfig

The layernorm layer config.

LinearConfig

The linear layer config.

MLPConfig

The MLP layer config.

MOEConfig

The Mixture of Expert layer config.

ModelConfig

The full LLM model config that includes the full information needed for tensorrt_llm engine building.

QKVConfig

The QKV layer config.

+
+
+class AttentionConfig
+

Bases: object

+

The attention layer config.

+
+
+__init__(qkv=None, dense=None, kv_cache_scaling_factor=None, kv_cache_dtype=None, rotary_dim=-inf, clip_qkv=None)
+
+
Parameters:
+
    +
  • qkv (QKVConfig | LinearConfig) –

  • +
  • dense (LinearConfig) –

  • +
  • kv_cache_scaling_factor (Tensor) –

  • +
  • kv_cache_dtype (str) –

  • +
  • rotary_dim (int) –

  • +
  • clip_qkv (float) –

  • +
+
+
Return type:
+

None

+
+
+
+ +
+
+clip_qkv: float = None
+
+ +
+
+dense: LinearConfig = None
+
+ +
+
+kv_cache_dtype: str = None
+
+ +
+
+kv_cache_scaling_factor: Tensor = None
+
+ +
+
+qkv: QKVConfig | LinearConfig = None
+
+ +
+
+rotary_dim: int = -inf
+
+ +
+ +
+
+class DecoderLayerConfig
+

Bases: object

+

The decoder layer config.

+
+
+__init__(quantization='', decoder_type='', input_layernorm=None, mlp_layernorm=None, attention=None, post_layernorm=None, mlp=None, num_attention_heads=0, attention_head_size=None, num_kv_heads=0, max_position_embeddings=0, rotary_pct=1.0, use_alibi=False, new_decoder_architecture=False, parallel_attention=False, apply_residual_connection_post_layernorm=False, use_cache=True, model_name='', rope_ratio=1.0, seq_length=0, rotary_base=0, partial_rotary_factor=0, moe_num_experts=0, moe_top_k=0, moe_tp_mode=0, moe_renorm_mode=0, alibi_bias_max=0, residual_layernorm=None, residual_mlp=None)
+
+
Parameters:
+
    +
  • quantization (str) –

  • +
  • decoder_type (str) –

  • +
  • input_layernorm (LayernormConfig) –

  • +
  • mlp_layernorm (LayernormConfig) –

  • +
  • attention (AttentionConfig) –

  • +
  • post_layernorm (LayernormConfig) –

  • +
  • mlp (MLPConfig | MOEConfig) –

  • +
  • num_attention_heads (int) –

  • +
  • attention_head_size (int) –

  • +
  • num_kv_heads (int) –

  • +
  • max_position_embeddings (int) –

  • +
  • rotary_pct (float) –

  • +
  • use_alibi (bool) –

  • +
  • new_decoder_architecture (bool) –

  • +
  • parallel_attention (bool) –

  • +
  • apply_residual_connection_post_layernorm (bool) –

  • +
  • use_cache (bool) –

  • +
  • model_name (str) –

  • +
  • rope_ratio (float) –

  • +
  • seq_length (int) –

  • +
  • rotary_base (int) –

  • +
  • partial_rotary_factor (float) –

  • +
  • moe_num_experts (int) –

  • +
  • moe_top_k (int) –

  • +
  • moe_tp_mode (int) –

  • +
  • moe_renorm_mode (int) –

  • +
  • alibi_bias_max (int) –

  • +
  • residual_layernorm (LayernormConfig) –

  • +
  • residual_mlp (MLPConfig) –

  • +
+
+
Return type:
+

None

+
+
+
+ +
+
+alibi_bias_max: int = 0
+
+ +
+
+apply_residual_connection_post_layernorm: bool = False
+
+ +
+
+attention: AttentionConfig = None
+
+ +
+
+attention_head_size: int = None
+
+ +
+
+decoder_type: str = ''
+
+ +
+
+property ffn_hidden_size_local
+

Returns the ffn hidden size of the transformer model.

+
+ +
+
+property hidden_size
+

Returns the hidden size of the transformer model.

+
+ +
+
+input_layernorm: LayernormConfig = None
+
+ +
+
+max_position_embeddings: int = 0
+
+ +
+
+mlp: MLPConfig | MOEConfig = None
+
+ +
+
+mlp_layernorm: LayernormConfig = None
+
+ +
+
+model_name: str = ''
+
+ +
+
+moe_num_experts: int = 0
+
+ +
+
+moe_renorm_mode: int = 0
+
+ +
+
+moe_top_k: int = 0
+
+ +
+
+moe_tp_mode: int = 0
+
+ +
+
+new_decoder_architecture: bool = False
+
+ +
+
+num_attention_heads: int = 0
+
+ +
+
+num_kv_heads: int = 0
+
+ +
+
+parallel_attention: bool = False
+
+ +
+
+partial_rotary_factor: float = 0
+
+ +
+
+post_layernorm: LayernormConfig = None
+
+ +
+
+quantization: str = ''
+
+ +
+
+residual_layernorm: LayernormConfig = None
+
+ +
+
+residual_mlp: MLPConfig = None
+
+ +
+
+rope_ratio: float = 1.0
+
+ +
+
+rotary_base: int = 0
+
+ +
+
+rotary_pct: float = 1.0
+
+ +
+
+seq_length: int = 0
+
+ +
+
+use_alibi: bool = False
+
+ +
+
+use_cache: bool = True
+
+ +
+ +
+
+class EmbeddingConfig
+

Bases: object

+

The embedding layer config.

+
+
+__init__(weight=None)
+
+
Parameters:
+

weight (Tensor) –

+
+
Return type:
+

None

+
+
+
+ +
+
+property hidden_size
+

Infers the hidden_size from the embedding layer weights shape.

+
+ +
+
+property local_vocab_size
+

Infers the vocab_size from the embedding layer weights shape.

+
+ +
+
+weight: Tensor = None
+
+ +
+ +
+
+class ExpertConfig
+

Bases: object

+

The Expert config.

+
+
+__init__(fc=None, proj=None)
+
+
Parameters:
+
+
+
Return type:
+

None

+
+
+
+ +
+
+fc: LinearConfig = None
+
+ +
+
+proj: LinearConfig = None
+
+ +
+ +
+
+class LayernormConfig
+

Bases: object

+

The layernorm layer config.

+
+
+__init__(weight=None, bias=None, layernorm_type='', eps=1e-05)
+
+
Parameters:
+
    +
  • weight (Tensor) –

  • +
  • bias (Tensor) –

  • +
  • layernorm_type (str) –

  • +
  • eps (float) –

  • +
+
+
Return type:
+

None

+
+
+
+ +
+
+bias: Tensor = None
+
+ +
+
+eps: float = 1e-05
+
+ +
+
+layernorm_type: str = ''
+
+ +
+
+weight: Tensor = None
+
+ +
+ +
+
+class LinearConfig
+

Bases: object

+

The linear layer config.

+
+
+__init__(linear_type='column', weight=None, bias=None, activation_scaling_factor=None, weights_scaling_factor=None, weights_scaling_factor_2=None, prequant_scaling_factor=None, awq_block_size=0)
+
+
Parameters:
+
    +
  • linear_type (str) –

  • +
  • weight (Tensor) –

  • +
  • bias (Tensor) –

  • +
  • activation_scaling_factor (Tensor) –

  • +
  • weights_scaling_factor (Tensor) –

  • +
  • weights_scaling_factor_2 (Tensor) –

  • +
  • prequant_scaling_factor (Tensor) –

  • +
  • awq_block_size (int) –

  • +
+
+
Return type:
+

None

+
+
+
+ +
+
+activation_scaling_factor: Tensor = None
+
+ +
+
+awq_block_size: int = 0
+
+ +
+
+bias: Tensor = None
+
+ +
+
+linear_type: str = 'column'
+
+ +
+
+prequant_scaling_factor: Tensor = None
+
+ +
+
+weight: Tensor = None
+
+ +
+
+weights_scaling_factor: Tensor = None
+
+ +
+
+weights_scaling_factor_2: Tensor = None
+
+ +
+ +
+
+class MLPConfig
+

Bases: object

+

The MLP layer config.

+
+
+__init__(fc=None, gate=None, proj=None, hidden_act='', merged_fc1_gate=False)
+
+
Parameters:
+
+
+
Return type:
+

None

+
+
+
+ +
+
+fc: LinearConfig = None
+
+ +
+
+gate: LinearConfig = None
+
+ +
+
+hidden_act: str = ''
+
+ +
+
+merged_fc1_gate: bool = False
+
+ +
+
+proj: LinearConfig = None
+
+ +
+ +
+
+class MOEConfig
+

Bases: object

+

The Mixture of Expert layer config.

+
+
+__init__(router=None, experts=None, hidden_act='')
+
+
Parameters:
+
+
+
Return type:
+

None

+
+
+
+ +
+
+experts: ExpertConfig = None
+
+ +
+
+property fc
+

Return the fc module from experts.

+
+ +
+
+hidden_act: str = ''
+
+ +
+
+router: LinearConfig = None
+
+ +
+ +
+
+class ModelConfig
+

Bases: object

+

The full LLM model config that includes the full information needed for tensorrt_llm engine building.

+

This class includes all the fields that tensorrt_llm supports, but not all of the fields are required. +pipeline_parallel > 1 is only supported for TensorRT-LLM checkpoint.

+
+
+__init__(version=0.0, quantization='', dtype='float16', vocab_size=0, rank=0, tensor_parallel=1, pipeline_parallel=1, vocab_embedding=None, position_embedding=None, ln_embed=None, layers=<factory>, ln_f=None, lm_head=None, share_embedding_table=False)
+
+
Parameters:
+
+
+
Return type:
+

None

+
+
+
+ +
+
+dtype: str = 'float16'
+
+ +
+
+property hidden_act
+

Returns the hidden_act of the model.

+
+ +
+
+property hidden_size
+

Returns the hidden_size of the model.

+
+ +
+
+layers: List[DecoderLayerConfig]
+
+ +
+
+lm_head: LinearConfig = None
+
+ +
+
+ln_embed: LayernormConfig = None
+
+ +
+
+ln_f: LayernormConfig = None
+
+ +
+
+property max_position_embeddings
+

Returns the max_position_embedding of the model.

+
+ +
+
+property num_attention_heads
+

Returns the num_attention_heads of the model.

+
+ +
+
+property num_kv_heads
+

Returns the num_key_value_heads of the model.

+
+ +
+
+pipeline_parallel: int = 1
+
+ +
+
+position_embedding: EmbeddingConfig = None
+
+ +
+
+quantization: str = ''
+
+ +
+
+rank: int = 0
+
+ +
+
+share_embedding_table: bool = False
+
+ +
+
+tensor_parallel: int = 1
+
+ +
+
+version: float = 0.0
+
+ +
+
+vocab_embedding: EmbeddingConfig = None
+
+ +
+
+vocab_size: int = 0
+
+ +
+
+property vocab_size_padded
+

 Returns the vocab_size of the model padded to a multiple of the tensor_parallel size.

+
+ +
+ +
+
+class QKVConfig
+

Bases: object

+

The QKV layer config.

+
+
+__init__(q=None, k=None, v=None)
+
+
Parameters:
+
+
+
Return type:
+

None

+
+
+
+ +
+
+property activation_scaling_factor
+

Returns the merged activation_scaling_factor across Q, K and V.

+

The max of the Q, K, V activation scaling factors is returned.

+
+ +
+
+property awq_block_size
+

Returns the awq_block_size of this QKV layer.

+
+ +
+
+property bias
+

The generated linear layer bias.

+

The Q, K, V bias are concat together to fit the TensorRT-LLM QKV linear layer.

+
+ +
+
+k: LinearConfig = None
+
+ +
+
+property prequant_scaling_factor
+

Returns the merged prequant_scaling_factor across Q, K and V.

+

Prequant scaling factors for Q, K, V should be the same. So just return one of them.

+
+ +
+
+q: LinearConfig = None
+
+ +
+
+v: LinearConfig = None
+
+ +
+
+property weight
+

The generated linear layer weight.

+

The Q, K, V weights are concat together to fit the TensorRT-LLM QKV linear layer.

+
+ +
+
+property weights_scaling_factor
+

Returns the merged weights_scaling_factor across Q, K and V.

+

 If the quantization is FP8, the max of the Q, K, V weight scaling factors is returned. +If the quantization is INT8_SQ, the concatenated value is returned.

+
+ +
+
+property weights_scaling_factor_2
+

Returns the merged weights_scaling_factor_2 across Q, K and V.

+

weight_scaling_factor_2 is needed for W4A8 AWQ.

+
+ +
+ +
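 A toy sketch of the merged QKV properties (real configs are built by the export pipeline rather than by hand; the shapes are illustrative):
+import torch
+from modelopt.torch.export.model_config import LinearConfig, QKVConfig
+
+qkv = QKVConfig(
+    q=LinearConfig(weight=torch.randn(64, 64)),
+    k=LinearConfig(weight=torch.randn(64, 64)),
+    v=LinearConfig(weight=torch.randn(64, 64)),
+)
+print(qkv.weight.shape)  # Q, K and V concatenated for the TensorRT-LLM QKV linear layer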
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.export.model_config_export.html b/reference/generated/modelopt.torch.export.model_config_export.html new file mode 100644 index 0000000..36683ff --- /dev/null +++ b/reference/generated/modelopt.torch.export.model_config_export.html @@ -0,0 +1,274 @@ + + + + + + + model_config_export — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

model_config_export

+

 Code that exports optimized models to the TensorRT-LLM checkpoint.

+

Functions

+ + + + + + + + + +

export_tensorrt_llm_checkpoint

 Exports the torch model to the TensorRT-LLM checkpoint and saves it to the export_dir.

torch_to_tensorrt_llm_checkpoint

Converts the torch model to the TensorRT-LLM checkpoint per GPU rank.

+
+
+export_tensorrt_llm_checkpoint(model, decoder_type, dtype=torch.float16, export_dir='/tmp', inference_tensor_parallel=0, inference_pipeline_parallel=1, export_npz=False, naive_fp8_quantization=False, use_nfs_workspace=False)
+

 Exports the torch model to the TensorRT-LLM checkpoint and saves it to the export_dir.

+
+
Parameters:
+
    +
  • model (Module) – the torch model.

  • +
  • decoder_type (str) – the type of the decoder, e.g. gpt2, gptj, llama or gptnext.

  • +
  • dtype (dtype) – the weights data type to export the unquantized layers.

  • +
  • export_dir (Path | str) – the target export path.

  • +
  • inference_tensor_parallel (int) – The target inference time tensor parallel. +We will merge or split the calibration tensor parallelism to inference. +Default is 0, meaning using the calibration without manual config merge or split.

  • +
  • inference_pipeline_parallel (int) – The target inference time pipeline parallel. +We will merge or split the calibration pipeline parallelism to inference. +Default is 1, meaning no pipeline parallelism.

  • +
  • export_npz (bool) – Whether or not to export the model_config to the old NPZ format for backward +compatibility.

  • +
  • naive_fp8_quantization (bool) – Quantize the model naively to FP8 without calibration. +All scaling factors are set to 1.

  • +
  • use_nfs_workspace (bool) – if True, an NFS workspace will be created under the export_dir and +used as shared memory for cross-process/node communication.

  • +
+
+
+

For tensorrt_llm deployment, save the representation under export_dir. +We will save the model_config as two files:

+
+
+
+
+ +
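 A typical export sketch (model is a placeholder for a torch module already quantized with modelopt; the export directory is illustrative):
+import torch
+from modelopt.torch.export import export_tensorrt_llm_checkpoint
+
+with torch.inference_mode():
+    export_tensorrt_llm_checkpoint(
+        model,
+        decoder_type="llama",
+        dtype=torch.float16,
+        export_dir="/tmp/trtllm_ckpt",
+        inference_tensor_parallel=1,
+    )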
+
+torch_to_tensorrt_llm_checkpoint(model, decoder_type, dtype=torch.float16, inference_tensor_parallel=0, inference_pipeline_parallel=1, export_npz=False, naive_fp8_quantization=False, workspace_path=None)
+

Converts the torch model to the TensorRT-LLM checkpoint per GPU rank.

+

 The TensorRT-LLM checkpoint is the LLM model format that can be used by the TensorRT-LLM build API +for the engine building process. +https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/architecture/checkpoint.md

+
+
Parameters:
+
    +
  • model (Module) – the torch model.

  • +
  • decoder_type (str) – the type of the decoder, e.g. gpt2, gptj, llama or gptnext.

  • +
  • dtype (dtype) – the weights data type to export the unquantized layers.

  • +
  • inference_tensor_parallel (int) – The target inference time tensor parallel. +We will merge or split the calibration tensor parallelism to inference. +Default is 0, meaning using the calibration without manual config merge or split.

  • +
  • inference_pipeline_parallel (int) – The target inference time pipeline parallel. +We will merge or split the calibration pipeline parallelism to inference. +Default is 1, meaning no pipeline parallelism.

  • +
  • export_npz (bool) – Whether or not to export the model_config to the old NPZ format for backward +compatibility.

  • +
  • naive_fp8_quantization (bool) – Quantize the model naively to FP8 without calibration. +All scaling factors are set to 1.

  • +
  • workspace_path (Path | str | None) – the path to the NFS directory for postprocess cross rank communication.

  • +
+
+
Yields:
+
+
A tuple of

tensorrt_llm_config: A dict that maps to the PretrainedConfig in TensorRT-LLM. +https://github.com/NVIDIA/TensorRT-LLM/blob/main/tensorrt_llm/models/modeling_utils.py +weights: A dict that stores all model weights and scaling factors for each rank.

+
+
+
+
Return type:
+

Iterator[Tuple[Dict[str, Any], Dict[str, Tensor]]]

+
+
+
+ +
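 Since this is a per-rank generator, it is consumed in a loop; a sketch (model is a placeholder for a quantized torch module):
+from modelopt.torch.export.model_config_export import torch_to_tensorrt_llm_checkpoint
+
+for tensorrt_llm_config, weights in torch_to_tensorrt_llm_checkpoint(
+    model, decoder_type="llama", inference_tensor_parallel=2
+):
+    print(len(weights), "weight tensors for this rank")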
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.export.model_config_utils.html b/reference/generated/modelopt.torch.export.model_config_utils.html new file mode 100644 index 0000000..6cc2ac1 --- /dev/null +++ b/reference/generated/modelopt.torch.export.model_config_utils.html @@ -0,0 +1,346 @@ + + + + + + + model_config_utils — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

model_config_utils

+

Common utils for the ModelConfig.

+

Functions

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

from_quantized_weight

Converts the quantized weight to the target torch_dtype format.

merge_fc1_gate

 Merges the MLP fc and gate fields in model_config into a single LinearConfig.

merge_qkv

Merges the qkv fields in model_config from QKVConfig to a single LinearConfig.

model_config_from_dict

Load a dict to a ModelConfig instance.

model_config_to_dict

Converts the instance to a python dict.

naive_quantization

Generates a constant scaling factor (1) with target quantization.

pack_linear_weights

Packs the quantized linear weights in the model_config to the quantized format.

pad_weights

Returns the padded weights to tp_size.

restore_model_config

Recursively restores the model_config from json and loads np.ndarray or torch.Tensor weights from weights.

split_config_and_weights

Util function to split the weights or any torch.Tensor in nested config to weights.

to_quantized_weight

Converts the weight to the quantized (packed) format.

+
+
+from_quantized_weight(weight, weights_scaling_factor, quantization, torch_dtype)
+

Converts the quantized weight to the target torch_dtype format.

+
+
Parameters:
+
    +
  • weight (Tensor) –

  • +
  • weights_scaling_factor (Tensor) –

  • +
  • quantization (str) –

  • +
+
+
+
+ +
+
+merge_fc1_gate(model_config)
+

 Merges the MLP fc and gate fields in model_config into a single LinearConfig.

+
+ +
+
+merge_qkv(model_config)
+

Merges the qkv fields in model_config from QKVConfig to a single LinearConfig.

+
+ +
+
+model_config_from_dict(d)
+

Load a dict to a ModelConfig instance.

+
+
Parameters:
+

d (dict) –

+
+
Return type:
+

ModelConfig

+
+
+
+ +
+
+model_config_to_dict(model_config)
+

Converts the instance to a python dict.

+
+
Parameters:
+

model_config (ModelConfig) –

+
+
Return type:
+

dict

+
+
+
+ +
+
+naive_quantization(config)
+

Generates a constant scaling factor (1) with target quantization.

+

This is for debugging and performance measurement only.

+
+
Parameters:
+

config (ModelConfig) –

+
+
+
+ +
+
+pack_linear_weights(model_config)
+

Packs the quantized linear weights in the model_config to the quantized format.

+
+
Parameters:
+

model_config (ModelConfig) –

+
+
+
+ +
+
+pad_weights(weights, tp_size)
+

Returns the padded weights to tp_size.

+
+ +
+
+restore_model_config(model_config, weights)
+

Recursively restores the model_config from json and loads np.ndarray or torch.Tensor weights from weights.

+
+
Parameters:
+

weights (Dict[str, ndarray | Tensor]) –

+
+
+
+ +
+
+split_config_and_weights(config, weights, prefix='transformer')
+

Util function to split the weights or any torch.Tensor in nested config to weights.

+

 A weight id starting with transformer or lm_head will also be generated to link the original key to the weights dict. +The weights in the weights dict are contiguous.

+
+
Parameters:
+
    +
  • weights (Dict[str, tensor]) –

  • +
  • prefix (str) –

  • +
+
+
+
+ +
+
+to_quantized_weight(weight, weights_scaling_factor, quantization)
+

Converts the weight to the quantized (packed) format.

+
+
Parameters:
+
    +
  • weight (Tensor) –

  • +
  • weights_scaling_factor (Tensor) –

  • +
  • quantization (str) –

  • +
+
+
+
+ +
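 A round-trip sketch for the weight packing helpers (the 'fp8' mode string, the scale formula, and the shapes are illustrative; valid quantization strings depend on your version):
+import torch
+from modelopt.torch.export.model_config_utils import from_quantized_weight, to_quantized_weight
+
+w = torch.randn(128, 128)
+scale = w.abs().amax() / 448.0  # illustrative per-tensor scale (448 is the FP8 E4M3 max)
+packed = to_quantized_weight(w, scale, "fp8")
+restored = from_quantized_weight(packed, scale, "fp8", torch.float16)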
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.export.postprocess.html b/reference/generated/modelopt.torch.export.postprocess.html new file mode 100644 index 0000000..ff3a9f4 --- /dev/null +++ b/reference/generated/modelopt.torch.export.postprocess.html @@ -0,0 +1,269 @@ + + + + + + + postprocess — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

postprocess

+

Utils to load and process model_config.

+

Functions

+ + + + + + + + + + + + + + + +

check_weight_shape_valid

 Check if the weight shapes are valid with the inference TP.

pad_embedding_lm_head

Pad lm_head and embedding as multiples of 64 for AWQ quantization.

postprocess_model_config

Postprocesses the model configs with trained tensor parallel to target inference tensor parallel.

postprocess_tensors

 Make sure all tensors in the model_config are on CPU, contiguous, and own their memory.

+
+
+check_weight_shape_valid(config, inference_tensor_parallel=1, training_tensor_parallel=1)
+

 Check if the weight shapes are valid with the inference TP.

+

 This function is recursive.

+
+ +
+
+pad_embedding_lm_head(model_config, padding_factor=64)
+

Pad lm_head and embedding as multiples of 64 for AWQ quantization.

+
+
Parameters:
+
    +
  • model_config (ModelConfig) –

  • +
  • padding_factor (int) –

  • +
+
+
+
+ +
+
+postprocess_model_config(model_config, inference_tensor_parallel=1, inference_pipeline_parallel=1, training_pipeline_parallel=1, workspace_path=None)
+

Postprocesses the model configs with trained tensor parallel to target inference tensor parallel.

+

 If the training_pipeline_parallel > 1, the model configs across PP will be merged into one.

+
+
Returns:
+

+
The processed model config as a list.
+
For the merging case:

 The merged rank will return the merged model_config as a single-item list. +The other ranks will return an empty list as we no longer export them.

+
+
For the split case:

 The split model config list is returned.

+
+
+
+
+

+
+
Parameters:
+
    +
  • inference_tensor_parallel (int) –

  • +
  • inference_pipeline_parallel (int) –

  • +
  • training_pipeline_parallel (int) –

  • +
  • workspace_path (Path | str | None) –

  • +
+
+
Return type:
+

List[ModelConfig]

+
+
+
+ +
+
+postprocess_tensors(model_config, force_cpu=True, force_contiguous=True, force_non_view=True)
+

 Make sure all tensors in the model_config are on CPU, contiguous, and own their memory.

+
+
Parameters:
+
    +
  • model_config (ModelConfig) –

  • +
  • force_cpu (bool) –

  • +
  • force_contiguous (bool) –

  • +
  • force_non_view (bool) –

  • +
+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.export.scaling_factor_utils.html b/reference/generated/modelopt.torch.export.scaling_factor_utils.html new file mode 100644 index 0000000..631149b --- /dev/null +++ b/reference/generated/modelopt.torch.export.scaling_factor_utils.html @@ -0,0 +1,225 @@ + + + + + + + scaling_factor_utils — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

scaling_factor_utils

+

Utils for scaling factors adjustments.

+

Functions

+ + + + + + + + + +

get_weights_scaling_factor

 Calculate the weight scaling factors for a given group size.

resmooth_and_get_scale

Resmooths weights from a single or multiple ranks.

+
+
+get_weights_scaling_factor(weight, group_size)
+

 Calculate the weight scaling factors for a given group size.

+
+ +
+
+resmooth_and_get_scale(merged_weights, pre_quant_scales, ranks, group_size, avg_pre_quant_scale=None)
+

Resmooths weights from a single or multiple ranks.

+
+
Parameters:
+
    +
  • merged_weights (Tensor) – Merged weights from ranks.

  • +
  • pre_quant_scales (List[Tensor]) – List of pre-quantization scales for each rank.

  • +
  • ranks (int) – Number of ranks.

  • +
  • group_size (int) – Group size of the quantization block.

  • +
  • avg_pre_quant_scale (optional) – If not provided, weights will be resmoothed using +the average of pre_quant_scales.

  • +
+
+
Returns:
+

Resmoothed weights. +weight_scaling_factors: Resmoothed scaling factors. +avg_pre_quant_scale: Calculated average of the quantization scale.

+
+
Return type:
+

weights

+
+
+
+ +
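 A sketch with toy tensors for two ranks (all shapes and the group size are illustrative):
+import torch
+from modelopt.torch.export.scaling_factor_utils import resmooth_and_get_scale
+
+merged = torch.randn(256, 512)               # weights merged from two ranks
+scales = [torch.rand(512), torch.rand(512)]  # per-rank pre-quant scales
+weights, factors, avg_scale = resmooth_and_get_scale(merged, scales, ranks=2, group_size=128)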
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.export.tensorrt_llm_utils.html b/reference/generated/modelopt.torch.export.tensorrt_llm_utils.html new file mode 100644 index 0000000..339d90a --- /dev/null +++ b/reference/generated/modelopt.torch.export.tensorrt_llm_utils.html @@ -0,0 +1,234 @@ + + + + + + + tensorrt_llm_utils — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

tensorrt_llm_utils

+

Utils for TensorRT-LLM checkpoint export.

+

 Some of the logic in this file is empirical and needs constant updates as exceptions occur.

+

Functions

+ + + + + + + + + + + + +

convert_to_tensorrt_llm_config

Convert to TensorRT-LLM checkpoint config.

is_tensorrt_llm_0_8_or_9

Returns true if tensorrt_llm version is 0.8 or 0.9.

weights_to_npz

 Export the model_config and the weights in the backward-compatible npz format.

+
+
+convert_to_tensorrt_llm_config(model_config, tp_size_overwrite=None)
+

Convert to TensorRT-LLM checkpoint config.

+

 tp_size_overwrite overwrites the tp_size in config.mapping, set only for phi with TP. +This is because the TRT-LLM builder expects its checkpoint to be unsharded.

+
+
Parameters:
+
    +
  • model_config (ModelConfig) –

  • +
  • tp_size_overwrite (int | None) –

  • +
+
+
+
+ +
+
+is_tensorrt_llm_0_8_or_9()
+

Returns true if tensorrt_llm version is 0.8 or 0.9.

+
+ +
+
+weights_to_npz(weights, tensorrt_llm_config, export_dir)
+

 Export the model_config and the weights in the backward-compatible npz format.

+
+
Parameters:
+
    +
  • weights (Dict[str, ndarray]) –

  • +
  • tensorrt_llm_config (Dict[str, Any]) –

  • +
  • export_dir (Path) –

  • +
+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.export.transformer_engine.html b/reference/generated/modelopt.torch.export.transformer_engine.html new file mode 100644 index 0000000..e2000a1 --- /dev/null +++ b/reference/generated/modelopt.torch.export.transformer_engine.html @@ -0,0 +1,201 @@ + + + + + + + transformer_engine — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

transformer_engine

+

Convert the Model Optimizer quantized model to the transformer_engine.

+

Functions

+ + + + + + +

convert_to_transformer_engine

 Converts the Model Optimizer quantized model to the transformer_engine.

+
+
+convert_to_transformer_engine(model)
+

 Converts the Model Optimizer quantized model to the transformer_engine.

+
+
Parameters:
+

model (Module) –

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.html b/reference/generated/modelopt.torch.html new file mode 100644 index 0000000..7baaea7 --- /dev/null +++ b/reference/generated/modelopt.torch.html @@ -0,0 +1,189 @@ + + + + + + + torch — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

torch

+

Modules

+ + + + + + + + + + + + + + + + + + +

modelopt.torch.export

Export package.

modelopt.torch.opt

Module for general-purpose model optimization infrastructure.

modelopt.torch.quantization

Quantization package.

modelopt.torch.sparsity

API for sparsification algorithms.

modelopt.torch.utils

Utility functions.

+

Model optimization and deployment subpackage for torch.

+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.opt.config.html b/reference/generated/modelopt.torch.opt.config.html new file mode 100644 index 0000000..dd0aba6 --- /dev/null +++ b/reference/generated/modelopt.torch.opt.config.html @@ -0,0 +1,453 @@ + + + + + + + config — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

config

+

 Modelopt’s pydantic BaseModel used for any type of configuration in algorithms and modes.

+

Functions

+ + + + + + + + + +

ModeloptField

A pydantic.Field that enforces setting a default value.

get_kwargs_for_create_model_with_rules

Generate the kwargs for pydantic.create_model to auto-generate a rule config class.

+
+
+ModeloptConfig ModeloptBaseConfig
+

Bases: BaseModel

+

Our config base class for mode configuration.

+

The base class extends the capabilities of pydantic’s BaseModel to provide additional methods +and properties for easier access and manipulation of the configuration.

+

+Show default config as JSON
+
Default config (JSON):
+

+
+
{}
+
+
+

+
+get(key, default=None)
+

Get the value for the given key (can be name or alias) or default if not found.

+
+
Parameters:
+
    +
  • key (str) –

  • +
  • default (Any) –

  • +
+
+
Return type:
+

Any

+
+
+
+ +
+
+get_field_name_from_key(key)
+

Get the field name from the given key (can be name or alias of field).

+
+
Parameters:
+

key (str) –

+
+
Return type:
+

str

+
+
+
+ +
+
+items()
+

Return the items of the config with keys as aliases if possible.

+
+
Return type:
+

ItemsView[str, Any]

+
+
+
+ +
+
+keys()
+

Return the keys (aliases prioritized over names) of the config.

+
+
Return type:
+

KeysView[str]

+
+
+
+ +
+
+model_dump(**kwargs)
+

Dump the config to a dictionary with aliases and no warnings by default.

+
+ +
+
+model_dump_json(**kwargs)
+

Dump the config to a json with aliases and no warnings by default.

+
+ +
+
+update(config)
+

Update the config with the given config dictionary.

+
+
Parameters:
+

config (Dict[str, Any]) –

+
+
Return type:
+

None

+
+
+
+ +
+
+values()
+

Return the values of the config.

+
+
Return type:
+

ValuesView[Any]

+
+
+
+ +
+ +
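 The dict-like surface means a config can be inspected and updated like a mapping; a sketch with a hypothetical subclass:
+from modelopt.torch.opt.config import ModeloptBaseConfig, ModeloptField
+
+class MyConfig(ModeloptBaseConfig):  # hypothetical config class
+    num_bits: int = ModeloptField(default=8)
+
+cfg = MyConfig()
+print(cfg.get("num_bits"), list(cfg.keys()))
+cfg.update({"num_bits": 4})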
+
+ModeloptConfig ModeloptBaseRule
+

Bases: ModeloptBaseConfig

+

Our base config class for rule-based config classes.

+

Rules are what governs the configuration for modifying dynamic module classes.

+

+Show default config as JSON
+
Default config (JSON):
+

+
+
{}
+
+
+

+
+classmethod customize_rule(rule, key)
+

Construct custom rule according to the provided key which is matched.

+
+
Parameters:
+
    +
  • rule (Dict[str, Any] | None | Dict[str, Dict[str, Any] | None]) –

  • +
  • key (str) –

  • +
+
+
Return type:
+

Dict[str, Any] | None

+
+
+
+ +
+
+classmethod get_rule_type(wrapped_only=False)
+

Get the rule type for the given ModeloptBaseConfig.

+
+
Parameters:
+

wrapped_only (bool) –

+
+
Return type:
+

TypeAlias

+
+
+
+ +
+
+classmethod validate_rule(rule)
+

Validate a rule with the current cls rule.

+

We will check the full rule type (wrapped and unwrapped) and then return the wrapped type.

+
+
Parameters:
+

rule (Dict[str, Any] | None | Dict[str, Dict[str, Any] | None]) –

+
+
Return type:
+

Dict[str, Dict[str, Any] | None]

+
+
+
+ +
+ +
+
+ModeloptConfig ModeloptBaseRuleConfig
+

Bases: ModeloptBaseConfig

+

Our config base class for mode configuration that are purely made from rules.

+

The base class extends the capabilities of pydantic’s BaseModel to provide additional methods +and properties for easier access and manipulation of the configuration.

+

+Show default config as JSON
+
Default config (JSON):
+

+
+
{}
+
+
+

+
+classmethod register_default(extra_default)
+

Register a new default value for the given key.

+
+
Parameters:
+

extra_default (Dict[str, Dict[str, Dict[str, Any] | None]]) –

+
+
Return type:
+

None

+
+
+
+ +
+
+classmethod unregister_default(key)
+

Unregister the default value for the given key.

+
+
Parameters:
+

key (str) –

+
+
Return type:
+

None

+
+
+
+ +
+ +
+
+ModeloptField(default=PydanticUndefined, **kwargs)
+

A pydantic.Field that enforces setting a default value.

+
+
Parameters:
+

default (Any) –

+
+
+
+ +
+
+get_kwargs_for_create_model_with_rules(registry, default_rules, doc)
+

Generate the kwargs for pydantic.create_model to auto-generate a rule config class.

+
+
Parameters:
+
    +
  • registry (Any) – The dynamic module registry that contains all relevant dynamic modules.

  • +
  • rule_fields – The fields that the rule-based config class should have.

  • +
  • doc (str) – The docstring for the rule-based config class.

  • +
  • default_rules (Dict[str, Dict[str, Any] | None | Dict[str, Dict[str, Any] | None]]) –

  • +
+
+
Return type:
+

Dict[str, Any]

+
+
+

A rule-based config class is a config class that purely consists of fields that pertain to +rules. We can procedurally generate these rule config classes by using

+
from pydantic import create_model
+
+MyRuleConfigs = create_model(
+    "MyRuleConfigs", **get_create_model_kwargs_for_rule_model(registry, rule_fields)
+)
+
+
+

For more info and example usage, you can take a look at +SparseMagnitudeConfig.

+
+

Note

+

We have this convenience function in place since autodocs only get generated when +create_model is explicitly called in the respective config file. So this function is a +workaround to at least lower the burden of correctly calling create_model.

+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.opt.conversion.html b/reference/generated/modelopt.torch.opt.conversion.html new file mode 100644 index 0000000..ea63716 --- /dev/null +++ b/reference/generated/modelopt.torch.opt.conversion.html @@ -0,0 +1,561 @@ + + + + + + + conversion — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

conversion

+

Module to handle model converting and restoring for optimization methods.

+

When applying a model optimization algorithm, we usually need to modify the model in each step +(mode) of the algorithm. This module provides the state manager, which is a standardized interface +(class) to record and store state information in the model.

+

 On top of the state manager, this module provides utilities to save a history of these modifications +(“modelopt state dict”) and to restore an unmodified model to the state indicated in the state dict.

+

Classes

+ + + + + + +

ModeloptStateManager

 A class to handle the modelopt state stored for each mode corresponding to a task/mode.

+

Functions

+ + + + + + + + + + + + + + + + + + +

apply_mode

 Apply the provided modes to the model, record the changes, and return the model.

modelopt_state

Return the modelopt state dict describing the modifications to the model.

save

Save a model's state dict together with the modelopt state dict to restore its architecture.

restore_from_modelopt_state

Restore the model architecture from the modelopt state dictionary based on the user-provided model.

restore

Load the checkpoint, restore the modelopt model modifications, and load the model's weights.

+
+
+class ModeloptStateManager
+

Bases: object

+

A class to handle the modelopt state stored for each mode corresponding to a task/mode.

+
+
+__init__(model=None, init_state=False)
+

Initialize state manager.

+
+
Parameters:
+
    +
  • model (Module | None) – Module that has modelopt_state stored. If None, a fake module is created to store any state that might be added with the manager.

  • +
  • init_state (bool) – Whether to initialize the modelopt state for the model if it does not exist.

  • +
+
+
Return type:
+

None

+
+
+
+ +
+
+add_mode(mode, config, metadata)
+

Add mode and update state in-place.

+

Note that self._state is a list (preserves insertion order of keys) and we can therefore recall the order of modes!

+
+
Parameters:
+
    +
  • mode (_ModeDescriptor | str) –

  • +
  • config (ModeloptBaseConfig) –

  • +
  • metadata (Dict[str, Any]) –

  • +
+
+
Return type:
+

None

+
+
+
+ +
+
+check_mode(mode)
+

Check if the proposed mode is compatible with the current state.

+
+
Parameters:
+

mode (_ModeDescriptor | str) –

+
+
Return type:
+

None

+
+
+
+ +
+
+static get_config_class(mode, config)
+

Standardize the provided config to the corresponding config class.

+
+
Parameters:
+
    +
  • mode (_ModeDescriptor | str) –

  • +
  • config (Dict[str, Any]) –

  • +
+
+
Return type:
+

ModeloptBaseConfig

+
+
+
+ +
+
+property has_state: bool
+

Return whether the model has a non-trivial modelopt state.

+
+ +
+
+classmethod is_converted(model, is_root=False)
+

Check if model is converted.

+
+
Parameters:
+
    +
  • model (Module) – A model to be checked for state/metadata from the convert process.

  • +
  • is_root (bool) – Additionally check whether the module with state is the root module.

  • +
+
+
Returns:
+

True if the model contains modelopt state indicating that it has been converted.

+
+
Return type:
+

bool

+
+
+

This method raises an assertion when multiple modelopt_states are detected or when is_root is set to True but the module with state is not the root module.

+
+ +
+
+property last_mode: _ModeDescriptor | None
+

Return the last mode applied to the model (last stored mode).

+
+ +
+
+load_state_dict(state_dict)
+

Load the provided state_dict to the modelopt_state.

+
+
Parameters:
+

state_dict (List[Tuple[str, Dict[str, Dict[str, Any]]]]) –

+
+
Return type:
+

None

+
+
+
+ +
+
+modes_with_states()
+

Yield the mode together with the full config and metadata from the state.

+
+
Return type:
+

Iterator[Tuple[_ModeDescriptor, ModeloptBaseConfig, Dict[str, Any]]]

+
+
+
+ +
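For example, the recorded modes can be inspected like this (a sketch assuming model carries modelopt state):

from modelopt.torch.opt.conversion import ModeloptStateManager

manager = ModeloptStateManager(model)
for mode, config, metadata in manager.modes_with_states():
    # mode is a _ModeDescriptor; its name property gives the string representation
    print(mode.name, type(config).__name__)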
+
+state_dict()
+

Return the metadata of the model.

+
+
Return type:
+

List[Tuple[str, Dict[str, Dict[str, Any]]]]

+
+
+
+ +
+
+classmethod transfer_state_dict(model_from, model_to)
+

Transfer the state (same instance) from one model to another.

+
+
Parameters:
+
    +
  • model_from (Module) –

  • +
  • model_to (Module) –

  • +
+
+
Return type:
+

None

+
+
+
+ +
+
+update_last_state_before_new_mode(model)
+

Update the metadata and config of the last mode applied to the model.

+
+
Parameters:
+

model (Module) –

+
+
Return type:
+

None

+
+
+
+ +
+
+update_last_state_before_save(model)
+

Update the metadata and config of the last mode applied to the model.

+
+
Parameters:
+

model (Module) –

+
+
Return type:
+

None

+
+
+
+ +
+ +
+
+apply_mode(model, mode, registry=None, init_state=None)
+

Apply the provided modes to the model, record the changes, and return the model.

+
+
Parameters:
+
    +
  • model (Module | Type[Module] | Tuple | Callable) – A model-like object. Can be an nn.Module, a model class type, or a tuple. Tuple must be of the form (model_cls,) or (model_cls, args) or (model_cls, args, kwargs). Model will be initialized as model_cls(*args, **kwargs).

  • +
  • mode (_ModeDescriptor | str | List[_ModeDescriptor | str] | List[Tuple[str, Dict[str, Any]]]) – A mode, a list of modes, or a list of tuples containing the mode and its config. The mode may be specified as a string or as the actual _ModeDescriptor class such as the QuantizeModeDescriptor class.

  • +
  • registry (_ModeRegistryCls | None) – An optional mode registry from which to retrieve the mode. If not provided, all registries will be searched.

  • +
  • init_state (bool | None) – Flag indicating whether we should initialize the state manager for the model. If not provided, it will be inferred from the model. This flag can be used to enforce a certain behavior. For example, for init_state=True the state manager will raise an error if the model already contains state.

  • +
+
+
Returns:
+

The converted model after applying the desired modes.

+
+
Return type:
+

Module

+
+
+
+ +
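As a usage sketch (the mode name "quantize" is illustrative; the available names depend on the registered mode descriptors):

from modelopt.torch.opt.conversion import apply_mode

# Apply a mode by name; the change is recorded in the model's modelopt state
model = apply_mode(model, mode="quantize")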
+
+modelopt_state(model)
+

Return the modelopt state dict describing the modifications to the model.

+

Note that the returned modelopt_state does not contain the model parameters such as weights and biases. modelopt_state is useful for saving and loading various modelopt optimization states separately from the model parameters. For example:

+
import modelopt.torch.opt as mto
+
+# Save the modelopt state and model weights separately
+torch.save(mto.modelopt_state(model), "modelopt_state.pt") # Save the modelopt state
+torch.save(model.state_dict(), "model_weights.pt") # Save the model weights
+
+
+

If you want to save the model weights and the modelopt state together, please use mto.save().

+
+
Parameters:
+

model (Module) – the modelopt-modified model.

+
+
Returns:
+

A modelopt state dictionary describing the modifications to the model.

+
+
Return type:
+

Dict[str, Any]

+
+
+
+ +
+
+restore(model, f, **kwargs)
+

Load the checkpoint, restore the modelopt model modifications, and load the model’s weights.

+
+
Parameters:
+
    +
  • model (Module | Type[Module] | Tuple | Callable) – A model-like object. Can be an nn.Module, a model class type, or a tuple. Tuple must be of the form (model_cls,) or (model_cls, args) or (model_cls, args, kwargs). Model will be initialized as model_cls(*args, **kwargs).

  • +
  • f (str | PathLike | BinaryIO) – Target file location generated by mto.save().

  • +
  • **kwargs – additional args for torch.load().

  • +
+
+
Returns:
+

The model with original weights and stored architecture.

+
+
Return type:
+

Module

+
+
+
+

Note

+

Note that wrappers such as DistributedDataParallel are not supported during the restore process. Please wrap the model after the restore process.

+
+
+ +
+
+restore_from_modelopt_state(model, modelopt_state)
+

Restore the model architecture from the modelopt state dictionary based on the user-provided model.

+

This method does not restore the model parameters such as weights and biases. Please load the weights and biases with the original checkpoint loading method after restoring modelopt states with restore_from_modelopt_state. For example:

+
import modelopt.torch.opt as mto
+
+model = ...  # Create the model-like object
+
+# Restore the previously saved modelopt state followed by model weights
+mto.restore_from_modelopt_state(model, torch.load("modelopt_state.pt"))  # Restore modelopt state
+model.load_state_dict(torch.load("model_weights.pt"), ...)  # Load the model weights
+
+
+

If you want to restore the model weights and the modelopt state together, please use mto.restore().

+
+
Parameters:
+
    +
  • model (Module | Type[Module] | Tuple | Callable) – A model-like object. Can be an nn.Module, a model class type, or a tuple. Tuple must be of the form (model_cls,) or (model_cls, args) or (model_cls, args, kwargs). Model will be initialized as model_cls(*args, **kwargs).

  • +
  • modelopt_state (Dict[str, Any]) – The modelopt state dict describing the modelopt modifications to the model. The modelopt_state can be generated via mto.modelopt_state().

  • +
+
+
Returns:
+

A modified model architecture based on the restored modifications with the unmodified weights as stored in the provided model argument.

+
+
Return type:
+

Module

+
+
+
+

Note

+

Note that wrappers such as DistributedDataParallel are not supported during the restore process. Please wrap the model after the restore process.

+
+
+ +
+
+save(model, f, **kwargs)
+

Save a model’s state dict together with the modelopt state dict to restore its architecture.

+
+
Parameters:
+
    +
  • model (Module) – Any model.

  • +
  • f (str | PathLike | BinaryIO) – Target file location.

  • +
  • **kwargs – additional args for torch.save().

  • +
+
+
Return type:
+

None

+
+
+
+

Note

+

If model is a wrapper such as DistributedDataParallel, it will be unwrapped for saving.

+
+
+ +
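For completeness, a minimal save/restore round trip might look like the following sketch (the file name and MyModel are illustrative):

import modelopt.torch.opt as mto

mto.save(model, "modelopt_model.pt")  # model weights + modelopt state in one file

# Later: re-create the unmodified model and restore architecture and weights
model = mto.restore(MyModel(), "modelopt_model.pt")  # MyModel is hypothetical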
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.opt.dynamic.html b/reference/generated/modelopt.torch.opt.dynamic.html
new file mode 100644
index 0000000..5c54c44
--- /dev/null
+++ b/reference/generated/modelopt.torch.opt.dynamic.html
@@ -0,0 +1,580 @@
dynamic — Model Optimizer 0.11.2

dynamic

+

Basic dynamic module class and hparam class.

+

Classes


DynamicModule

Base class for dynamic modules.

DynamicSpace

A class to represent all dynamic model choices over a model with multiple submodules.

+
+
+class DynamicModule
+

Bases: Module

+

Base class for dynamic modules.

+

Dynamic modules are usually extended from torch.nn.Module’s. They dynamically support a family of torch.nn.Module’s with different architectural parameters, such as input/output channel numbers.

+

Dynamic modules can also be used to construct the basic searchable unit in a search space with +the option to select and sample a candidate unit.

+

Candidate units are usually described using Hparam objects and dynamic attributes. Each hparam describes a basic searchable unit (e.g., the number of output channels in DynamicConv2d). Dynamic attributes are callbacks to dynamically construct the attribute depending on the current value of the hparam(s), e.g., the weight tensor in DynamicConv2d that depends on the hparam out_channels.

+

In addition, dynamic modules also support registering general attributes that are removed upon exporting the module. This is useful for storing temporary attributes that are not hparams or existing attributes that are converted to dynamic attributes.

+

For a DynamicModule class that contains other dynamic modules, the class implementation should ensure only to expose hparams in the outermost class and handle other hparams internally, including hparams of child modules that are usually exposed on their own (e.g., block module implementations containing DynamicLinear).

+
+
+__init__(*args, **kwargs)
+

Initializing a dynamic module is not allowed!

+
+ +
+
+classmethod convert(module)
+

Converts a module in-place into its dynamic counterpart by patching its class.

+
+
Parameters:
+

module (Module) – The module to be converted into a dynamic module.

+
+
Returns:
+

The converted dynamic module.

+
+
Return type:
+

DynamicModule

+
+
+

This should generally be a final method and child classes should inherit _setup() instead to customize the conversion process.

+

Patching is achieved by updating the __class__ attribute of the module to its dynamic counterpart. The dynamic counterpart is a subclass of the original class; hence, we ensure the module is fully compatible with the original class. Simultaneously, we can inject the corresponding dynamic behavior in a standardized and rigorous fashion.

+
+ +
+
+export()
+

Export self (a dynamic module) in-place and return the exported module.

+

The export process will remove the top-level dynamic module and replace it with the original module class. Note that the original class may be either another type of dynamic module or a vanilla nn.Module. Consequently, any methods (including properties) that are implemented in the child class will be removed. Hparams as well as dynamic and temporary attributes are handled in a special fashion, see below.

+

In order to ensure that the exported module is still consistent, there are several mechanisms in place to handle hparams, dynamic attributes, and temporary attributes:

+
  • Hparams of the current type are replaced with their currently active value. Note that we do not need to explicitly handle hparams of the parent class as they are mutually exclusive, i.e., hparams are unique across all levels of inheritance.
  • Dynamic attributes are handled depending on whether they exist in a parent class:
    1. The same dynamic attribute exists in a parent class. In this case, the callback is folded into (“appended to”) the callback for the same dynamic attribute of the parent class. This way we ensure that the final value of the attribute remains consistent.
    2. The dynamic attribute does not exist in a parent class. In this case, the attribute is no longer dynamic as there are no more callbacks that could affect its value. Therefore, we simply overwrite the underlying original object with the current value and revert it to a regular attribute.
  • Temporary attributes are kept until the final export, i.e., until the resulting class is no longer a dynamic module. This ensures that folded callbacks that may need access to these attributes can still access them.
+
+
Return type:
+

Module

+
+
+
+ +
+
+extra_repr()
+

Generate extra_repr making sure all dynamic keys exist in self.__dict__.

+

PyTorch heavily uses self.__dict__ to generate extra_repr. However, we remove certain attributes from self.__dict__ so we can manage them dynamically. We temporarily add them back here and remove them again afterwards.

+
+ +
+
+force_assign()
+

Force re-assign all dynamic attributes to their current values.

+
+

Warning

+

Note that this method overwrites the actual buffers and parameters! Only use it in specific circumstances!

+
+
+ +
+
+freeze()
+

Restrict the hparams of the dynamic module to the original choices.

+

This is useful to enforce the behavior of the parent class.

+
+

Note

+

After this call, the module’s hparams can no longer be modified although the underlying type is still a dynamic module.

+
+
+ +
+
+get_hparam(target)
+

Look up and return hparam (like “torch.nn.Module.get_parameter()” but for hparam).

+
+
Parameters:
+

target (str) –

+
+
Return type:
+

Hparam

+
+
+
+ +
+
+modify(*args, **kwargs)
+

Modify the module’s dynamic choices in a standardized & scalable fashion.

+

This method can be overridden by the child class! While users can also directly modify the choices of individual hparams, this method should provide a way to modify a batch of dynamic modules with the same arguments, e.g., out_features_ratio for DynamicLinear.

+

Note that arguments of the modify method that are exposed to the user via the rule system should be specified as keyword-only arguments. When they are exposed as keyword-only arguments, the _DMRegistryCls can automatically generate the corresponding config class on the fly that lets users provide configs, which are then automatically validated before being passed to the modify method.

+

If possible, modify()’s keyword arguments should have default values that leave the hparams intact if not provided, e.g., one might call some_dynamic_module.modify() without any arguments and the module will remain unchanged.

+
+ +
+
+named_hparams(configurable=None)
+

Return an iterator over all hparams of the module.

+
+
Parameters:
+

configurable (bool | None) – Whether to include configurable hparams.

+
+
Yields:
+

(name, Hparam) – Tuple containing the name and hparam.

+
+
Return type:
+

Iterator[Tuple[str, Hparam]]

+
+
+

Default behavior is to iterate over configurable and non-configurable hparams. Set configurable accordingly to only iterate over either. If configurable is set to True, only configurable symbols are iterated over. If configurable is set to False, configurable symbols are skipped (only non-configurable symbols are yielded).

+
+ +
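For instance, the configurable hparams of a dynamic module can be inspected like this (a sketch assuming dyn_module is a converted DynamicModule):

for name, hparam in dyn_module.named_hparams(configurable=True):
    print(f"{name}: active={hparam.active}, choices={list(hparam.choices)}")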
+
+property original_cls: Type[Module]
+

Return the original class of the dynamic module.

+
+ +
+
+reset_dynamic_attributes()
+

Context manager to temporarily remove any dynamic attributes and re-register values.

+

This context manager is intended to be used when we want to access a dynamic attribute in its original unmodified version, i.e., without this class interfering with its original value and its corresponding getattr/setattr/delattr behavior.

+

Upon exiting the context manager, the dynamic attributes are re-registered and the same callbacks are re-registered together with the new value.

+
+ +
+ +
+
+class DynamicSpace
+

Bases: object

+

A class to represent all dynamic model choices over a model with multiple submodules.

+
+
+__init__(model)
+

Initialize the dynamic space from the model.

+
+
Parameters:
+

model (Module) –

+
+
Return type:
+

None

+
+
+
+ +
+
+config(configurable=None)
+

Return the config dict of all hyperparameters.

+
+
Parameters:
+
    +
  • model – A model that contains DynamicModule(s).

  • +
  • configurable (bool | None) – None -> all hps, True -> configurable hps, False -> non-configurable hps

  • +
+
+
Returns:
+

A dict of (parameter_name, choice) that specifies an active subnet.

+
+
Return type:
+

Dict[str, Any]

+
+
+
+ +
+
+convert_to_dynamic(rules, dm_registry)
+

Convert the model to dynamic modules according to the rules and provided registry.

+
+
Parameters:
+
    +
  • rules (Dict[str, Dict[str, Any] | None | Dict[str, Dict[str, Any] | None]] | None) – A dictionary containing rules for the dynamic modules.

  • +
  • dm_registry (_DMRegistryCls) – A registry containing the dynamic modules to be converted to.

  • +
+
+
Returns:
+

A dictionary containing the converted modules with submodule names as keys and the +converted dynamic modules as values.

+
+
Return type:
+

Dict[str, Module]

+
+
+
+ +
+
+export(dm_registry)
+

Recursively export the module including self and return the result.

+
+
Parameters:
+

dm_registry (_DMRegistryCls) – A dynamic module registry to check for dynamic modules that should be exported.

+
+
Returns:
+

The model after exporting the dynamic modules found in the registry.

+
+
Return type:
+

Module

+
+
+
+ +
+
+get_hparam(name)
+

Get the hparam with the given name.

+
+
Parameters:
+

name (str) –

+
+
Return type:
+

Hparam

+
+
+
+ +
+
+is_configurable()
+

Check if the model has any configurable hyperparameters.

+
+
Parameters:
+

model – A model to be checked for DynamicModule(s) with configurable hyperparameters.

+
+
Returns:
+

True if the model contains DynamicModule(s) with configurable hyperparameters with more than one choice.

+
+
Return type:
+

bool

+
+
+
+ +
+
+is_dynamic()
+

Check if any module is dynamic.

+
+
Returns:
+

True if the model contains DynamicModule(s).

+
+
Return type:
+

bool

+
+
+
+ +
+
+named_dynamic_modules()
+

Recursively yield the name and instance of all DynamicModules.

+
+
Yields:
+

(name, DynamicModule) – Tuple containing the name and module.

+
+
Return type:
+

Iterator[Tuple[str, DynamicModule]]

+
+
+
+ +
+
+named_hparams(configurable=None)
+

Recursively yield the name and instance of all hparams.

+
+
Parameters:
+

configurable (bool | None) – Whether to include configurable hparams.

+
+
Yields:
+

(name, Hparam) – Tuple containing the name and hparam.

+
+
Return type:
+

Iterator[Tuple[str, Hparam]]

+
+
+

Default behavior is to iterate over all hparams. If configurable is set to True, only configurable, non-duplicate symbols are iterated over.

+
+ +
+
+select(config, strict=True)
+

Select the subnet provided by config.

+

If strict is set, then config must contain the exact set of keys representing both the configurable and non-configurable hparams.

+
+
Parameters:
+
    +
  • config (Dict[str, Any]) –

  • +
  • strict (bool) –

  • +
+
+
Return type:
+

None

+
+
+
+ +
+
+size()
+

Get the search space size of the model.

+
+
Returns:
+

An int representing the search space size of the model.

+
+
Return type:
+

int

+
+
+
+ +
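Putting these methods together, a typical interaction might look like this sketch (assuming model already contains dynamic modules):

from modelopt.torch.opt.dynamic import DynamicSpace

space = DynamicSpace(model)
if space.is_configurable():
    config = space.config(configurable=True)  # currently active choices
    space.select(config, strict=False)        # re-select the same subnet
    print(f"Search space size: {space.size()}")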
+ +
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.opt.hparam.html b/reference/generated/modelopt.torch.opt.hparam.html
new file mode 100644
index 0000000..51fef0f
--- /dev/null
+++ b/reference/generated/modelopt.torch.opt.hparam.html
@@ -0,0 +1,329 @@
hparam — Model Optimizer 0.11.2

hparam

+

Standard hyperparameter class for regular symbol.

+

Classes


Hparam

A base hyperparameter of a DynamicModule.

+
+
+class Hparam
+

Bases: object

+

A base hyperparameter of a DynamicModule.

+

An example of such a Hparam could be an hparam with identity dependencies.

+
+
+ActiveSlice
+

alias of Union[slice, LongTensor]

+
+ +
+
+Importance
+

alias of Optional[Tensor]

+
+ +
+
+ImportanceEstimator
+

alias of Callable[[], Optional[Tensor]]

+
+ +
+
+__init__(choices, original=None)
+

Initializes Hparam with original value and choices.

+
+
Parameters:
+
    +
  • choices (Sequence[Tuple[int, ...] | int | float]) –

  • +
  • original (Tuple[int, ...] | int | float | None) –

  • +
+
+
Return type:
+

None

+
+
+
+ +
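As a small illustration of the interface (a sketch; the choices are arbitrary):

from modelopt.torch.opt.hparam import Hparam

hp = Hparam(choices=[8, 16, 32], original=32)
print(hp.min, hp.max, hp.original)  # 8 32 32
print(hp.is_configurable)  # presumably True here, since there is more than one choice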
+
+property active: Tuple[int, ...] | int | float
+

Return the currently active value.

+
+ +
+
+property active_slice: slice | LongTensor
+

Return the currently active sorted indices or slice corresponding to the active value.

+
+ +
+
+property choices: Sequence[Tuple[int, ...] | int | float]
+

Return available choices.

+
+ +
+
+enforce_order(order=None)
+

Store a reference to this order and enforce the order for active_slice.

+

This function enables the user to enforce an order how the active_slice is generated.

+

Example

+

If the hparam has active value 16 and the max value is 32, the active_slice by default will be slice(16), which is equivalent to range(16) (although faster). When order is set, active_slice will instead return self._order[:16].

+

TODO: will we ever need a cycle detector here?

+
+
Parameters:
+

order (Tensor | None) –

+
+
Return type:
+

None

+
+
+
+ +
+
+property importance: Tensor | None
+

Computes and returns the normalized importance among the features the hparam represents.

+

Note that the importance is represented as a 1d-tensor with length equal to the max choice (Hparam.max) of the hparam.

+

For example, if the hparam represents the number of in_channels to a Conv2d layer, the importance should be a 1d-tensor of importance scores with length equal to the number of in_channels.

+

Note that each module should register appropriate importance callbacks to compute the actual importance associated with the hparam choices. If there is no notion of importance for the hparam, this function returns None.

+
+ +
+
+property is_configurable
+

Return whether the hparam is configurable.

+
+ +
+
+property is_sortable
+

Return whether the hparam is sortable.

+
+ +
+
+property max: Tuple[int, ...] | int | float
+

Return max value from among choices.

+
+ +
+
+property min: Tuple[int, ...] | int | float
+

Return min value from among choices.

+
+ +
+
+property original: Tuple[int, ...] | int | float
+

Return original value from among choices.

+
+ +
+
+register_importance(importance_estimator)
+

Register importance estimator for the hparam.

+

This estimator does not take any arguments and should return a single value (an optional 1d-tensor) representing the importance among the features the hparam represents. If the returned value is a tensor, its length must be equal to the max choice (Hparam.max) of the hparam.

+
+
Parameters:
+

importance_estimator (Callable[[], Tensor | None]) –

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.opt.html b/reference/generated/modelopt.torch.opt.html
new file mode 100644
index 0000000..38319c9
--- /dev/null
+++ b/reference/generated/modelopt.torch.opt.html
@@ -0,0 +1,218 @@
opt — Model Optimizer 0.11.2

opt

+

Modules


modelopt.torch.opt.config

Modelopt's pydantic BaseModel used for any type of configuration in algorithms and mode.

modelopt.torch.opt.conversion

Module to handle model converting and restoring for optimization methods.

modelopt.torch.opt.dynamic

Basic dynamic module class and hparam class.

modelopt.torch.opt.hparam

Standard hyperparameter class for regular symbol.

modelopt.torch.opt.mode

Interface and utilities for optimization modes/algorithms.

modelopt.torch.opt.plugins

Handles plugins for third-party modules.

modelopt.torch.opt.searcher

Standard interface to implement a searcher algorithm.

modelopt.torch.opt.utils

Utilities for optimization.

+

Module for general-purpose model optimization infrastructure.

+

modelopt.torch.opt contains tooling to:

+
    +
  • ingest a user-provided model and set it up for optimization;

  • +
  • define and implement search and optimization procedures;

  • +
  • export a model back to a regular pytorch model after optimization;

  • +
  • save, restore, and manage checkpoints from which the model modifications can be resumed.

  • +
+

Please refer to each individual sub-module to learn more about the various concepts within modelopt.torch.opt and how to use them to implement a model optimization algorithm.

+
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.opt.mode.html b/reference/generated/modelopt.torch.opt.mode.html
new file mode 100644
index 0000000..17c8711
--- /dev/null
+++ b/reference/generated/modelopt.torch.opt.mode.html
@@ -0,0 +1,219 @@
mode — Model Optimizer 0.11.2

mode

+

Interface and utilities for optimization modes/algorithms.

+

A mode is a specific type of algorithm for model optimization, e.g., some type of algorithm for pruning or quantization. It can also specify a single step within an optimization algorithm instead of the whole algorithm. For example, a mode can prepare a model for pruning or export (i.e., fix the optimal model configuration) after pruning.

+

Within modelopt, a mode constitutes the unit for model optimization. We can define arbitrary modes, each mode gets recorded in the model’s modelopt state dict, and we can define workflows as a sequence of modes.

+

Classes


ABC

Helper class that provides a standard way to create an ABC using inheritance.

Any

Special type indicating an unconstrained type.

BaseSearcher

A basic search interface that can be used to search/optimize a model.

TypeVar

Type variable.

+

Functions


abstractmethod

A decorator indicating abstract methods.

get_mode_config

Standardize mode to ModeConfigDict and return.

val2list

Repeat val for repeat_time times and return the list or val if list/tuple.

+
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.opt.plugins.html b/reference/generated/modelopt.torch.opt.plugins.html
new file mode 100644
index 0000000..11cbcb6
--- /dev/null
+++ b/reference/generated/modelopt.torch.opt.plugins.html
@@ -0,0 +1,186 @@
plugins — Model Optimizer 0.11.2

plugins

+

Modules


Handles plugins for third-party modules.

+
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.opt.searcher.html b/reference/generated/modelopt.torch.opt.searcher.html
new file mode 100644
index 0000000..6e4872a
--- /dev/null
+++ b/reference/generated/modelopt.torch.opt.searcher.html
@@ -0,0 +1,399 @@
searcher — Model Optimizer 0.11.2

searcher

+

Standard interface to implement a searcher algorithm.

+

A searcher is useful whenever we want to search/optimize over a set of hyperparameters in the model. Searchers are usually used in conjunction with a mode, which can define a search space via its entrypoints, i.e., convert the model into a search space. The searcher then optimizes over this search space.

+

Classes


BaseSearcher

A basic search interface that can be used to search/optimize a model.

+
+
+class BaseSearcher
+

Bases: ABC

+

A basic search interface that can be used to search/optimize a model.

+

The base interface supports basic features like setting up a search, checkpointing, and loading logic, and defines a minimal workflow to follow.

+
+
+final __init__()
+

Overriding the __init__ method is not allowed.

+
+
Return type:
+

None

+
+
+
+ +
+after_search()
Optional post-processing steps after the search.

+
+
Return type:
+

None

+
+
+
+ +
+before_search()
Optional pre-processing steps before the search.

+
+
Return type:
+

None

+
+
+
+ +
+
+config: Dict[str, Any]
+
+ +
+
+constraints: Dict[str, str | float | None]
+
+ +
+
+construct_forward_loop(silent=True)
+

Get runnable forward loop on the model using the provided configs.

+
+
Return type:
+

Callable[[Module], None] | None

+
+
+
+ +
+
+property default_search_config: Dict[str, Any]
+

Get the default config for the searcher.

+
+ +
+
+abstract property default_state_dict: Dict[str, Any]
+

Return default state dict.

+
+ +
+
+dummy_input: Any | Tuple
+
+ +
+
+eval_score(silent=True)
+

Optionally silent evaluation of the score function.

+
+
Return type:
+

float

+
+
+
+ +
+
+forward_loop: Callable[[Module], None] | None
+
+ +
+
+property has_score: bool
+

Check if the model has a score function.

+
+ +
+
+load_search_checkpoint()
+

Load function for search checkpoint returning indicator whether checkpoint was loaded.

+
+
Return type:
+

bool

+
+
+
+ +
+
+model: Module
+
+ +
+reset_search()
Reset search at the beginning.

+
+
Return type:
+

None

+
+
+
+ +
+run_search()
Run actual search.

+
+
Return type:
+

None

+
+
+
+ +
+
+sanitize_search_config(config)
+

Sanitize the search config dict.

+
+
Parameters:
+

config (Dict[str, Any] | None) –

+
+
Return type:
+

Dict[str, Any]

+
+
+
+ +
+
+save_search_checkpoint()
+

Save function for search checkpoint.

+
+
Return type:
+

None

+
+
+
+ +
+
+final search(model, constraints, dummy_input, config=None)
+

Search a given prunable model for the best sub-net and return the search model.

+

The best sub-net maximizes the score given by score_func while satisfying the constraints.

+
+
Parameters:
+
    +
  • model (Module) – The converted model to be searched.

  • +
  • constraints (Dict[str, str | float | None]) – The dictionary from constraint name to upper bound that the searched model has to satisfy.

  • +
  • dummy_input (Any | Tuple) – Arguments of model.forward(). This is used for exporting and calculating inference-based metrics, such as latency/FLOPs. The format of dummy_input follows the convention of the args argument in torch.onnx.export.

  • +
  • config (Dict[str, Any] | None) – Additional optional arguments to configure the search.

  • +
+
+
Return type:
+

Dict[str, Any]

+
+
+
+
Returns: A tuple (subnet, state_dict), where subnet is the searched subnet (an nn.Module) that can be used for subsequent tasks like fine-tuning, and state_dict contains the history and detailed stats of the search procedure.

+
+
+
+ +
+
+final state_dict()
+

The state dictionary that can be stored/loaded.

+
+
Return type:
+

Dict[str, Any]

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.opt.utils.html b/reference/generated/modelopt.torch.opt.utils.html
new file mode 100644
index 0000000..b44353f
--- /dev/null
+++ b/reference/generated/modelopt.torch.opt.utils.html
@@ -0,0 +1,254 @@
utils — Model Optimizer 0.11.2

utils

+

Utilities for optimization.

+

Functions


is_configurable

Check if the model is configurable.

is_dynamic

Check if the model is dynamic.

named_hparams

Recursively yield the name and instance of all hparams.

search_space_size

Return the size of the search space.

+
+
+is_configurable(model)
+

Check if the model is configurable.

+
+
Parameters:
+

model (Module) –

+
+
Return type:
+

bool

+
+
+
+ +
+
+is_dynamic(model)
+

Check if the model is dynamic.

+
+
Parameters:
+

model (Module) –

+
+
Return type:
+

bool

+
+
+
+ +
+
+named_hparams(model, configurable=None)
+

Recursively yield the name and instance of all hparams.

+
+
Parameters:
+

configurable (bool | None) –

+
+
Return type:
+

Generator[Tuple[str, Hparam], None, None]

+
+
+
+ +
+
+search_space_size(model)
+

Return the size of the search space.

+
+
Parameters:
+

model (Module) –

+
+
Return type:
+

int

+
+
+
+ +
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.quantization.calib.calibrator.html b/reference/generated/modelopt.torch.quantization.calib.calibrator.html
new file mode 100644
index 0000000..1e2c7be
--- /dev/null
+++ b/reference/generated/modelopt.torch.quantization.calib.calibrator.html
@@ -0,0 +1,187 @@
calibrator — Model Optimizer 0.11.2

calibrator

+

Abstract base class for calibrators.

+
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.quantization.calib.histogram.html b/reference/generated/modelopt.torch.quantization.calib.histogram.html
new file mode 100644
index 0000000..1ea3495
--- /dev/null
+++ b/reference/generated/modelopt.torch.quantization.calib.histogram.html
@@ -0,0 +1,301 @@
histogram — Model Optimizer 0.11.2

histogram

+

Histogram based calibrators.

+

Classes


HistogramCalibrator

Unified histogram calibrator.

+

Functions


calibrate_weights

Calibrate weights of all child quantized modules.

+
+
+class HistogramCalibrator
+

Bases: _Calibrator

+

Unified histogram calibrator.

+
+
Histogram will only be collected once. compute_amax() performs entropy, percentile, or MSE calibration based on its arguments.

+
+
+
+
Parameters:
+
    +
  • num_bits – An integer. Number of bits of quantization.

  • +
  • axis – A tuple. see QuantDescriptor.

  • +
  • unsigned – A boolean. Use unsigned quantization.

  • +
  • num_bins – An integer. Number of histogram bins. Default 2048.

  • +
  • grow_method – A string. DEPRECATED. default None.

  • +
  • skip_zeros – A boolean. If True, skips zeros when collecting data for histogram. Default False.

  • +
  • torch_hist – A boolean. If True, collect the histogram with torch.histc instead of np.histogram. If the input tensor is on GPU, histc will also run on GPU. Default True.

  • +
+
+
+
+
+__init__(num_bits=8, axis=None, unsigned=False, num_bins=2048, grow_method=None, skip_zeros=False, torch_hist=True)
+

Initialize.

+
+ +
+
+collect(x)
+

Collect histogram.

+
+ +
+
+compute_amax(method, *, stride=1, start_bin=128, percentile=99.99)
+

Compute the amax from the collected histogram.

+
+
Parameters:
+
    +
  • method (str) – A string. One of [‘entropy’, ‘mse’, ‘percentile’]

  • +
  • stride (int) –

  • +
  • start_bin (int) –

  • +
  • percentile (float) –

  • +
+
+
Keyword Arguments:
+
    +
  • stride – An integer. Default 1

  • +
  • start_bin – An integer. Default 128

  • +
  • percentile – A float between [0, 100]. Default 99.99.

  • +
+
+
Returns:
+

a tensor

+
+
Return type:
+

amax

+
+
+
+ +
+
+reset()
+

Reset the collected histogram.

+
+ +
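A minimal usage sketch (data is assumed to be an iterable of tensors):

from modelopt.torch.quantization.calib.histogram import HistogramCalibrator

calib = HistogramCalibrator(num_bits=8, axis=None, num_bins=2048)
for x in data:
    calib.collect(x)  # accumulate the histogram
amax = calib.compute_amax("percentile", percentile=99.99)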
+ +
+
+calibrate_weights(model, method='percentile', perchannel=True, percentile=99.99, num_bins=2048)
+

Calibrate weights of all child quantized modules.

+

Ideally, we would split the calibration functionality into a histogram collector and a calibrator which takes the histogram and computes amax. But since we haven’t decoupled collector and calibrator, it is easier to create a separate function to calibrate weights.

+
+

Note

+

This function uses the method specified by the argument to decide which method to use, NOT the one specified by the calibrator embedded in weight_quantizer. We haven’t moved calibration to GPU, so everything is transferred to CPU.

+
+
+
Parameters:
+
    +
  • model – A torch.nn.Module.

  • +
  • method – A string specifying the calibration method. Supports “mse” and “percentile”. Default “percentile”.

  • +
  • perchannel – A bool. Set channel/neuron axis if True. Default True.

  • +
  • percentile – A float. Default 99.99

  • +
  • num_bins – An integer. Number of histogram bins. Default 2048.

  • +
+
+
+
+ +
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.quantization.calib.html b/reference/generated/modelopt.torch.quantization.calib.html
new file mode 100644
index 0000000..72e9052
--- /dev/null
+++ b/reference/generated/modelopt.torch.quantization.calib.html
@@ -0,0 +1,202 @@
calib — Model Optimizer 0.11.2

calib

+

Modules


modelopt.torch.quantization.calib.calibrator

Abstract base class for calibrators.

modelopt.torch.quantization.calib.histogram

Histogram based calibrators.

modelopt.torch.quantization.calib.max

Calibrator that returns the absolute max of all collected tensors.

+

Calibrator classes.

+

modelopt.torch.quantization.calib provides Calibrator classes that collect data statistics and determine modelopt.torch.quantization parameters.

+
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.quantization.calib.max.html b/reference/generated/modelopt.torch.quantization.calib.max.html
new file mode 100644
index 0000000..565c917
--- /dev/null
+++ b/reference/generated/modelopt.torch.quantization.calib.max.html
@@ -0,0 +1,254 @@
max — Model Optimizer 0.11.2

max

+

Calibrator that returns the absolute max of all collected tensors.

+

Classes


MaxCalibrator

Max calibrator, tracks the maximum value globally.

+
+
+class MaxCalibrator
+

Bases: _Calibrator

+

Max calibrator, tracks the maximum value globally.

+
+
Parameters:
+
    +
  • calib_desc – A MaxCalibDescriptor.

  • +
  • num_bits – An integer. Number of bits of quantization.

  • +
  • axis – A tuple. see QuantDescriptor.

  • +
  • unsigned – A boolean. Use unsigned quantization.

  • +
+
+
+
+
Readonly Properties:

amaxs: A list of amax values, saved as numpy arrays since they are likely to be used for plotting.

+
+
+
+
+__init__(num_bits=8, axis=None, unsigned=False, track_amax=False)
+

Initialize.

+
+ +
+
+property amaxs
+

Return the list of amax values collected so far.

+
+ +
+
+collect(x)
+

Tracks the absolute max of all tensors.

+
+
Parameters:
+

x – A tensor

+
+
Raises:
+

RuntimeError – If amax shape changes

+
+
+
+ +
+
+compute_amax()
+

Return the absolute max of all tensors collected.

+
+ +
+
+reset()
+

Reset the collected absolute max.

+
+ +
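A corresponding sketch for the max calibrator:

import torch
from modelopt.torch.quantization.calib.max import MaxCalibrator

calib = MaxCalibrator(num_bits=8, axis=None)
calib.collect(torch.randn(16, 32))
print(calib.compute_amax())  # absolute max over all collected tensors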
+ +
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.quantization.config.html b/reference/generated/modelopt.torch.quantization.config.html
new file mode 100644
index 0000000..db75d6f
--- /dev/null
+++ b/reference/generated/modelopt.torch.quantization.config.html
@@ -0,0 +1,365 @@
config — Model Optimizer 0.11.2

config

+

This document lists the quantization formats supported by Model Optimizer and example quantization configs.

+
+

Quantization Formats

+

The following table lists the quantization formats supported by Model Optimizer and the corresponding quantization config. See Quantization Configs for the specific quantization config definitions.

+

Please see choosing the right quantization formats to learn more about the formats and their use-cases.

+
+

Note

+

The recommended configs given below are for LLM models. For CNN models, only INT8 quantization is supported. Please use quantization config INT8_DEFAULT_CFG for CNN models.

+

Quantization Format

Model Optimizer config

INT8

INT8_SMOOTHQUANT_CFG

FP8

FP8_DEFAULT_CFG

INT4 Weights only AWQ (W4A16)

INT4_AWQ_CFG

INT4-FP8 AWQ (W4A8)

W4A8_AWQ_BETA_CFG

+
+
+

Quantization Configs

+

A quantization config is a dictionary specifying the values for the keys "quant_cfg" and "algorithm". The "quant_cfg" key specifies the quantization configurations. The "algorithm" key specifies the algorithm argument to calibrate.

+

Quantization configurations is a dictionary mapping wildcards or filter functions to their quantizer attributes. The wildcards or filter functions are matched against the quantizer module names. The quantizer modules have names ending with weight_quantizer and input_quantizer, and they perform weight quantization and input quantization (or activation quantization), respectively. The quantizer modules are generally instances of TensorQuantizer and the specified quantizer attributes describe their quantization behavior. Quantizer attributes is a dictionary mapping quantizer attribute names to their values.

+

Quantizer attributes can also be a list of dictionaries. In this case, the matched quantizer module is replaced with a SequentialQuantizer module which is used to quantize a tensor in multiple formats sequentially. Each quantizer attribute dictionary in the list specifies the quantization formats for each quantization step of the sequential quantizer. For example, SequentialQuantizer is used in ‘INT4 Weights, FP8 Activations’ quantization in which the weights are quantized in INT4 followed by FP8.

+

Here are example quantization configs from Model Optimizer:

+
INT8_DEFAULT_CFG = {
+    "quant_cfg": {
+    "*weight_quantizer": {"num_bits": 8, "axis": 0},
+    "*input_quantizer": {"num_bits": 8, "axis": None},
+    "*lm_head*": {"enable": False},
+    "*block_sparse_moe.gate*": {"enable": False},  # Skip the MOE router
+    "default": {"num_bits": 8, "axis": None},
+    },
+    "algorithm": "max",
+}
+
+INT8_SMOOTHQUANT_CFG = {
+    "quant_cfg": {
+    "*weight_quantizer": {"num_bits": 8, "axis": 0},
+    "*input_quantizer": {"num_bits": 8, "axis": -1},
+    "*lm_head*": {"enable": False},
+    "*block_sparse_moe.gate*": {"enable": False},  # Skip the MOE router
+    "default": {"num_bits": 8, "axis": None},
+    },
+    "algorithm": "smoothquant",
+}
+
+FP8_DEFAULT_CFG = {
+    "quant_cfg": {
+    "*weight_quantizer": {"num_bits": (4, 3), "axis": None},
+    "*input_quantizer": {"num_bits": (4, 3), "axis": None},
+    "*block_sparse_moe.gate*": {"enable": False},  # Skip the MOE router
+    "default": {"num_bits": (4, 3), "axis": None},
+    },
+    "algorithm": "max",
+}
+
+INT4_BLOCKWISE_WEIGHT_ONLY_CFG = {
+    "quant_cfg": {
+    "*weight_quantizer": {"num_bits": 4, "block_sizes": {-1: 128}, "enable": True},
+    "*input_quantizer": {"enable": False},
+    "*lm_head*": {"enable": False},
+    "*block_sparse_moe.gate*": {"enable": False},  # Skip the MOE router
+    "default": {"enable": False},
+    },
+    "algorithm": "max",
+}
+
+INT4_AWQ_CFG = {
+    "quant_cfg": {
+    "*weight_quantizer": {"num_bits": 4, "block_sizes": {-1: 128}, "enable": True},
+    "*input_quantizer": {"enable": False},
+    "*lm_head*": {"enable": False},
+    "*block_sparse_moe.gate*": {"enable": False},  # Skip the MOE router
+    "default": {"enable": False},
+    },
+    "algorithm": {"method": "awq_lite", "alpha_step": 0.1},
+    # "algorithm": {"method": "awq_full", "alpha_step": 0.1, "max_co_batch_size": 1024},
+    # "algorithm": {"method": "awq_clip", "max_co_batch_size": 2048},
+}
+
+W4A8_AWQ_BETA_CFG = {
+"quant_cfg": {
+    "*weight_quantizer": [
+        {"num_bits": 4, "block_sizes": {-1: 128}, "enable": True},
+        {"num_bits": (4, 3), "axis": None, "enable": True},
+    ],
+    "*input_quantizer": {"num_bits": (4, 3), "axis": None, "enable": True},
+    "*lm_head*": {"enable": False},
+    "*block_sparse_moe.gate*": {"enable": False},  # Skip the MOE router
+    "default": {"enable": False},
+},
+"algorithm": "awq_lite",
+}
+
+
+

These configs can be accessed as attributes of modelopt.torch.quantization and can be given as input to mtq.quantize(). For example:

+
import modelopt.torch.quantization as mtq
+model = mtq.quantize(model, mtq.INT8_DEFAULT_CFG, forward_loop)
+
+
+

You can also create your own config by following these examples. For instance, if you want to quantize a model with the int4 AWQ algorithm, but need to skip quantizing the layer named lm_head, you can create a custom config and quantize your model as follows:

+
import copy

# Create custom config
CUSTOM_INT4_AWQ_CFG = copy.deepcopy(mtq.INT4_AWQ_CFG)
CUSTOM_INT4_AWQ_CFG["quant_cfg"]["*lm_head*"] = {"enable": False}
+
+# quantize model
+model = mtq.quantize(model, CUSTOM_INT4_AWQ_CFG, forward_loop)
+
+
+
+
+
+ModeloptConfig QuantizeConfig
+

Bases: ModeloptBaseConfig

+

Default configuration for quantize mode.

+

Default config (JSON):
+

+
+
{
+   "quant_cfg": {
+      "default": {
+         "num_bits": 8,
+         "axis": null
+      }
+   },
+   "algorithm": "max"
+}
+
+
+

+
+field algorithm: str | Dict[str, Any]
+
+ +
+
+field quant_cfg: Dict[str | Callable, Any]
+
+ +
+ +
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.quantization.conversion.html b/reference/generated/modelopt.torch.quantization.conversion.html
new file mode 100644
index 0000000..2fdb4cb
--- /dev/null
+++ b/reference/generated/modelopt.torch.quantization.conversion.html
@@ -0,0 +1,313 @@
conversion — Model Optimizer 0.11.2

conversion

+

Quantization conversion/restore utilities.

+

Functions


replace_quant_module

Recursively replace the module with quantized module.

set_quantizer_by_cfg

Update the quantizer attributes based on the specified quant_cfg.

set_quantizer_attribute

Finegrained adjustment of quantizer attribute by wildcard or filter function.

register

Register a quantized class for the given un-quantized original class.

unregister

Unregister the quantized class for the given un-quantized original class.

+
+
+register(original_cls, quantized_cls)
+

Register a quantized class for the given un-quantized original class.

+
+
Parameters:
+
    +
  • original_cls (Module) – The original un-quantized class.

  • +
  • quantized_cls (Module) – The quantized class. This class should have a _setup method which initializes various quantizers called in the forward. The forward function of the quantized class should call the quantizers at the correct location.

  • +
+
+
+

Here is an example of defining a quantized class and registering it:

+
import modelopt.torch.quantization as mtq
+from modelopt.torch.quantization.tensor_quant import TensorQuantizer, QuantDescriptor
+
+
+class QuantLayerNorm(nn.LayerNorm):
+    def __init__(self, normalized_shape):
+        super().__init__(normalized_shape)
+        self._setup()
+
+    def _setup(self):
+        # Method to setup the quantizers
+        self.input_quantizer = TensorQuantizer(QuantDescriptor())
+        self.weight_quantizer = TensorQuantizer(QuantDescriptor())
+
+    def forward(self, input):
+        input = self.input_quantizer(input)
+        weight = self.weight_quantizer(self.weight)
+        return F.layer_norm(input, self.normalized_shape, weight, self.bias, self.eps)
+
+
+# Register the custom quantized module
+mtq.register(original_cls=nn.LayerNorm, quantized_cls=QuantLayerNorm)
+
+
+
+ +
+
+replace_quant_module(model)
+

Recursively replace the module with quantized module.

+
+
Parameters:
+

model (Module) –

+
+
+
+ +
+
+set_quantizer_attribute(quant_model, wildcard_or_filter_func, attribute)
+

Finegrained adjustment of quantizer attribute by wildcard or filter function.

+
+
Parameters:
+
    +
  • quant_model (Module) – A pytorch model

  • +
  • wildcard_or_filter_func (str | Callable) – A wildcard string or a filter function. The wildcard string is matched against the quantizer module names. The quantizer modules are instances of TensorQuantizer. The filter function takes a quantized module name as input and returns True if the quantizer should be adjusted and False otherwise.

  • +
  • attribute – A dict of quantizer attributes or a list of quantizer attribute dicts. An example attribute dict is: {"num_bits": 8, "axis": 0, "enable": True}. If attribute is a list of dicts, the matched TensorQuantizer modules will be replaced with SequentialQuantizer modules having one quantizer for each attribute dict from the list. See set_from_attribute_dict for more details on the supported attributes and their types.

  • +
+
+
+
+ +
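For example, all input quantizers could be disabled with a wildcard (a sketch):

from modelopt.torch.quantization.conversion import set_quantizer_attribute

# Disable every quantizer module whose name matches the wildcard
set_quantizer_attribute(model, "*input_quantizer", {"enable": False})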
+
+set_quantizer_by_cfg(quant_model, quant_cfg)
+

Update the quantizer attributes based on the specified quant_cfg.

+

quant_cfg is a dictionary mapping wildcards or filter functions to quantizer attributes. The wildcards or filter functions are matched against the quantizer module names. The specified quantizer attributes of the matched quantizer modules are set accordingly.

+

See set_quantizer_attribute for more details.

+
+
Parameters:
+

quant_model (Module) –

+
+
+
+ +
+
+unregister(original_cls)
+

Unregister the quantized class for the given un-quantized original class.

+
+
Parameters:
+

original_cls (Module) – The original un-quantized class.

+
+
+
+ +
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.quantization.extensions.html b/reference/generated/modelopt.torch.quantization.extensions.html
new file mode 100644
index 0000000..0ebe9ac
--- /dev/null
+++ b/reference/generated/modelopt.torch.quantization.extensions.html
@@ -0,0 +1,186 @@
extensions — Model Optimizer 0.11.2

extensions

+

Module to load C++ / CUDA extensions.

+
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.quantization.html b/reference/generated/modelopt.torch.quantization.html
new file mode 100644
index 0000000..01f14fc
--- /dev/null
+++ b/reference/generated/modelopt.torch.quantization.html
@@ -0,0 +1,229 @@
quantization — Model Optimizer 0.11.2

quantization

+

Modules


modelopt.torch.quantization.calib

Calibrator classes.

modelopt.torch.quantization.config

This document lists the quantization formats supported by Model Optimizer and example quantization configs.

modelopt.torch.quantization.conversion

Quantization conversion/restore utilities.

modelopt.torch.quantization.extensions

Module to load C++ / CUDA extensions.

modelopt.torch.quantization.mode

This module contains the mode descriptor for the quantization mode.

modelopt.torch.quantization.model_calib

Calibration utilities.

modelopt.torch.quantization.model_quant

User-facing quantization API.

modelopt.torch.quantization.nn

Modules with quantization support.

modelopt.torch.quantization.optim

Deprecated.

modelopt.torch.quantization.plugins

Handles quantization plugins to correctly quantize third-party modules.

modelopt.torch.quantization.quant_modules

Deprecated.

modelopt.torch.quantization.tensor_quant

Basic tensor quantization functions.

modelopt.torch.quantization.utils

Quantization utilities.

+

Quantization package.

+
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.quantization.mode.html b/reference/generated/modelopt.torch.quantization.mode.html
new file mode 100644
index 0000000..9bbd224
--- /dev/null
+++ b/reference/generated/modelopt.torch.quantization.mode.html
@@ -0,0 +1,293 @@
mode — Model Optimizer 0.11.2

mode

+

This module contains the mode descriptor for the quantization mode.

+

Classes


QuantizeExportModeDescriptor

Class to describe the export of quantization mode.

QuantizeModeDescriptor

Class to describe the "quant" mode.

+
+
+class QuantizeExportModeDescriptor
+

Bases: _ModeDescriptor

+

Class to describe the export of quantization mode.

+

Note that this mode is just a placeholder to throw an error since we don’t support exporting quantized models right now. It is used to properly indicate that the quantize mode would require an export mode if we ever wanted to do chaining/stacking of modes with it.

+
+
+property config_class: Type[ModeloptBaseConfig]
+

Specifies the config class for the mode.

+
+ +
+
+property convert: Callable[[Module, ModeloptBaseConfig], Tuple[Module, Dict[str, Any]]]
+

The mode’s entrypoint for converting a model.

+
+ +
+
+property is_export_mode: bool
+

Specifies whether the mode is an export mode.

+
+ +
+
+property name: str
+

Returns the value (str representation) of the mode.

+
+ +
+
+property restore: Callable[[Module, ModeloptBaseConfig, Dict[str, Any]], Module]
+

The mode’s entrypoint for restoring a model.

+
+ +
+ +
+
+class QuantizeModeDescriptor
+

Bases: _ModeDescriptor

+

Class to describe the "quant" mode.

+

The properties of this mode can be inspected via the source code.

+
+
+property config_class: Type[ModeloptBaseConfig]
+

Specifies the config class for the mode.

+
+ +
+
+property convert: Callable[[Module, ModeloptBaseConfig], Tuple[Module, Dict[str, Any]]]
+

The mode’s entrypoint for converting a model.

+
+ +
+
+property export_mode: str | None
+

The mode that corresponds to the export mode of this mode.

+
+ +
+
+property name: str
+

Returns the value (str representation) of the mode.

+
+ +
+
+property next_modes: Set[str] | None
+

Modes that must immediately follow this mode.

+
+ +
+
+property restore: Callable[[Module, ModeloptBaseConfig, Dict[str, Any]], Module]
+

The mode’s entrypoint for restoring a model.

+
+ +
+
+property update_for_new_mode: Callable[[Module, ModeloptBaseConfig, Dict[str, Any]], None]
+

The mode’s entrypoint for updating the model’s state before a new mode.

+
+ +
+
+property update_for_save: Callable[[Module, ModeloptBaseConfig, Dict[str, Any]], None]
+

The mode’s entrypoint for updating the model’s state before saving.

+
+ +
+ +
+ + +
+
+ +
+
+
+
diff --git a/reference/generated/modelopt.torch.quantization.model_calib.html b/reference/generated/modelopt.torch.quantization.model_calib.html
new file mode 100644
index 0000000..036197f
--- /dev/null
+++ b/reference/generated/modelopt.torch.quantization.model_calib.html
@@ -0,0 +1,240 @@
model_calib — Model Optimizer 0.11.2

model_calib

+

Calibration utilities.

+

Functions


calibrate

Adjusts weights and scaling factors based on selected algorithms.

postprocess_amax

Experimental API to postprocess the amax values after calibration.

+
+
+calibrate(model, algorithm='max', forward_loop=None)
+

Adjusts weights and scaling factors based on selected algorithms.

+
+
Parameters:
+
    +
  • model (Module) – A pytorch model with quantizer modules.

  • +
  • algorithm (str | dict | None) – A string or dictionary specifying the calibration algorithm to use. Supported algorithms are "max", "smoothquant", "awq_lite", "awq_full", and "awq_clip". If a dictionary is passed, the key "method" should specify the calibration algorithm to use. Other key-value pairs in this dictionary will be passed as kwargs to the algorithm. An example dictionary argument: {"method": "awq_clip", "max_co_batch_size": 4096}. If None, no calibration is performed.

  • +
  • forward_loop (Callable[[Module], None] | None) – A callable which takes the model as argument and forwards calibration data through the model.

  • +
+
+
Return type:
+

Module

+
+
+

Returns: The calibrated pytorch model.

+
+ +
+
+postprocess_amax(model, key, post_process_fn)
+

Experimental API to postprocess the amax values after calibration.

+
+
Parameters:
+
    +
  • model (Module) –

  • +
  • key (str) –

  • +
+
+
Return type:
+

Module

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.model_quant.html b/reference/generated/modelopt.torch.quantization.model_quant.html new file mode 100644 index 0000000..5529faf --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.model_quant.html @@ -0,0 +1,322 @@ + + + + + + + model_quant — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

model_quant

+

User-facing quantization API.

+

Functions

+ + + + + + + + + + + + + + + + + + +

quantize

Quantizes and calibrates the model.

disable_quantizer

Disable quantizer by wildcard or filter function.

enable_quantizer

Enable quantizer by wildcard or filter function.

print_quant_summary

Print summary of all quantizer modules in the model.

fold_weight

Fold weight quantizer for fast evaluation.

+
+
+disable_quantizer(model, wildcard_or_filter_func)
+

Disable quantizer by wildcard or filter function.

+
+
Parameters:
+
    +
  • model (Module) –

  • +
  • wildcard_or_filter_func (str | Callable) –

  • +
+
+
+
+ +
+
+enable_quantizer(model, wildcard_or_filter_func)
+

Enable quantizer by wildcard or filter function.

+
+
Parameters:
+
    +
  • model (Module) –

  • +
  • wildcard_or_filter_func (str | Callable) –

  • +
+
+
+
+ +
+
+fold_weight(model)
+

Fold weight quantizer for fast evaluation.

+
+
Parameters:
+

model (Module) –

+
+
+
+ +
+
+print_quant_summary(model)
+

Print summary of all quantizer modules in the model.

+
+
Parameters:
+

model (Module) –

+
+
+
+ +
+
+quantize(model, config, forward_loop=None)
+

Quantizes and calibrates the model.

+

This method replaces modules with their quantized counterparts and performs calibration as specified by quant_cfg. forward_loop is used to forward data through the model and gather statistics for calibration.

+
+
Parameters:
+
    +
  • model (Module) – A pytorch model

  • +
  • config (Dict[str, Any]) –

    A dictionary specifying the values for keys "quant_cfg" and "algorithm". +The "quant_cfg" key specifies the quantization configurations. +The "algorithm" key specifies the algorithm argument to +calibrate.

    +

The quantization configuration is a dictionary mapping wildcards or filter functions to quantizer attributes. The wildcards or filter functions are matched against the quantizer module names. The quantizer modules have names ending with weight_quantizer and input_quantizer, and they perform weight quantization and input quantization (or activation quantization), respectively. The quantizer modules are instances of TensorQuantizer, and the specified quantizer attributes describe their quantization behavior. See set_quantizer_by_cfg for more details on the "quant_cfg" dictionary.

    +

    An example config dictionary is given below:
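A representative sketch (attribute names are illustrative; check the config module for the exact supported keys):

config = {
    "quant_cfg": {
        # 8-bit per-channel weight quantization
        "*weight_quantizer": {"num_bits": 8, "axis": 0},
        # 8-bit per-tensor activation quantization
        "*input_quantizer": {"num_bits": 8},
    },
    "algorithm": "max",
}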

    +

    Please see config for more examples.

    +

  • +
  • forward_loop (Callable[[Module], None] | None) –

A callable that forwards all calibration data through the model. This is used to gather statistics for calibration. It should take the model as its argument and does not need to return anything. Here are a few examples of correct forward_loop definitions: Example 1:

    +
    def forward_loop(model) -> None:
    +    # iterate over the data loader and forward data through the model
    +    for batch in data_loader:
    +        model(batch)
    +
    +
    +

    Example 2:

    +
    def forward_loop(model) -> float:
    +    # evaluate the model on the task
    +    return evaluate(model, task, ....)
    +
    +
    +

    Example 3:

    +
    def forward_loop(model) -> None:
    +    # run evaluation pipeline
    +    evaluator.model = model
    +    evaluator.evaluate()
    +
    +
    +
    +

    Note

    +

    Calibration does not require forwarding the entire dataset through the model. +Please subsample the dataset or reduce the number of batches if needed.

    +
    +

  • +
+
+
Return type:
+

Module

+
+
+

Returns: A pytorch model which has been quantized and calibrated.
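Putting it together, a typical call might look like this (a sketch; model and calib_data_loader are assumed to be defined, and INT8_DEFAULT_CFG is assumed to be one of the predefined configurations in the config module):

import modelopt.torch.quantization as mtq

def forward_loop(model) -> None:
    # forward calibration data to gather statistics
    for batch in calib_data_loader:
        model(batch)

model = mtq.quantize(model, mtq.INT8_DEFAULT_CFG, forward_loop)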

+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.nn.functional.html b/reference/generated/modelopt.torch.quantization.nn.functional.html new file mode 100644 index 0000000..f5ddae2 --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.nn.functional.html @@ -0,0 +1,217 @@ + + + + + + + functional — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

functional

+

Some supportive functions.

+

Classes

+ + + + + + +

ClipFunction

A universal tensor clip function.

+
+
+class ClipFunction
+

Bases: Function

+

A universal tensor clip function.

+

PyTorch’s clamp() only supports a scalar range and doesn’t support broadcasting. This implementation uses min/max, which is more general. The gradient is defined according to IBM’s PACT paper (https://arxiv.org/abs/1805.06085), which is also the behavior of TensorFlow’s clip_by_value().
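A minimal sketch of the min/max formulation in plain PyTorch (outside this module), showing why it broadcasts where clamp() with scalar bounds would not:

import torch

x = torch.randn(4, 8)
lo = torch.full((4, 1), -0.5)   # per-row lower bound (broadcasts over columns)
hi = torch.full((4, 1), 0.5)    # per-row upper bound (broadcasts over columns)
clipped = torch.min(torch.max(x, lo), hi)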

+
+
+static backward(ctx, grad_output)
+

Backward pass for the clip function.

+
+ +
+
+static forward(ctx, input, clip_value_min, clip_value_max)
+

Forward pass for the clip function.

+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.nn.html b/reference/generated/modelopt.torch.quantization.nn.html new file mode 100644 index 0000000..87d149b --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.nn.html @@ -0,0 +1,197 @@ + + + + + + + nn — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+ +
+
+ +
+

nn

+

Modules

+ + + + + + + + + +

modelopt.torch.quantization.nn.functional

Some supportive functions.

modelopt.torch.quantization.nn.modules

Modules with quantization support.

+

Modules with quantization support.

+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.nn.modules.clip.html b/reference/generated/modelopt.torch.quantization.nn.modules.clip.html new file mode 100644 index 0000000..0051d4e --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.nn.modules.clip.html @@ -0,0 +1,228 @@ + + + + + + + clip — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

clip

+

Implement a clip module as pytorch only has a simple clamp function.

+

Classes

+ + + + + + +

Clip

Clip tensor.

+
+
+class Clip
+

Bases: Module

+

Clip tensor.

+
+
Parameters:
+
    +
  • clip_value_min – A number or tensor of lower bound to clip

  • +
  • clip_value_max – A number or tensor of upper bound to clip

  • +
  • learn_min – A boolean. If True, learn min. clip_value_min will be used to initialize. Default False

  • +
  • learn_max – A boolean. Similar to learn_min, but for max.

  • +
+
+
Raises:
+

ValueError

+
+
+
+
+__init__(clip_value_min, clip_value_max, learn_min=False, learn_max=False)
+

Initialize.

+
+ +
+
+forward(inputs)
+

Clip input tensor.
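For example (a sketch, assuming the class is importable from this module):

import torch
from modelopt.torch.quantization.nn.modules.clip import Clip

clip = Clip(clip_value_min=-1.0, clip_value_max=1.0, learn_max=True)
y = clip(torch.randn(3, 3))  # values clipped to [-1, 1]; the upper bound is learnable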

+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.nn.modules.html b/reference/generated/modelopt.torch.quantization.nn.modules.html new file mode 100644 index 0000000..289a0c2 --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.nn.modules.html @@ -0,0 +1,219 @@ + + + + + + + modules — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

modules

+

Modules

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

modelopt.torch.quantization.nn.modules.clip

Implement a clip module as pytorch only has a simple clamp function.

modelopt.torch.quantization.nn.modules.quant_activations

Quantized activations module.

modelopt.torch.quantization.nn.modules.quant_batchnorm

Quantized batch normalization module.

modelopt.torch.quantization.nn.modules.quant_conv

Quantized convolution.

modelopt.torch.quantization.nn.modules.quant_instancenorm

Quantized instance normalization module.

modelopt.torch.quantization.nn.modules.quant_linear

Quantized Linear.

modelopt.torch.quantization.nn.modules.quant_module

Base class for quantization modules.

modelopt.torch.quantization.nn.modules.quant_pooling

Quantized Pooling modules.

modelopt.torch.quantization.nn.modules.tensor_quantizer

TensorQuantizer Module.

+

Modules with quantization support.

+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.nn.modules.quant_activations.html b/reference/generated/modelopt.torch.quantization.nn.modules.quant_activations.html new file mode 100644 index 0000000..90073b7 --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.nn.modules.quant_activations.html @@ -0,0 +1,188 @@ + + + + + + + quant_activations — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

quant_activations

+

Quantized activations module.

+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.nn.modules.quant_batchnorm.html b/reference/generated/modelopt.torch.quantization.nn.modules.quant_batchnorm.html new file mode 100644 index 0000000..cec7956 --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.nn.modules.quant_batchnorm.html @@ -0,0 +1,188 @@ + + + + + + + quant_batchnorm — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

quant_batchnorm

+

Quantized batch normalization module.

+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.html b/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.html new file mode 100644 index 0000000..6fe0bc2 --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.nn.modules.quant_conv.html @@ -0,0 +1,337 @@ + + + + + + + quant_conv — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

quant_conv

+

Quantized convolution.

+

Classes

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Conv2d

alias of QuantConv2d

QuantConv2d

Quantized 2D convolution.

Conv3d

alias of QuantConv3d

QuantConv3d

Quantized 3D convolution.

Conv1d

alias of QuantConv1d

QuantConv1d

Quantized 1D convolution.

ConvTranspose1d

alias of QuantConvTranspose1d

ConvTranspose2d

alias of QuantConvTranspose2d

ConvTranspose3d

alias of QuantConvTranspose3d

QuantConvTranspose1d

Quantized 1D transposed convolution.

QuantConvTranspose2d

Quantized 2D transposed convolution.

QuantConvTranspose3d

Quantized 3D transposed convolution.

+
+
+Conv1d
+

alias of QuantConv1d

+
+ +
+
+Conv2d
+

alias of QuantConv2d

+
+ +
+
+Conv3d
+

alias of QuantConv3d

+
+ +
+
+ConvTranspose1d
+

alias of QuantConvTranspose1d

+
+ +
+
+ConvTranspose2d
+

alias of QuantConvTranspose2d

+
+ +
+
+ConvTranspose3d
+

alias of QuantConvTranspose3d

+
+ +
+
+class QuantConv1d
+

Bases: _LegacyQuantLinearConvBaseMixin, Conv1d

+

Quantized 1D convolution.

+
+
+default_quant_desc_weight = <modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor object>
+
+ +
+ +
+
+class QuantConv2d
+

Bases: _LegacyQuantLinearConvBaseMixin, Conv2d

+

Quantized 2D convolution.

+
+
+default_quant_desc_weight = <modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor object>
+
+ +
+ +
+
+class QuantConv3d
+

Bases: _LegacyQuantLinearConvBaseMixin, Conv3d

+

Quantized 3D convolution.

+
+
+default_quant_desc_weight = <modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor object>
+
+ +
+ +
+
+class QuantConvTranspose1d
+

Bases: _LegacyQuantLinearConvBaseMixin, ConvTranspose1d

+

Quantized 1D transposed convolution.

+
+
+default_quant_desc_weight = <modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor object>
+
+ +
+ +
+
+class QuantConvTranspose2d
+

Bases: _LegacyQuantLinearConvBaseMixin, ConvTranspose2d

+

Quantized 2D transposed convolution.

+
+
+default_quant_desc_weight = <modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor object>
+
+ +
+ +
+
+class QuantConvTranspose3d
+

Bases: _LegacyQuantLinearConvBaseMixin, ConvTranspose3d

+

Quantized 3D transposed convolution.

+
+
+default_quant_desc_weight = <modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor object>
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.nn.modules.quant_instancenorm.html b/reference/generated/modelopt.torch.quantization.nn.modules.quant_instancenorm.html new file mode 100644 index 0000000..c0bedd8 --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.nn.modules.quant_instancenorm.html @@ -0,0 +1,223 @@ + + + + + + + quant_instancenorm — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

quant_instancenorm

+

Quantized instance normalization module.

+

Classes

+ + + + + + + + + + + + +

QuantInstanceNorm1d

Applies Quantized Instance Normalization over a 3D input.

QuantInstanceNorm2d

Applies Quantized Instance Normalization over a 4D input.

QuantInstanceNorm3d

Applies Quantized Instance Normalization over a 5D input.

+
+
+class QuantInstanceNorm1d
+

Bases: _LegacyQuantInputBaseMixin, InstanceNorm1d

+

Applies Quantized Instance Normalization over a 3D input.

+
+ +
+
+class QuantInstanceNorm2d
+

Bases: _LegacyQuantInputBaseMixin, InstanceNorm2d

+

Applies Quantized Instance Normalization over a 4D input.

+
+ +
+
+class QuantInstanceNorm3d
+

Bases: _LegacyQuantInputBaseMixin, InstanceNorm3d

+

Applies Quantized Instance Normalization over a 5D input.

+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.html b/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.html new file mode 100644 index 0000000..61d4330 --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.nn.modules.quant_linear.html @@ -0,0 +1,217 @@ + + + + + + + quant_linear — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

quant_linear

+

Quantized Linear.

+

Classes

+ + + + + + + + + +

Linear

alias of QuantLinear

QuantLinear

Quantized version of nn.Linear.

+
+
+Linear
+

alias of QuantLinear

+
+ +
+
+class QuantLinear
+

Bases: _LegacyQuantLinearConvBaseMixin, Linear

+

Quantized version of nn.Linear.

+
+
+default_quant_desc_weight = <modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor object>
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.html b/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.html new file mode 100644 index 0000000..441db74 --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.nn.modules.quant_module.html @@ -0,0 +1,268 @@ + + + + + + + quant_module — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

quant_module

+

Base class for quantization modules.

+

Classes

+ + + + + + + + + +

QuantInputBase

Base class for modules where the input is quantized.

QuantLinearConvBase

Base class for quantized linear modules.

+
+
+class QuantInputBase
+

Bases: DynamicModule

+

Base class for modules where the input is quantized.

+
+
+default_quant_desc_input = <modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor object>
+
+ +
+
+default_quant_desc_output = <modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor object>
+
+ +
+
+forward(input, *args, **kwargs)
+

Quantize the input before calling the original forward method.

+
+ +
+
+input_quantizer: TensorQuantizer | SequentialQuantizer
+
+ +
+
+output_quantizer: TensorQuantizer | SequentialQuantizer
+
+ +
+ +
+
+class QuantLinearConvBase
+

Bases: QuantInputBase

+

Base class for quantized linear modules.

+

Quantized linear modules are modules where both the input and the weight are quantized.

+
+
+default_quant_desc_weight = <modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor object>
+
+ +
+
+forward(input, *args, **kwargs)
+

Quantize the input and the weight before calling the original forward method.

+
+ +
+
+static initialize_quantizer_with_dummy_states(module)
+

Initialize the quantizer states with dummy values with the correct type and device.

+
+ +
+
+quantize_weight()
+

Context in which self.weight is quantized.

+
+ +
+
+weight_quantizer: TensorQuantizer | SequentialQuantizer
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.nn.modules.quant_pooling.html b/reference/generated/modelopt.torch.quantization.nn.modules.quant_pooling.html new file mode 100644 index 0000000..fc3b0cc --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.nn.modules.quant_pooling.html @@ -0,0 +1,364 @@ + + + + + + + quant_pooling — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

quant_pooling

+

Quantized Pooling modules.

+

Classes

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

MaxPool1d

alias of QuantMaxPool1d

QuantMaxPool1d

Quantized version of nn.MaxPool1d.

MaxPool2d

alias of QuantMaxPool2d

QuantMaxPool2d

Quantized version of nn.MaxPool2d.

MaxPool3d

alias of QuantMaxPool3d

QuantMaxPool3d

Quantized version of nn.MaxPool3d.

AvgPool1d

alias of QuantAvgPool1d

QuantAvgPool1d

Quantized version of nn.AvgPool1d.

AvgPool2d

alias of QuantAvgPool2d

QuantAvgPool2d

Quantized version of nn.AvgPool2d.

AvgPool3d

alias of QuantAvgPool3d

QuantAvgPool3d

Quantized version of nn.AvgPool3d.

AdaptiveAvgPool1d

alias of QuantAdaptiveAvgPool1d

QuantAdaptiveAvgPool1d

Quantized version of nn.AdaptiveAvgPool1d.

AdaptiveAvgPool2d

alias of QuantAdaptiveAvgPool2d

QuantAdaptiveAvgPool2d

Quantized version of nn.AdaptiveAvgPool2d.

AdaptiveAvgPool3d

alias of QuantAdaptiveAvgPool3d

QuantAdaptiveAvgPool3d

Quantized version of nn.AdaptiveAvgPool3d.

+
+
+AdaptiveAvgPool1d
+

alias of QuantAdaptiveAvgPool1d

+
+ +
+
+AdaptiveAvgPool2d
+

alias of QuantAdaptiveAvgPool2d

+
+ +
+
+AdaptiveAvgPool3d
+

alias of QuantAdaptiveAvgPool3d

+
+ +
+
+AvgPool1d
+

alias of QuantAvgPool1d

+
+ +
+
+AvgPool2d
+

alias of QuantAvgPool2d

+
+ +
+
+AvgPool3d
+

alias of QuantAvgPool3d

+
+ +
+
+MaxPool1d
+

alias of QuantMaxPool1d

+
+ +
+
+MaxPool2d
+

alias of QuantMaxPool2d

+
+ +
+
+MaxPool3d
+

alias of QuantMaxPool3d

+
+ +
+
+class QuantAdaptiveAvgPool1d
+

Bases: _LegacyQuantInputBaseMixin, AdaptiveAvgPool1d

+

Quantized version of nn.AdaptiveAvgPool1d.

+
+ +
+
+class QuantAdaptiveAvgPool2d
+

Bases: _LegacyQuantInputBaseMixin, AdaptiveAvgPool2d

+

Quantized version of nn.AdaptiveAvgPool2d.

+
+ +
+
+class QuantAdaptiveAvgPool3d
+

Bases: _LegacyQuantInputBaseMixin, AdaptiveAvgPool3d

+

Quantized version of nn.AdaptiveAvgPool3d.

+
+ +
+
+class QuantAvgPool1d
+

Bases: _LegacyQuantInputBaseMixin, AvgPool1d

+

Quantized version of nn.AvgPool1d.

+
+ +
+
+class QuantAvgPool2d
+

Bases: _LegacyQuantInputBaseMixin, AvgPool2d

+

Quantized version of nn.AvgPool2d.

+
+ +
+
+class QuantAvgPool3d
+

Bases: _LegacyQuantInputBaseMixin, AvgPool3d

+

Quantized version of nn.AvgPool3d.

+
+ +
+
+class QuantMaxPool1d
+

Bases: _LegacyQuantInputBaseMixin, MaxPool1d

+

Quantized version of nn.MaxPool1d.

+
+ +
+
+class QuantMaxPool2d
+

Bases: _LegacyQuantInputBaseMixin, MaxPool2d

+

Quantized version of nn.MaxPool2d.

+
+ +
+
+class QuantMaxPool3d
+

Bases: _LegacyQuantInputBaseMixin, MaxPool3d

+

Quantized version of nn.MaxPool3d.

+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.html b/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.html new file mode 100644 index 0000000..c9aa07a --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.nn.modules.tensor_quantizer.html @@ -0,0 +1,541 @@ + + + + + + + tensor_quantizer — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

tensor_quantizer

+

TensorQuantizer Module.

+

Classes

+ + + + + + + + + +

TensorQuantizer

Tensor quantizer module.

SequentialQuantizer

A sequential container for TensorQuantizer modules.

+
+
+class SequentialQuantizer
+

Bases: Sequential

+

A sequential container for TensorQuantizer modules.

+

This module is used to quantize a tensor in multiple formats sequentially. It takes TensorQuantizer modules as input and containerizes them similarly to torch.nn.Sequential.

+
+
Parameters:
+

quantizers (TensorQuantizer) – TensorQuantizer modules to be added to the container.
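For example, to quantize a weight first in a 4-bit block format and then in an 8-bit per-channel format, two TensorQuantizer instances can be chained (a sketch; the import paths and descriptor values are illustrative):

from modelopt.torch.quantization.nn import SequentialQuantizer, TensorQuantizer
from modelopt.torch.quantization.tensor_quant import QuantDescriptor

sq = SequentialQuantizer(
    TensorQuantizer(QuantDescriptor(num_bits=4, block_sizes={-1: 128})),  # 4-bit blockwise
    TensorQuantizer(QuantDescriptor(num_bits=8, axis=0)),                 # 8-bit per-channel
)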

+
+
+
+
+__init__(*quantizers)
+

Initialize SequentialQuantizer module.

+
+
Parameters:
+

quantizers (TensorQuantizer) –

+
+
+
+ +
+
+disable()
+

Disable the quantizer modules.

+
+ +
+
+get_modelopt_state()
+

Get meta state to be saved in checkpoint.

+
+
Return type:
+

Dict[str, Any]

+
+
+
+ +
+
+static replace_sequential_quantizer_with_single_quantizer(model, indx=0)
+

Replace instances of SequentialQuantizer in the model with single quantizers.

+

The quantizer indexed by indx from the sequential quantizer is used to replace it. +This method is useful for individually calibrating the quantizers in a sequential quantizer.

+
+
Parameters:
+

indx (int) –

+
+
+
+ +
+
+set_from_attribute_dict(attributes)
+

Set the attributes of contained quantizers from a list of attribute_dicts.

+
+
Parameters:
+

attributes (List[Dict[str, Any]]) –

+
+
+
+ +
+
+static tensor_quantizer_iterator(quantizers)
+

Iterator for the quantizers in the container (but yields itself if it is a TensorQuantizer).

+
+ +
+ +
+
+class TensorQuantizer
+

Bases: Module

+

Tensor quantizer module.

+

This module uses the tensor_quant or fake_tensor_quant function to quantize a tensor, and wraps the variables and moving statistics we’d want when training a quantized network.

+
+
Experimental features:
    +
  • clip stage learns range before enabling quantization.

  • +
  • calib stage runs calibration

  • +
+
+
+
+
Parameters:
+
    +
  • quant_desc – An instance of QuantDescriptor.

  • +
  • disabled – A boolean. If True, bypass the whole module and return the input unchanged. Default False.

  • +
  • if_quant – A boolean. If True, run main quantization body. Default True.

  • +
  • if_clip – A boolean. If True, clip before quantization and learn amax. Default False.

  • +
  • if_calib – A boolean. If True, run calibration. Not implemented yet; calibration settings will probably go to QuantDescriptor.

  • +
+
+
+
+
Readonly Properties:
    +
  • axis:

  • +
  • fake_quant:

  • +
  • scale:

  • +
  • step_size:

  • +
+
+
Mutable Properties:
    +
  • num_bits:

  • +
  • unsigned:

  • +
  • amax:

  • +
+
+
+
+
+__init__(quant_desc=<modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor object>, disabled=False, if_quant=True, if_clip=False, if_calib=False)
+

Initialize quantizer and set up required variables.

+
+ +
+
+property amax
+

Return amax for quantization.

+
+ +
+
+property axis
+

Return axis for quantization.

+
+ +
+
+property block_sizes
+

Return block_sizes for quantization.

+
+ +
+
+clean_up_after_set_from_modelopt_state(prefix='')
+

Clean up temporary variables created during set_from_modelopt_state.

+
+ +
+
+disable()
+

Bypass the module.

+

None of calibration, clipping, or quantization will be performed if the module is disabled.

+
+ +
+
+disable_calib()
+

Disable calibration.

+
+ +
+
+disable_clip()
+

Disable clip stage.

+
+ +
+
+disable_quant()
+

Disable quantization.

+
+ +
+
+enable()
+

Enable the module.

+
+ +
+
+enable_calib()
+

Enable calibration.

+
+ +
+
+enable_clip()
+

Enable clip stage.

+
+ +
+
+enable_quant()
+

Enable quantization.

+
+ +
+
+export_amax()
+

Export correctly formatted/shaped amax.

+
+
Return type:
+

Tensor | None

+
+
+
+ +
+
+extra_repr()
+

Set the extra information about this module.

+
+ +
+
+property fake_quant
+

Return True if fake quantization is used.

+
+ +
+
+forward(inputs)
+

Apply tensor_quant function to inputs.

+
+
Parameters:
+

inputs – A Tensor of type float32.

+
+
Returns:
+

A Tensor of type output_dtype

+
+
Return type:
+

outputs

+
+
+
+ +
+
+get_modelopt_state()
+

Get meta state to be saved in checkpoint.

+
+
Return type:
+

Dict[str, Any]

+
+
+
+ +
+
+init_learn_amax()
+

Initialize learned amax from fixed amax.

+
+ +
+
+property is_enabled
+

Return True if the module is not disabled.

+
+ +
+
+load_calib_amax(*args, **kwargs)
+

Load amax from calibrator.

+

Updates the amax buffer with the value computed by the calibrator, creating it if necessary. *args and **kwargs are directly passed to compute_amax, except "strict" in kwargs. Refer to compute_amax for more details.

+
+ +
+
+property maxbound
+

Return maxbound for quantization.

+
+ +
+
+property narrow_range
+

Return True if symmetric integer range for signed quantization is used.

+
+ +
+
+property num_bits
+

Return num_bits for quantization.

+
+ +
+
+property pre_quant_scale
+

Return pre_quant_scale used for smoothquant.

+
+ +
+
+reset_amax()
+

Reset amax to None.

+
+ +
+
+property scale
+

Return scale used for quantization.

+
+ +
+
+set_from_attribute_dict(attribute_dict)
+

Set quantizer attributes from attribute_dict.

+
+
Parameters:
+

attribute_dict (Dict[str, Any]) –

+
+
+
+ +
+
+set_from_modelopt_state(modelopt_state, prefix='')
+

Set meta state from checkpoint.

+
+ +
+
+property step_size
+

Return step size for integer quantization.

+
+ +
+
+sync_amax_across_distributed_group(parallel_group)
+

Synchronize the amax across all ranks in the given group.

+
+
Parameters:
+

parallel_group (DistributedProcessGroup) –

+
+
+
+ +
+
+property unsigned
+

Return True if unsigned quantization is used.

+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.optim.html b/reference/generated/modelopt.torch.quantization.optim.html new file mode 100644 index 0000000..4c04a08 --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.optim.html @@ -0,0 +1,227 @@ + + + + + + + optim — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

optim

+

Deprecated. Placeholder module that raises a deprecation error.

+

Functions

+ + + + + + + + + + + + + + + +

freeze_parameters

Deprecated.

group_parameters

Deprecated.

match_parameters

Deprecated.

quant_weight_inplace

Deprecated.

+
+
+freeze_parameters(*args, **kwargs)
+

Deprecated. Placeholder function that raises a deprecation error.

+
+ +
+
+group_parameters(*args, **kwargs)
+

Deprecated. Placeholder function that raises a deprecation error.

+
+ +
+
+match_parameters(*args, **kwargs)
+

Deprecated. Placeholder function that raises a deprecation error.

+
+ +
+
+quant_weight_inplace(*args, **kwargs)
+

Deprecated. Placeholder function that raises a deprecation error.

+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.plugins.html b/reference/generated/modelopt.torch.quantization.plugins.html new file mode 100644 index 0000000..a7a5e7b --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.plugins.html @@ -0,0 +1,200 @@ + + + + + + + plugins — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

plugins

+

Modules

+ + + +
+

Handles quantization plugins to correctly quantize third-party modules.

+

Please check out the source code of this module for examples of how plugins work and how you can write your own. Currently, we support plugins for

+
    +
  • apex

  • +
  • diffusers

  • +
  • huggingface

  • +
  • megatron

  • +
  • nemo

  • +
+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.quant_modules.html b/reference/generated/modelopt.torch.quantization.quant_modules.html new file mode 100644 index 0000000..60b4464 --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.quant_modules.html @@ -0,0 +1,220 @@ + + + + + + + quant_modules — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

quant_modules

+

Deprecated. Placeholder module that raises a deprecation error.

+

Functions

+ + + + + + + + + + + + +

deactivate

Deprecated.

enable_onnx_export

Deprecated.

initialize

Deprecated.

+
+
+deactivate()
+

Deprecated. This API is no longer supported.

+
+ +
+
+enable_onnx_export()
+

Deprecated. You no longer need to use this context manager while exporting to ONNX.

+
+ +
+
+initialize(*args, **kwargs)
+

Deprecated. This API is no longer supported.

+

Use mtq.quantize() +instead to quantize the model.
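A sketch of the replacement flow (model, config, and forward_loop are assumed to be defined as described under mtq.quantize()):

import modelopt.torch.quantization as mtq

# instead of quant_modules.initialize(), quantize the model explicitly
model = mtq.quantize(model, config, forward_loop)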

+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.tensor_quant.html b/reference/generated/modelopt.torch.quantization.tensor_quant.html new file mode 100644 index 0000000..ade5f07 --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.tensor_quant.html @@ -0,0 +1,612 @@ + + + + + + + tensor_quant — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

tensor_quant

+

Basic tensor quantization functions.

+

Classes

+ + + + + + + + + + + + + + + + + + + + + + + + +

FakeAffineTensorQuantFunction

Fake version of affine quantization.

FakeTensorQuantFunction

Fake version of TensorQuantFunction using the CUDA extension.

LegacyFakeTensorQuantFunction

Fake version of TensorQuantFunction.

QuantDescriptor

alias of ScaledQuantDescriptor

ScaledE4M3Function

E4M3fy input with scale.

ScaledQuantDescriptor

Supportive descriptor of quantization.

TensorQuantFunction

A universal tensor quantization function.

+

Functions

+ + + + + + +

scaled_e4m3_abstract

Register an abstract implementation for scaled_e4m3.

+
+
+class FakeAffineTensorQuantFunction
+

Bases: Function

+

Fake version of affine quantization.

+

gemmlowp style scale+shift quantization. See more details in +https://github.com/google/gemmlowp/blob/master/doc/quantization.md.

+

We DO NOT recommend affine quantization of weights for performance reasons. There may be value in affine-quantizing activations, as the shift can be cancelled by the bias and comes with no performance penalty. This functionality is added for experimental purposes only.

+
+
+static backward(ctx, grad_outputs)
+

Implements straight through estimation with clipping.

+
+
Parameters:
+
    +
  • ctx – Pytorch convention.

  • +
  • grad_output – A tensor of gradient of outputs.

  • +
+
+
Returns:
+

A tensor of gradient

+
+
Return type:
+

grad_inputs

+
+
+
+ +
+
+static forward(ctx, inputs, min_range, max_range, num_bits=8)
+

As this will only be applied to activations with per-tensor granularity, broadcasting is not needed.

+
+
Parameters:
+
    +
  • ctx – Pytorch convention.

  • +
  • inputs – A Tensor of type float32.

  • +
  • min_range – A float.

  • +
  • max_range – A float.

  • +
  • num_bits – An integer

  • +
+
+
Returns:
+

A Tensor of type output_dtype

+
+
Return type:
+

outputs

+
+
+
+ +
+ +
+
+class FakeTensorQuantFunction
+

Bases: Function

+

Fake version of TensorQuantFunction using the CUDA extension.

+
+
+static backward(ctx, grad_outputs)
+

Implements straight through estimation with clipping.

+
+ +
+
+static forward(ctx, inputs, amax, num_bits=8, unsigned=False, narrow_range=True)
+

Forward method.

+
+ +
+
+static symbolic(g, inputs, amax, num_bits=8, unsigned=False, narrow_range=True)
+

ONNX symbolic function.

+
+ +
+ +
+
+class LegacyFakeTensorQuantFunction
+

Bases: Function

+

Fake version of TensorQuantFunction.

+

See comments of TensorQuantFunction, arguments are the same.

+
+
+static backward(ctx, grad_outputs)
+

Implements straight through estimation.

+
+ +
+
+static forward(ctx, inputs, amax, num_bits=8, unsigned=False, narrow_range=True)
+

Forward method.

+
+ +
+ +
+
+QuantDescriptor
+

alias of ScaledQuantDescriptor

+
+ +
+
+class ScaledE4M3Function
+

Bases: Function

+

E4M3fy input with scale.

+
+
+static backward(ctx, grad_outputs)
+

Implements straight through estimation with clipping.

+
+ +
+
+static forward(ctx, inputs, amax, E, M)
+

Forward method.

+
+ +
+
+static symbolic(g, inputs, amax=None, E=4, M=3)
+

ONNX symbolic function.

+
+ +
+ +
+
+class ScaledQuantDescriptor
+

Bases: object

+

Supportive descriptor of quantization.

+

Describes how a tensor should be quantized. A QuantDescriptor and a tensor together define a quantized tensor.

+
+
Parameters:
+
    +
  • num_bits –

    An integer or a tuple of two integers. Specifically, num_bits can be:

    1. A positive integer for integer quantization; num_bits specifies the number of bits used for integer quantization.

    2. A constant integer tuple (E, M) for floating-point quantization emulating NVIDIA’s FPx quantization, where E is the number of exponent bits and M is the number of mantissa bits. Supported FPx quantizations: FP8 with (E=4, M=3).

    Default: 8.

    +

  • +
  • name – An optional name for the descriptor.

  • +
  • fake_quant – A boolean. If True, use fake quantization mode. Default True.

  • +
  • axis – None, int or tuple of int. The specified axis/axes will have its own amax for +computing scaling factor. If None (the default), use per tensor scale. Must be in the +range [-rank(input_tensor), rank(input_tensor)). E.g. For a KCRS weight tensor, +quant_axis=(0) will yield per channel scaling.

  • +
  • block_sizes

    None or a dictionary. The dictionary specifies +block quantization parameters. The keys are the axes for block quantization and the +values are block sizes for quantization along the respective axes. Keys must be in the +range [-rank(input_tensor), rank(input_tensor)]. Values, which are the block sizes +for quantization must be positive integers.

    +

In addition, there can be the special string keys “type” and “scale_bits”. Key “type” should map to “dynamic” or “static”, where “dynamic” indicates dynamic block quantization and “static” indicates statically calibrated block quantization. By default, the type is “static”. Key “scale_bits” specifies the quantization bits for the per-block quantization scale factor (i.e., a double quantization scheme). By default, the per-block quantization scale is not quantized.

    +

For example, block_sizes = {-1: 32} will quantize the last axis of the input tensor in blocks of size 32 with static calibration, while block_sizes = {-1: 32, "type": "dynamic"} will perform dynamic block quantization. If None, block quantization is not performed. axis must be None when block_sizes is not None (see the construction sketch under __init__ below).

    +

  • +
  • amax – A float or list/ndarray of floats of user specified absolute max range. If supplied, +ignore quant_axis and use this to quantize. If learn_amax is True, will be used to +initialize learnable amax.

  • +
  • learn_amax – A boolean. If True, learn amax.

  • +
  • scale_amax – A float. If supplied, multiply amax by scale_amax. Default None. It is useful +for some quick experiment.

  • +
  • calib_method – A string, one of ["max", "histogram"], indicating which calibration to use. Apart from the simple max calibration, all other methods are histogram-based.

  • +
  • unsigned – A boolean. If True, use unsigned.

  • +
  • narrow_range – A boolean. if True, symmetric integer range for signed quantization is used.

  • +
+
+
+
+
Read-only properties:
    +
  • fake_quant:

  • +
  • name:

  • +
  • learn_amax:

  • +
  • scale_amax:

  • +
  • axis:

  • +
  • calib_method:

  • +
  • num_bits:

  • +
  • amax:

  • +
  • unsigned:

  • +
+
+
+
+
+__init__(num_bits=8, name=None, fake_quant=True, axis=None, block_sizes=None, amax=None, learn_amax=False, scale_amax=None, calib_method='max', unsigned=False, narrow_range=False)
+

Initialize QuantDescriptor.
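For instance, descriptors for a few common schemes might be constructed as follows (a sketch; the argument values are illustrative):

from modelopt.torch.quantization.tensor_quant import QuantDescriptor

per_channel_int8 = QuantDescriptor(num_bits=8, axis=(0,), calib_method="max")
fp8 = QuantDescriptor(num_bits=(4, 3))                                        # FP8 with E=4, M=3
dynamic_block = QuantDescriptor(num_bits=4, block_sizes={-1: 32, "type": "dynamic"})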

+
+ +
+
+property amax
+

Return amax.

+
+ +
+
+property axis
+

Return axis for quantization.

+
+ +
+
+property block_sizes
+

Return block_sizes for quantization.

+
+ +
+
+property calib_method
+

Return calibration method.

+
+ +
+
+dict()
+

Serialize to dict.

+

The built-in __dict__ method returns all attributes, including those that have default values and those with the protected prefix “_”. This method only returns attributes whose values differ from the default and whose keys do not contain “_”. Constructing an instance from the dict returned by this method should yield exactly the same instance.

+
+ +
+
+property fake_quant
+

Return True if fake quantization is used.

+
+ +
+
+static get_block_quant_axes_and_sizes(block_sizes)
+

Return axes and sizes for block quantization.

+
+
Parameters:
+

block_sizes (dict) –

+
+
+
+ +
+
+property learn_amax
+

Return True if amax is learnable.

+
+ +
+
+property name
+

Return name.

+
+ +
+
+property narrow_range
+

Return True if symmetric integer range for signed quantization is used.

+
+ +
+
+property num_bits
+

Return num_bits.

+
+ +
+
+property scale_amax
+

Return scale_amax.

+
+ +
+
+property unsigned
+

Return True if unsigned integer range is used.

+
+ +
+ +
+
+class TensorQuantFunction
+

Bases: Function

+

A universal tensor quantization function.

+

Takes an input tensor and outputs a quantized tensor. The granularity of the scale can be interpreted from the shape of amax. output_dtype indicates whether the quantized value will be stored as an integer or a float. The reason for storing it as a float is that the PyTorch function consuming the quantized value may not accept integer input, e.g. Conv2D.

+

It uses 2^num_bits - 1 values instead of 2^num_bits, e.g., for num_bits=8, it uses [-127, 127] instead of [-128, 127].
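As a worked example of the scale arithmetic described under forward below (a sketch; values are illustrative and clamping details are omitted):

amax, num_bits = 2.0, 8
scale = (2 ** (num_bits - 1) - 1) / amax   # (2**7 - 1) / 2.0 = 63.5
q = round(1.0 * scale)                     # 64, within the [-127, 127] range
deq = q / scale                            # ~1.0079, the dequantized value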

+
+
+static backward(ctx, grad_outputs, grad_scale)
+

Implements straight through estimation with clipping.

+

For -amax <= input <= amax the gradient passes straight through, otherwise the gradient is zero.

+
+
Parameters:
+
    +
  • ctx – A Context object with saved tensors from forward.

  • +
  • grad_outputs – A tensor of gradient of outputs.

  • +
  • grad_scale – A tensor of gradient of scale.

  • +
+
+
Returns:
+

A tensor of gradient.

+
+
Return type:
+

grad_inputs

+
+
+
+ +
+
+static forward(ctx, inputs, amax, num_bits=8, unsigned=False, narrow_range=True)
+

Forward method.

+

Following TensorFlow convention, the max value is passed in and used to derive the scale, instead of passing in the scale directly, though passing the scale directly may be more natural to use.

+
+
Parameters:
+
    +
  • ctx – A Context object to store tensors for backward.

  • +
  • inputs – A Tensor of type float32.

  • +
  • amax – A Tensor of type float32. Inputs will be quantized within range [-amax, amax] +amax will be broadcasted to inputs tensor.

  • +
  • num_bits – An integer used to calculate the scaling factor: scale = (2^(num_bits-1) - 1) / max. Effectively, it indicates how many integer bits are used to represent the value. Default 8.

  • +
  • output_dtype – A type of Tensor. torch.int32 or torch.float32.

  • +
  • unsigned – A boolean. Use unsigned integer range. E.g. [0, 255] for num_bits=8. Default False.

  • +
  • narrow_range – A boolean. Use symmetric integer range for signed quantization +E.g. [-127,127] instead of [-128,127] for num_bits=8. Default True.

  • +
+
+
Returns:
+

A Tensor of type output_dtype, and scale, a Tensor of type float32; outputs / scale will dequantize the outputs tensor.

+
+
Return type:
+

outputs

+
+
Raises:
+

ValueError

+
+
+
+ +
+
+static symbolic(g, inputs, amax, num_bits=8, unsigned=False, narrow_range=True)
+

ONNX symbolic function.

+
+ +
+ +
+
+scaled_e4m3_abstract(input, amax)
+

Register an abstract implementation for scaled_e4m3.

+

This abstract function returns an empty tensor with the same shape and dtype.

+
+
Parameters:
+
    +
  • input (Tensor) –

  • +
  • amax (Tensor) –

  • +
+
+
Return type:
+

Tensor

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.quantization.utils.html b/reference/generated/modelopt.torch.quantization.utils.html new file mode 100644 index 0000000..aeefd60 --- /dev/null +++ b/reference/generated/modelopt.torch.quantization.utils.html @@ -0,0 +1,290 @@ + + + + + + + utils — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

utils

+

Quantization utilities.

+

Functions

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

reduce_amax

Compute the absolute maximum value of a tensor.

is_quantized

Check if a module is quantized.

is_quantized_layer_with_weight

Check if a module is quantized with weights.

is_quantized_column_parallel_linear

Check if a module is a quantized column parallel linear module.

is_quantized_row_parallel_linear

Check if a module is a quantized row parallel linear module.

replace_function

Replace a function with a new one within a context.

export_torch_mode

Context manager enabling the export mode.

is_torch_library_supported

Check if the installed PyTorch version meets or exceeds a specified version.

+
+
+export_torch_mode()
+

Context manager enabling the export mode.

+
+ +
+
+is_quantized(module)
+

Check if a module is quantized.

+
+ +
+
+is_quantized_column_parallel_linear(module)
+

Check if a module is a quantized column parallel linear module.

+
+ +
+
+is_quantized_layer_with_weight(module)
+

Check if a module is quantized with weights.

+
+ +
+
+is_quantized_row_parallel_linear(module)
+

Check if a module is a quantized row parallel linear module.

+
+ +
+
+is_torch_library_supported()
+

Check if the installed PyTorch version meets or exceeds a specified version.

+
+ +
+
+reduce_amax(input, axis=None, keepdims=True)
+

Compute the absolute maximum value of a tensor.

+

Reduces input_tensor along the dimensions given in axis. Unless keepdims is true, +the rank of the tensor is reduced by 1 for each entry in axis. If keepdims is true, +the reduced dimensions are retained with length 1.

+
+

Note

+

Gradient computation is disabled, as this function is only meant for computing amax and never for learning.

+
+
+
Parameters:
+
    +
  • input – Input tensor

  • +
  • axis – The dimensions to reduce. None or int or tuple of ints. If None (the default), +reduces all dimensions. Must be in the range [-rank(input_tensor), rank(input_tensor)).

  • +
  • keepdims – A boolean. If true, retains reduced dimensions with length 1. Default True

  • +
  • granularity – DEPRECATED. Specifies whether the statistic has to be calculated at tensor or channel granularity.

  • +
+
+
Returns:
+

The reduced tensor.

+
+
Raises:
+
    +
  • ValueError – If any axis doesn’t make sense or is not supported.

  • +
  • ValueError – If unknown granularity is passed in.
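As an illustration of the reduction semantics (a sketch, assuming the function is importable from this module):

import torch
from modelopt.torch.quantization.utils import reduce_amax

w = torch.tensor([[1.0, -3.0], [2.0, 0.5]])
reduce_amax(w)          # tensor([[3.]]) — global absolute max, dims kept with length 1
reduce_amax(w, axis=1)  # tensor([[3.], [2.]]) — per-row absolute max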

  • +
+
+
+
+ +
+
+replace_function(package, name, new_func)
+

Replace a function with a new one within a context.

+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.sparsity.config.html b/reference/generated/modelopt.torch.sparsity.config.html new file mode 100644 index 0000000..e9f2388 --- /dev/null +++ b/reference/generated/modelopt.torch.sparsity.config.html @@ -0,0 +1,392 @@ + + + + + + + config — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

config

+

Default configurations for sparsity modes.

+
+
+ModeloptConfig ExportSparseConfig
+

Bases: ModeloptBaseConfig

+

Configuration (empty!) for the "export_sparse" mode.

+

+Show default config as JSON
+
Default config (JSON):
+

+
+
{}
+
+
+

+ +
+
+ModeloptConfig SparseGPTConfig
+

Bases: ModeloptBaseRuleConfig

+

Configuration for the "sparse_gpt" mode.

+

+Show default config as JSON
+
Default config (JSON):
+

+
+
{
+   "nn.Linear": {
+      "*": {},
+      "*lm_head*": null
+   },
+   "nn.Conv2d": {
+      "*": {},
+      "*lm_head*": null
+   }
+}
+
+
+

+
+field nn.Conv2d: SparseConv2dConfig | None | Dict[str, SparseConv2dConfig | None]
+

+Show details

Configuration for sparse nn.Conv2d module.

+

If the "nn.Conv2d" key is not specified, the default configuration (shown in JSON) will be used:

+
{
+  "*": {},
+  "*lm_head*": null
+}
+
+
+

To deactivate any sparse nn.Conv2d module, use None instead of providing a dictionary {}.

+

To specify layer-specific configurations, you can specify a config for each submodule with the key +specifying a glob pattern that matches the submodule name. For example, to convert to a sparse +module for all nn.Conv2d layers except for those in the "lm_head" submodule use:

+
{
+    "*": {...},
+    "*lm_head*": None,
+}
+
+
+

Note that glob expressions are processed sequentially in the order they are specified. Later keys in +the config will overwrite earlier keys if they match the same submodule name.

+

If you want to specify the same configuration for all submodules, you can provide an unnested +dictionary as well:

+
{...}
+
+
+

which is short for

+
{
+    "*": {...},
+}
+
+
+

+ +
+
+field nn.Linear: SparseLinearConfig | None | Dict[str, SparseLinearConfig | None]
+

+Show details

Configuration for sparse nn.Linear module.

+

If the "nn.Linear" key is not specified, the default configuration (shown in JSON) will be used:

+
{
+  "*": {},
+  "*lm_head*": null
+}
+
+
+

To deactivate any sparse nn.Linear module, use None instead of providing a dictionary {}.

+

To specify layer-specific configurations, you can specify a config for each submodule with the key +specifying a glob pattern that matches the submodule name. For example, to convert to a sparse +module for all nn.Linear layers except for those in the "lm_head" submodule use:

+
{
+    "*": {...},
+    "*lm_head*": None,
+}
+
+
+

Note that glob expressions are processed sequentially in the order they are specified. Later keys in +the config will overwrite earlier keys if they match the same submodule name.

+

If you want to specify the same configuration for all submodules, you can provide an unnested +dictionary as well:

+
{...}
+
+
+

which is short for

+
{
+    "*": {...},
+}
+
+
+

+ +
+ +
+
+ModeloptConfig SparseMagnitudeConfig
+

Bases: ModeloptBaseRuleConfig

+

Configuration for the "sparse_magnitude" mode.

+

+Show default config as JSON
+
Default config (JSON):
+

+
+
{
+   "nn.Linear": {
+      "*": {},
+      "*lm_head*": null
+   },
+   "nn.Conv2d": {
+      "*": {},
+      "*lm_head*": null
+   }
+}
+
+
+

+
+field nn.Conv2d: SparseConv2dConfig | None | Dict[str, SparseConv2dConfig | None]
+

+Show details

Configuration for sparse nn.Conv2d module.

+

If the "nn.Conv2d" key is not specified, the default configuration (shown in JSON) will be used:

+
{
+  "*": {},
+  "*lm_head*": null
+}
+
+
+

To deactivate any sparse nn.Conv2d module, use None instead of providing a dictionary {}.

+

To specify layer-specific configurations, you can specify a config for each submodule with the key +specifying a glob pattern that matches the submodule name. For example, to convert to a sparse +module for all nn.Conv2d layers except for those in the "lm_head" submodule use:

+
{
+    "*": {...},
+    "*lm_head*": None,
+}
+
+
+

Note that glob expressions are processed sequentially in the order they are specified. Later keys in +the config will overwrite earlier keys if they match the same submodule name.

+

If you want to specify the same configuration for all submodules, you can provide an unnested +dictionary as well:

+
{...}
+
+
+

which is short for

+
{
+    "*": {...},
+}
+
+
+

+ +
+
+field nn.Linear: SparseLinearConfig | None | Dict[str, SparseLinearConfig | None]
+

+Show details

Configuration for sparse nn.Linear module.

+

If the "nn.Linear" key is not specified, the default configuration (shown in JSON) will be used:

+
{
+  "*": {},
+  "*lm_head*": null
+}
+
+
+

To deactivate any sparse nn.Linear module, use None instead of providing a dictionary {}.

+

To specify layer-specific configurations, you can specify a config for each submodule with the key +specifying a glob pattern that matches the submodule name. For example, to convert to a sparse +module for all nn.Linear layers except for those in the "lm_head" submodule use:

+
{
+    "*": {...},
+    "*lm_head*": None,
+}
+
+
+

Note that glob expressions are processed sequentially in the order they are specified. Later keys in +the config will overwrite earlier keys if they match the same submodule name.

+

If you want to specify the same configuration for all submodules, you can provide an unnested +dictionary as well:

+
{...}
+
+
+

which is short for

+
{
+    "*": {...},
+}
+
+
+

+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.sparsity.html b/reference/generated/modelopt.torch.sparsity.html new file mode 100644 index 0000000..a30b343 --- /dev/null +++ b/reference/generated/modelopt.torch.sparsity.html @@ -0,0 +1,209 @@ + + + + + + + sparsity — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

sparsity

+

Modules

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

modelopt.torch.sparsity.config

Default configurations for sparsity modes.

modelopt.torch.sparsity.magnitude

Magnitude-based sparsity inspired by NVIDIA ASP (Automatic SParsity).

modelopt.torch.sparsity.mode

Sparsity mode descriptor.

modelopt.torch.sparsity.module

Dynamic class for all sparse modules.

modelopt.torch.sparsity.plugins

Handles sparsity plugins for third-party modules.

modelopt.torch.sparsity.searcher

Searcher interface for sparsity algorithms.

modelopt.torch.sparsity.sparsegpt

Utility functions of SparseGPT.

modelopt.torch.sparsity.sparsification

High-level API to automatically sparsify your model with various algorithms.

+

API for sparsification algorithms.

+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.sparsity.magnitude.html b/reference/generated/modelopt.torch.sparsity.magnitude.html new file mode 100644 index 0000000..0c8a573 --- /dev/null +++ b/reference/generated/modelopt.torch.sparsity.magnitude.html @@ -0,0 +1,291 @@ + + + + + + + magnitude — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

magnitude

+

Magnitude-based sparsity inspired by NVIDIA ASP (Automatic SParsity).

+

Classes

+ + + + + + +

MagnitudeSearcher

Searcher for magnitude-based sparsity.

+

Functions

+ + + + + + + + + + + + + + + + + + + + + + + + +

compute_valid_1d_patterns

Computes all possible m:n patterns in a 1D vector.

create_asp_mask

Creates a mask for a given tensor based on a specified sparse pattern.

fill

Calculates the ratio of non-zero elements in a tensor.

get_nmprune_info

Gets the n:m sparsity pattern information from a given string.

m4n2_1d

Finds the best 2:4 pattern in a given matrix.

mn_1d_best

Finds the best m:n pattern in a given matrix.

reshape_1d

Reshapes a given matrix into m-dimensional vectors: (h,w) -> (hw/m, m).

+
+
+class MagnitudeSearcher
+

Bases: BaseSparseSearcher

+

Searcher for magnitude-based sparsity.

+
+ +
+
+compute_valid_1d_patterns(m, n)
+

Computes all possible m:n patterns in a 1D vector.

+

The function generates a tensor of size m with n ones and (m-n) zeros. +It then generates all permutations of this tensor, removes duplicates, +and returns the unique patterns as a tensor.
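For example, with m=4 and n=2 there are C(4,2) = 6 unique patterns (1 = kept weight, 0 = pruned weight): [1,1,0,0], [1,0,1,0], [1,0,0,1], [0,1,1,0], [0,1,0,1], [0,0,1,1].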

+
+ +
+
+create_asp_mask(tensor, pattern)
+

Creates a mask for a given tensor based on a specified sparse pattern.

+

The function reshapes the tensor and applies the specified pattern to create a sparse mask. +The default pattern is m4n2_1d, which finds the best 2:4 sparsity pattern in the tensor.

+
+
Parameters:
+
    +
  • tensor (Parameter) –

  • +
  • pattern (str) –

  • +
+
+
Return type:
+

BoolTensor

+
+
+
+ +
+
+fill(x)
+

Calculates the ratio of non-zero elements in a tensor.

+
+ +
+
+get_nmprune_info(pattern)
+

Gets the n:m sparsity pattern information from a given string.

+
+
Parameters:
+

pattern (str) –

+
+
Return type:
+

Tuple[bool, int, int]

+
+
+
+ +
+
+m4n2_1d(mat)
+

Finds the best 2:4 pattern in a given matrix.

+
+ +
+
+mn_1d_best(matrix, m, n)
+

Finds the best m:n pattern in a given matrix.

+

The function computes all possible m:n patterns and selects the one +that maximizes the sum of non-masked weights in the matrix. The selected +pattern is then used to create a mask for the matrix.

+
+ +
+
+reshape_1d(matrix, m)
+

Reshapes a given matrix into m-dimensional vectors: (h,w) -> (hw/m, m).

+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.sparsity.mode.html b/reference/generated/modelopt.torch.sparsity.mode.html new file mode 100644 index 0000000..a7c11c7 --- /dev/null +++ b/reference/generated/modelopt.torch.sparsity.mode.html @@ -0,0 +1,427 @@ + + + + + + + mode — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

mode

+

Sparsity mode descriptor.

+

Classes

+ + + + + + + + + + + + +

ExportSparseModeDescriptor

Class to describe the "export_sparse" mode.

SparseGPTModeDescriptor

Class to define and describe sparsification based on SparseGPT.

SparseMagnitudeModeDescriptor

Class to define and describe magnitude-based sparsification.

+

Functions

+ + + + + + + + + + + + + + + + + + +

convert_sparse_model

Function for converting a model to a sparsity meta-model.

export_sparse

Export a sparse model to a regular model.

restore_export_sparse

Restore & export a sparse model to a regular model.

restore_sparse_model

Function for restoring a previously converted model to a sparsity meta-model.

update_sparse_metadata

Update the subnet config to the current subnet config of the model.

+
+
+class ExportSparseModeDescriptor
+

Bases: _ModeDescriptor

+

Class to describe the "export_sparse" mode.

+

The properties of this mode can be inspected via the source code.

+
+
+property config_class: Type[ModeloptBaseConfig]
+

Specifies the config class for the mode.

+
+ +
+
+property convert: Callable[[Module, ModeloptBaseConfig], Tuple[Module, Dict[str, Any]]]
+

The mode’s entrypoint for converting a model.

+
+ +
+
+property is_export_mode: bool
+

Specifies if this mode is an export mode.

+
+ +
+
+property name: str
+

Returns the value (str representation) of the mode.

+
+ +
+
+property restore: Callable[[Module, ModeloptBaseConfig, Dict[str, Any]], Module]
+

The mode’s entrypoint for restoring a model.

+
+ +
+ +
+
+class SparseGPTModeDescriptor
+

Bases: SparseMagnitudeModeDescriptor

+

Class to define and describe sparsification based on SparseGPT.

+
+
+property config_class: Type[ModeloptBaseConfig]
+

Specifies the config class for the mode.

+
+ +
+
+property name: str
+

Returns the name of the mode.

+
+ +
+
+property search_algorithm: Type[BaseSearcher]
+

Specifies the search algorithm for the mode.

+
+ +
+ +
+
+class SparseMagnitudeModeDescriptor
+

Bases: _ModeDescriptor

+

Class to define and describe magnitude-based sparsification.

+
+
+property config_class: Type[ModeloptBaseConfig]
+

Specifies the config class for the mode.

+
+ +
+
+property convert: Callable[[Module, ModeloptBaseConfig], Tuple[Module, Dict[str, Any]]]
+

The mode’s entrypoint for converting a model.

+
+ +
+
+property export_mode: str | None
+

The mode that corresponds to the export mode of this mode.

+
+ +
+
+property name: str
+

Returns the name of the mode.

+
+ +
+
+property next_modes: Set[str] | None
+

Specifies the next modes for the mode.

+
+ +
+
+property restore: Callable[[Module, ModeloptBaseConfig, Dict[str, Any]], Module]
+

The mode’s entrypoint for restoring a model.

+
+ +
+
+property search_algorithm: Type[BaseSearcher]
+

Specifies the search algorithm for the mode.

+
+ +
+
+property update_for_new_mode: Callable[[Module, ModeloptBaseConfig, Dict[str, Any]], None]
+

The mode’s entrypoint for updating the models metadata.

+
+ +
+
+property update_for_save: Callable[[Module, ModeloptBaseConfig, Dict[str, Any]], None]
+

The mode’s entrypoint for updating the models metadata.

+
+ +
+ +
+
+convert_sparse_model(model, config)
+

Function for converting a model to a sparsity meta-model.

+
+
Parameters:
+
+
+
Return type:
+

Tuple[Module, Dict[str, Any]]

+
+
+
+ +
+
+export_sparse(model, config)
+

Export a sparse model to a regular model.

+
+
Parameters:
+
+
+
Return type:
+

Tuple[Module, Dict[str, Any]]

+
+
+
+ +
+
+restore_export_sparse(model, config, metadata)
+

Restore & export a sparse model to a regular model.

+
+
Parameters:
+
+
+
Return type:
+

Module

+
+
+
+ +
+
+restore_sparse_model(model, config, metadata)
+

Function for restoring a previously converted model to a sparsity meta-model.

+
+
Parameters:
+
+
+
Return type:
+

Module

+
+
+
+ +
+
+update_sparse_metadata(model, config, metadata)
+

Update the stored subnet config to the model's current subnet config.

+
+
Parameters:
+
+
+
Return type:
+

None

+
+
+
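Taken together, these entrypoints form the usual convert/restore cycle for the sparsity modes. A minimal sketch, assuming model is an nn.Module and config is a valid mode config; fresh_model is a hypothetical new, unconverted instance of the same architecture:

# Convert and keep the returned metadata (e.g., to persist alongside the state dict).
sparse_model, metadata = convert_sparse_model(model, config)
# ... fine-tune sparse_model, save weights and metadata ...
# Later, re-create the sparsity meta-model on a fresh instance.
restored = restore_sparse_model(fresh_model, config, metadata)

In practice these entrypoints are invoked by the higher-level modelopt save/restore machinery rather than called directly.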
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.sparsity.module.html b/reference/generated/modelopt.torch.sparsity.module.html new file mode 100644 index 0000000..e60ef21 --- /dev/null +++ b/reference/generated/modelopt.torch.sparsity.module.html @@ -0,0 +1,215 @@ + + + + + + + module — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

module

+

Dynamic class for all sparse modules.

+

Classes

+ + + + + + +

SparseModule

Base dynamic class for all sparse modules.

+
+
+class SparseModule
+

Bases: DynamicModule

+

Base dynamic class for all sparse modules.

+
+
+modify(*args, **kwargs)
+

Initialize the sparsity mask when this is called.

+

Note that for any module that is not frozen via None in the rules, this function will be +called. Hence, we use this function to initialize the sparsity mask only when necessary.

+
+ +
+
+set_mask(value)
+

Set the active sparse mask of the module weights.

+
+
Parameters:
+

value (BoolTensor | None) –

+
+
+
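As an illustration of set_mask(), the sketch below applies a hand-built boolean mask to a converted linear layer. The sparse_linear variable and the 50% column pattern are assumptions for demonstration only (not a hardware-friendly 2:4 pattern):

import torch

# sparse_linear is assumed to be a SparseModule-converted nn.Linear.
mask = torch.ones_like(sparse_linear.weight, dtype=torch.bool)
mask[:, 1::2] = False          # illustrative pattern: zero out every other column
sparse_linear.set_mask(mask)   # enforce the mask on the module weights
sparse_linear.set_mask(None)   # None is accepted per the signature (assumed to clear the mask)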
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.sparsity.plugins.html b/reference/generated/modelopt.torch.sparsity.plugins.html new file mode 100644 index 0000000..ad452fa --- /dev/null +++ b/reference/generated/modelopt.torch.sparsity.plugins.html @@ -0,0 +1,190 @@ + + + + + + + plugins — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

plugins

+

Modules

+ + + +
+

Handles sparsity plugins for third-party modules.

+

Currently, we support plugins for

+
    +
  • megatron

  • +
+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.sparsity.searcher.html b/reference/generated/modelopt.torch.sparsity.searcher.html new file mode 100644 index 0000000..e53bbd2 --- /dev/null +++ b/reference/generated/modelopt.torch.sparsity.searcher.html @@ -0,0 +1,228 @@ + + + + + + + searcher — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

searcher

+

Searcher interface for sparsity algorithms.

+

Classes

+ + + + + + +

BaseSparseSearcher

A generic sparse mask searching algorithm.

+
+
+class BaseSparseSearcher
+

Bases: BaseSearcher

+

A generic sparse mask searching algorithm.

+
+
+property default_search_config: Dict[str, Any]
+

Get the default config for the searcher.

+
+ +
+
+property default_state_dict: Dict[str, Any]
+

Return default state dict.

+
+ +
+ +

Search for sparse mask.

+
+ +
+
+sanitize_search_config(config)
+

Sanitize the search config dict.

+
+
Parameters:
+

config (Dict[str, Any] | None) –

+
+
Return type:
+

Dict[str, Any]

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.sparsity.sparsegpt.html b/reference/generated/modelopt.torch.sparsity.sparsegpt.html new file mode 100644 index 0000000..5adbdc2 --- /dev/null +++ b/reference/generated/modelopt.torch.sparsity.sparsegpt.html @@ -0,0 +1,278 @@ + + + + + + + sparsegpt — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

sparsegpt

+

Utility functions of SparseGPT.

+

Classes

+ + + + + + +

SparseGPTSearcher

SparseGPT-based sparse mask searching algorithm.

+

Functions

+ + + + + + + + + + + + +

create_sgpt_mask

Create a sparse mask for the given tensor.

invert

Invert a Hessian matrix.

prepare

Prepare the inverse Hessian matrix.

+
+
+class SparseGPTSearcher
+

Bases: BaseSparseSearcher

+

SparseGPT-based sparse mask searching algorithm.

+
+ +

Remove Hessian artifacts from the network.

+
+ +
+ +

Register the forward hook to collect the Hessian matrix.

+
+ +
+
+property default_search_config: Dict[str, Any]
+

Get the default config for the searcher.

+
+ +
+ +
+
+create_sgpt_mask(tensor, hessian, config)
+

Create a sparse mask for the given tensor.

+
+
Parameters:
+
    +
  • tensor (Tensor) –

  • +
  • hessian (Tensor) –

  • +
  • config (Dict[str, Any]) –

  • +
+
+
Return type:
+

Tensor

+
+
+
+ +
+
+invert(hessian)
+

Invert a Hessian matrix.

+
+
Parameters:
+

hessian (Tensor) –

+
+
Return type:
+

Tensor

+
+
+
+ +
+
+prepare(tensor, hessian, hessian_damp)
+

Prepare the inverse Hessian matrix.

+
+
Parameters:
+
    +
  • tensor (Tensor) –

  • +
  • hessian (Tensor) –

  • +
  • hessian_damp (float) –

  • +
+
+
Return type:
+

Tuple[Tensor, Tensor]

+
+
+
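A rough sketch of how these helpers fit together, based only on the signatures above; the tensor shapes, the damping value, and the meaning of the returned pair from prepare are assumptions, and search_config stands in for an algorithm-specific Dict[str, Any]:

# weight: (out_features, in_features); hessian: (in_features, in_features) -- assumed layout
hinv = invert(hessian)                                        # standalone Hessian inversion
weight_p, hinv = prepare(weight, hessian, hessian_damp=0.01)  # damping value is illustrative
mask = create_sgpt_mask(weight_p, hessian, search_config)     # boolean sparsity mask for weight_p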
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.sparsity.sparsification.html b/reference/generated/modelopt.torch.sparsity.sparsification.html new file mode 100644 index 0000000..f0ec2ba --- /dev/null +++ b/reference/generated/modelopt.torch.sparsity.sparsification.html @@ -0,0 +1,279 @@ + + + + + + + sparsification — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

sparsification

+

High-level API to automatically sparsify your model with various algorithms.

+

Functions

+ + + + + + + + + +

sparsify

Sparsify a given model and search for the optimal sparsified weights.

export

Export a sparse dynamic model to a regular model.

+
+
+export(model)
+

Export a sparse dynamic model to a regular model.

+

This should be done after the model is fine-tuned and the weights are fixed.

+
+

Warning

+

After the call to export(), the sparsity mask will no longer be enforced. This means any +future weight updates would destroy the sparsity pattern. If you want to continue training, +call export() after training is finished.

+
+
+
Parameters:
+

model (Module) –

+
+
Return type:
+

Module

+
+
+
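A minimal usage sketch, assuming the function is reached via the modelopt.torch.sparsity namespace (aliased as mts here):

import modelopt.torch.sparsity as mts

# ... sparsify the model and finish fine-tuning first ...
model = mts.export(model)  # after this, the mask is no longer enforced (see the warning above)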
+ +
+
+sparsify(model, mode, config=None)
+

Sparsify a given model and search for the optimal sparsified weights.

+
+
Parameters:
+
    +
  • model (Module) – A standard model that contains standard building blocks to be sparsified in-place.

  • +
  • mode (_ModeDescriptor | str | List[_ModeDescriptor | str] | List[Tuple[str, Dict[str, Any]]]) –

    A (list of) string(s) or Mode(s) or a list of tuples containing the mode and its +config indicating the desired mode(s) (and configurations) for the convert +process. Modes set up the model for different algorithms for model optimization. The +following modes are available:

    +
      +
    • "sparse_magnitude": +The model will be sparsified according to the magnitude of weights in each +layer. The mode’s config is described in +SparseMagnitudeConfig.

    • +
• "sparsegpt": The model will be sparsified and weights are updated optimally using a Hessian approximation of the loss function (see the SparseGPT paper for details). The mode’s config is described in SparseGPTConfig.

    • +
    +

If the mode argument is specified as a dictionary, the keys should indicate the mode and the values specify the per-mode configuration. If not provided, the default configuration will be used.

    +

  • +
  • config (Dict[str, Any] | None) –

    Additional optional arguments to configure the search. Currently, we support:

    +
      +
    • verbose: Whether to print detailed search stats during search.

    • +
    • +
      forward_loop: A Callable that takes a model as input and runs a forward loop

      on it. It is recommended to choose the data loader used inside the forward loop +carefully to reduce the runtime. Cannot be provided at the same time as +data_loader and collect_func.

      +
      +
      +
    • +
    • data_loader: An iterator yielding batches of data for calibrating the +normalization layers in the model or compute gradient scores. It is recommended to use +the same data loader as for training but with significantly fewer iterations. Cannot +be provided at the same time as forward_loop.

    • +
    • collect_func: A Callable that takes a batch of data from the data loader as +input and returns the input to model.forward() as described in +run_forward_loop. Cannot +be provided at the same time as forward_loop.

    • +
    +
    +

    Note

    +

    Additional configuration options may be added by individual algorithms. Please +refer to the documentation of the individual algorithms for more information.

    +
    +

  • +
+
+
Return type:
+

Tuple[Module, Dict[str, Any]]

+
+
+

Returns: A sparsified model

+
+

Note

+

The given model is sparsified in-place. The returned model is thus a reference to the same +model instance as the input model.

+
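Putting the arguments together, a minimal calibration-driven sketch; calib_loader and the lambda are placeholders, and the namespace alias mts is an assumption:

import modelopt.torch.sparsity as mts

model = mts.sparsify(
    model,
    mode="sparsegpt",
    config={
        "data_loader": calib_loader,             # placeholder: a small calibration loader
        "collect_func": lambda batch: batch[0],  # maps a batch to the model.forward() input
        "verbose": True,
    },
)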
+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.utils.cpp_extension.html b/reference/generated/modelopt.torch.utils.cpp_extension.html new file mode 100644 index 0000000..9d563c1 --- /dev/null +++ b/reference/generated/modelopt.torch.utils.cpp_extension.html @@ -0,0 +1,211 @@ + + + + + + + cpp_extension — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

cpp_extension

+

Utility functions for loading CPP / CUDA extensions.

+

Functions

+ + + + + + +

load_cpp_extension

Load a C++ / CUDA extension using torch.utils.cpp_extension.load() if the current CUDA version satisfies the given specifiers.

+
+
+load_cpp_extension(name, sources, cuda_version_specifiers, fail_msg='', **load_kwargs)
+

Load a C++ / CUDA extension using torch.utils.cpp_extension.load() if the current CUDA version satisfies the given specifiers.

+

Loading the extension for the first time may take a few minutes because of compilation, but subsequent loads are instantaneous.

+
+
Parameters:
+
    +
  • name (str) – Name of the extension.

  • +
  • sources (List[str | Path]) – Source files to compile.

  • +
  • cuda_version_specifiers (str | None) – Specifier (e.g. “>=11.8,<12”) for CUDA versions required to enable the extension.

  • +
  • **load_kwargs (Any) – Keyword arguments to torch.utils.cpp_extension.load().

  • +
+
+
Return type:
+

module | None

+
+
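A hedged sketch; the extension name, source files, and version range are hypothetical:

from modelopt.torch.utils.cpp_extension import load_cpp_extension

ext = load_cpp_extension(
    name="my_fused_kernels",  # hypothetical extension name
    sources=["my_fused_kernels.cpp", "my_fused_kernels.cu"],
    cuda_version_specifiers=">=11.8,<13",
    fail_msg="Falling back to the pure-PyTorch code path.",
)
if ext is None:
    pass  # CUDA requirement not met (or compilation failed): use a fallback implementation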
+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.utils.dataset_utils.html b/reference/generated/modelopt.torch.utils.dataset_utils.html new file mode 100644 index 0000000..af7ffce --- /dev/null +++ b/reference/generated/modelopt.torch.utils.dataset_utils.html @@ -0,0 +1,268 @@ + + + + + + + dataset_utils — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

dataset_utils

+

Utility functions for getting samples and forward loop function for different datasets.

+

Functions

+ + + + + + + + + +

create_forward_loop

Creates and returns a forward loop function configured for a specific model, dataset, and tokenizer.

get_dataset_dataloader

Get a dataloader for the given dataset name and the tokenizer of the target model.

+
+
+create_forward_loop(model=None, dataset_name='cnn_dailymail', tokenizer=None, batch_size=1, num_samples=512, max_sample_length=512, device=None)
+

Creates and returns a forward loop function configured for a specific model, dataset, and tokenizer.

+

This function initializes a forward loop function tailored to process batches of data from the specified dataset +using the given model and tokenizer. The forward loop function, when called, iterates over the dataset, applies the +tokenizer to prepare the input data, feeds it into the model, and returns the model’s predictions.

+

Parameters:
• model: The PyTorch model for inference.
• dataset_name: The name of the dataset to be used.
• tokenizer: The tokenizer used to preprocess text data into a format suitable for the model.
• batch_size: Batch size of the returned dataloader.
• num_samples: Number of samples from the dataset.
• max_sample_length: Maximum length of a sample.
• device: Target device for the returned dataloader.

+

Example usage for quantization:

+
import modelopt.torch.quantization as mtq
+
+# Initialize model and tokenizer
+# ...
+
+# Create forward loop for calibration
+forward_loop = create_forward_loop(model=model, dataset_name="cnn_dailymail", tokenizer=tokenizer)
+
+# Quantize the model with the calibration dataset
+mtq.quantize(model, quant_cfg, forward_loop=forward_loop)
+
+
+

Returns: A forward loop function that can be called with no arguments. When called, this function iterates over the dataset specified by dataset_name.

+
+
Parameters:
+
    +
  • model (Module) –

  • +
  • dataset_name (str) –

  • +
  • tokenizer (PreTrainedTokenizer | PreTrainedTokenizerFast) –

  • +
  • batch_size (int) –

  • +
  • num_samples (int) –

  • +
  • max_sample_length (int) –

  • +
  • device (str | None) –

  • +
+
+
Return type:
+

Callable

+
+
+
+ +
+
+get_dataset_dataloader(dataset_name='cnn_dailymail', tokenizer=None, batch_size=1, num_samples=512, max_sample_length=512, device=None)
+

Get a dataloader for the given dataset name and the tokenizer of the target model.

+
+
Parameters:
+
    +
  • dataset_name (str) – Name of the dataset to load.

  • +
• tokenizer (PreTrainedTokenizer | PreTrainedTokenizerFast) – Instance of a Hugging Face tokenizer.

  • +
  • batch_size (int) – Batch size of the returned dataloader.

  • +
  • num_samples (int) – Number of samples from the dataset.

  • +
  • max_sample_length (int) – Maximum length of a sample.

  • +
  • device (str | None) – Target device for the returned dataloader.

  • +
+
+
Returns:
+

An instance of a dataloader.

+
+
Return type:
+

DataLoader

+
+
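For example (the tokenizer construction and model name are illustrative, not prescribed by this API):

from transformers import AutoTokenizer
from modelopt.torch.utils.dataset_utils import get_dataset_dataloader

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")  # illustrative checkpoint
loader = get_dataset_dataloader(
    dataset_name="cnn_dailymail",
    tokenizer=tokenizer,
    batch_size=4,
    num_samples=256,
    device="cuda",
)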
+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.utils.distributed.html b/reference/generated/modelopt.torch.utils.distributed.html new file mode 100644 index 0000000..623fd68 --- /dev/null +++ b/reference/generated/modelopt.torch.utils.distributed.html @@ -0,0 +1,314 @@ + + + + + + + distributed — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

distributed

+

Utility functions for using torch.distributed.

+

Functions

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

backend

Returns the distributed backend.

size

Returns the number of processes.

rank

Returns the rank of the current process.

is_master

Returns whether the current process is the master process.

barrier

Synchronizes all processes.

set_data_parallel_group

Set the data parallel group.

set_tensor_parallel_group

Set the tensor parallel group.

get_data_parallel_group

Get the data parallel group.

get_tensor_parallel_group

Get the tensor parallel group.

+
+
+backend()
+

Returns the distributed backend.

+
+
Return type:
+

str | None

+
+
+
+ +
+
+barrier()
+

Synchronizes all processes.

+
+
Return type:
+

None

+
+
+
+ +
+
+get_data_parallel_group()
+

Get the data parallel group.

+
+
Return type:
+

DistributedProcessGroup

+
+
+
+ +
+
+get_tensor_parallel_group()
+

Get the tensor parallel group.

+
+
Return type:
+

DistributedProcessGroup

+
+
+
+ +
+
+is_master(group=None)
+

Returns whether the current process is the master process.

+
+
Return type:
+

bool

+
+
+
+ +
+
+rank(group=None)
+

Returns the rank of the current process.

+
+
Return type:
+

int

+
+
+
+ +
+
+set_data_parallel_group(group)
+

Set the data parallel group.

+
+
Parameters:
+

group (ProcessGroup | int | None) –

+
+
+
+ +
+
+set_tensor_parallel_group(group)
+

Set the tensor parallel group.

+
+
Parameters:
+

group (ProcessGroup | int | None) –

+
+
+
+ +
+
+size(group=None)
+

Returns the number of processes.

+
+
Return type:
+

int

+
+
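A small sketch combining these helpers in a typical multi-process script (assuming torch.distributed has already been initialized by the launcher):

import modelopt.torch.utils.distributed as dist

if dist.is_master():
    print(f"backend={dist.backend()} world_size={dist.size()} rank={dist.rank()}")
dist.barrier()  # wait for all ranks before proceeding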
+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.utils.graph.html b/reference/generated/modelopt.torch.utils.graph.html new file mode 100644 index 0000000..3cb8868 --- /dev/null +++ b/reference/generated/modelopt.torch.utils.graph.html @@ -0,0 +1,211 @@ + + + + + + + graph — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

graph

+

Utility functions for computational graph.

+

Functions

+ + + + + + +

match

Check if a module matches any of the patterns.

+
+
+match(module, patterns)
+

Check if a module matches any of the patterns.

+
+
Parameters:
+
    +
  • module (Module) – The module to be checked.

  • +
  • patterns (Sequence[Module]) – The patterns to be matched.

  • +
+
+
Returns:
+

True if the module matches any of the patterns, False otherwise.

+
+
Return type:
+

bool

+
+
+
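For illustration, a sketch that follows the Sequence[Module] signature by passing module instances as patterns; the exact matching rule (e.g., type-based vs. structural) is not specified on this page and is an assumption:

import torch.nn as nn
from modelopt.torch.utils.graph import match

module = nn.Conv2d(3, 8, kernel_size=3)
patterns = [nn.Conv2d(1, 1, kernel_size=1), nn.Linear(1, 1)]  # pattern instances per the signature
print(match(module, patterns))  # expected True if module matches one of the patterns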
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.utils.html b/reference/generated/modelopt.torch.utils.html new file mode 100644 index 0000000..69bcd48 --- /dev/null +++ b/reference/generated/modelopt.torch.utils.html @@ -0,0 +1,217 @@ + + + + + + + utils — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

utils

+

Modules

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

modelopt.torch.utils.cpp_extension

Utility functions for loading CPP / CUDA extensions.

modelopt.torch.utils.dataset_utils

Utility functions for getting samples and forward loop function for different datasets.

modelopt.torch.utils.distributed

Utility functions for using torch.distributed.

modelopt.torch.utils.graph

Utility functions for computational graph.

modelopt.torch.utils.list

Utils for operating on lists.

modelopt.torch.utils.logging

Utility functions for logging.

modelopt.torch.utils.network

Utility functions for PyTorch models.

modelopt.torch.utils.perf

Utility functions for performance measurement.

modelopt.torch.utils.random

Random number generator with a deterministic, synchronized seed for sampling.

modelopt.torch.utils.tensor

Utility functions for PyTorch tensors.

+

Utility functions.

+
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.utils.list.html b/reference/generated/modelopt.torch.utils.list.html new file mode 100644 index 0000000..e6ba21c --- /dev/null +++ b/reference/generated/modelopt.torch.utils.list.html @@ -0,0 +1,260 @@ + + + + + + + list — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

list

+

Utils for operating on lists.

+

Functions

+ + + + + + + + + + + + + + + +

list_closest_to_median

Return the element from the list that is closest to the list mean.

val2list

Repeat val repeat_time times and return the resulting list, or return val as-is if it is already a list/tuple.

val2tuple

Return a tuple of at least min_len elements by repeating the element at idx_repeat.

stats

Compute min, max, avg, std of vals.

+
+
+list_closest_to_median(x)
+

Return the element from the list that is closest to the list mean.

+
+
Parameters:
+

x (List) –

+
+
Return type:
+

Any

+
+
+
+ +
+
+stats(vals)
+

Compute min, max, avg, std of vals.

+
+
Parameters:
+

vals (List[float]) –

+
+
Return type:
+

Dict[str, float]

+
+
+
+ +
+
+val2list(val, repeat_time=1)
+

Repeat val repeat_time times and return the resulting list, or return val as-is if it is already a list/tuple.

+
+
Parameters:
+

val (List | Tuple | Any) –

+
+
Return type:
+

List

+
+
+
+ +
+
+val2tuple(val, min_len=1, idx_repeat=-1)
+

Return a tuple of at least min_len elements by repeating the element at idx_repeat.

+
+
Parameters:
+
    +
  • val (List | Tuple | Any) –

  • +
  • min_len (int) –

  • +
  • idx_repeat (int) –

  • +
+
+
Return type:
+

Tuple

+
+
+
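Quick illustrations; the repeat semantics and the exact keys of the dict returned by stats are inferred from the summaries above, not documented:

from modelopt.torch.utils.list import stats, val2list, val2tuple

val2list(3, repeat_time=2)   # [3, 3] -- scalars are repeated into a list
val2list([3, 4])             # [3, 4] -- lists/tuples are returned as-is
val2tuple((7,), min_len=3)   # (7, 7, 7) -- assumed: pads by repeating the element at idx_repeat
stats([1.0, 2.0, 3.0])       # e.g. {"min": 1.0, "max": 3.0, "avg": 2.0, "std": ...} (keys assumed)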
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.utils.logging.html b/reference/generated/modelopt.torch.utils.logging.html new file mode 100644 index 0000000..fd19dbd --- /dev/null +++ b/reference/generated/modelopt.torch.utils.logging.html @@ -0,0 +1,230 @@ + + + + + + + logging — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

logging

+

Utility functions for logging.

+

Functions

+ + + + + + + + + + + + +

num2hrb

Convert a big floating-point number to a human-readable string.

no_stdout

Silences stdout within the invoked context.

print_rank_0

Prints only on the master process.

+
+
+exception DeprecatedError
+

Bases: NotImplementedError

+

Error for deprecated functions.

+
+ +
+
+no_stdout()
+

Silences stdout within the invoked context.

+
+ +
+
+num2hrb(num, suffix='')
+

Convert a big floating-point number to a human-readable string.

+
+
Parameters:
+

num (float) –

+
+
Return type:
+

str

+
+
+
+ +
+
+print_rank_0(*args, **kwargs)
+

Prints only on the master process.

+
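Illustrative usage; the exact string produced by num2hrb is an assumption:

from modelopt.torch.utils.logging import no_stdout, num2hrb, print_rank_0

print(num2hrb(1.25e9))  # something like "1.25G" (format assumed)
with no_stdout():
    print("this is suppressed")
print_rank_0("printed on the master process only")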
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.utils.network.html b/reference/generated/modelopt.torch.utils.network.html new file mode 100644 index 0000000..90e1db7 --- /dev/null +++ b/reference/generated/modelopt.torch.utils.network.html @@ -0,0 +1,652 @@ + + + + + + + network — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

network

+

Utility functions for PyTorch models.

+

Functions

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

compare_dict

Compare two dictionaries and return keys with unmatched values.

get_model_attributes

Get the key attributes of a PyTorch model.

get_module_device

Get the device of a PyTorch module.

get_same_padding

Get the same padding for a given kernel size.

init_model_from_model_like

Initialize a model from a model-like object.

is_channels_last

Check if the model is using channels last memory format.

is_parallel

Check if a PyTorch model is parallelized.

make_divisible

Function taken from the original tf repo.

model_to

Convert model to the same device, dtype and memory layout as the target_model.

param_num

Get the number of parameters of a PyTorch model.

param_num_from_forward

Get the number of parameters of a PyTorch model from a forward pass.

remove_bn

Remove all batch normalization layers in the network.

set_submodule

The set function that complements nn.Module.get_submodule().

standardize_model_args

Standardize model arguments according to torch.onnx.export.

standardize_model_like_tuple

Standardize a model-like tuple.

standardize_named_model_args

Standardize model arguments according to torch.onnx.export and give them a name.

standardize_constructor_args

Standardize a constructor-like tuple.

unwrap_model

Unwrap a model that is wrapped by supported wrapper module or return original model.

zero_grad

Set any gradients in the model's parameters to None.

run_forward_loop

Run multiple forward passes with a model according to the provided data loader.

+
+
+compare_dict(dict1, dict2)
+

Compare two dictionaries and return keys with unmatched values.

+
+
Parameters:
+
    +
  • dict1 (Dict[str, Any]) –

  • +
  • dict2 (Dict[str, Any]) –

  • +
+
+
Return type:
+

Tuple[str, …]

+
+
+
+ +
+
+get_model_attributes(model)
+

Get the key attributes of a PyTorch model.

+
+
Parameters:
+

model (Module) –

+
+
Return type:
+

Dict[str, Any]

+
+
+
+ +
+
+get_module_device(module)
+

Get the device of a PyTorch module.

+
+
Parameters:
+

module (Module) –

+
+
Return type:
+

device

+
+
+
+ +
+
+get_same_padding(kernel_size)
+

Get the same padding for a given kernel size.

+
+
Parameters:
+

kernel_size (int | Tuple[int, int]) –

+
+
Return type:
+

int | tuple

+
+
+
+ +
+
+init_model_from_model_like(model)
+

Initialize a model from a model-like object.

+
+
Parameters:
+

model (Module | Type[Module] | Tuple | Callable) – A model-like object. Can be a nn.Module (returned as it is), a model class or callable, or a tuple. +If a tuple, it must be of the form (model_cls_or_callable,) or (model_cls_or_callable, args) or +(model_cls_or_callable, args, kwargs). +Model will be initialized as model_cls_or_callable(*args, **kwargs).

+
+
Return type:
+

Module

+
+
+
+ +
+
+is_channels_last(model)
+

Check if the model is using channels last memory format.

+
+
Parameters:
+

model (Module) –

+
+
+
+ +
+
+is_parallel(model)
+

Check if a PyTorch model is parallelized.

+
+
Parameters:
+

model (Module) –

+
+
Return type:
+

bool

+
+
+
+ +
+
+make_divisible(v, divisor, min_val=None)
+

Function taken from the original tf repo.

+

It ensures that all layers have a channel number that is divisible by 8. It can be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py

+
+
Parameters:
+
    +
  • v (int | float) –

  • +
  • divisor (int | None) –

  • +
+
+
Return type:
+

int | float

+
+
+
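For instance, assuming the rounding rule of the referenced TF implementation (round to the nearest multiple, clamped from below by min_val):

make_divisible(37, 8)              # -> 40, the nearest multiple of 8 (assumed)
make_divisible(37, 8, min_val=48)  # -> 48, clamped by min_val (assumed)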
+ +
+
+model_to(model, target_model)
+

Convert model to the same device, dtype and memory layout as the target_model.

+
+
Parameters:
+
    +
  • model (Module) –

  • +
  • target_model (Module) –

  • +
+
+
+
+ +
+
+param_num(network, trainable_only=False, unit=1000000.0)
+

Get the number of parameters of a PyTorch model.

+
+
Parameters:
+
    +
  • network (Module) – The PyTorch model.

  • +
  • trainable_only (bool) – Whether to only count trainable parameters. Default is False.

  • +
  • unit – The unit to return the number of parameters in. Default is 1e6 (million).

  • +
+
+
Returns:
+

The number of parameters in the model in the given unit.

+
+
Return type:
+

float

+
+
+
+ +
+
+param_num_from_forward(model, trainable_only=False, args=None, unit=1000000.0)
+

Get the number of parameters of a PyTorch model from a forward pass.

+
+
Parameters:
+
    +
  • network – The PyTorch model.

  • +
  • trainable_only (bool) – Whether to only count trainable parameters. Default is False.

  • +
  • unit (float) – The unit to return the number of parameters in. Default is 1e6 (million).

  • +
  • model (Module) –

  • +
  • args (Tensor | Tuple | None) –

  • +
+
+
Returns:
+

The number of parameters from the model’s forward pass in the given unit.

+
+
+

This can be helpful for dynamic modules, where the state dict might contain extra parameters that are not actively used in the model, e.g., because of a DynamicModule that is deactivated for the forward pass. We circumvent this issue by counting only the parameters of modules that appear in a forward pass.

+
+ +
+
+remove_bn(model)
+

Remove all batch normalization layers in the network.

+
+
Parameters:
+

model (Module) –

+
+
+
+ +
+
+run_forward_loop(model, data_loader, max_iters=None, collect_func=None, progress_bar=None)
+

Run multiple forward passes with a model according to the provided data loader.

+
+
Parameters:
+
    +
  • model – The model with which we run forward.

  • +
  • data_loader (Iterable) – An iterator with data samples.

  • +
• max_iters (int | None) – Number of batches to run; by default it runs until data_loader is exhausted.

  • +
  • collect_func (Callable[[Any], Any | Tuple] | None) –

    A Callable that takes a batch of data from the data_loader +as input and returns the input to model.forward() such that the return value +(input) is either:

    +
    +
      +
1. a single argument (type(input) != tuple) corresponding to model.forward(input)
2. a tuple of arguments corresponding to model.forward(*input)
3. a tuple of arguments such that type(input[-1]) == dict corresponding to model.forward(*input[:-1], **input[-1])
    +
    +

    Note

    +

    In order to pass a dict as last non-keyword argument, you need to use a tuple as +input and add an empty dict as the last element, e.g.,

    +
    input = (x, {"y": y, "z": z}, {})
    +
    +
    +

    The empty dict at the end will then be interpreted as the keyword args.

    +
    +

    See the args argument of +torch.onnx.export +for more info on the format of the return value of collect_func (input).

    +

    The default collect_func assumes that the data loader returns a tuple, e.g., +(images, labels, ...), and returns the first element of the tuple.

    +
    +

  • +
  • progress_bar (str | None) – Set to a description string to see the progress bar.

  • +
+
+
+
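A short sketch tying the pieces together; calib_loader and its batch layout are placeholders:

from modelopt.torch.utils.network import run_forward_loop

run_forward_loop(
    model,
    data_loader=calib_loader,             # placeholder: yields (images, labels) tuples
    max_iters=10,                         # stop after 10 batches
    collect_func=lambda batch: batch[0],  # feed only the first element to model.forward()
    progress_bar="calibration",
)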
+ +
+
+set_submodule(model, target, target_submodule)
+

The set function that complements nn.Module.get_submodule().

+
+
Parameters:
+
    +
  • model (Module) –

  • +
  • target (str) –

  • +
  • target_submodule (Module) –

  • +
+
+
+
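For example, a minimal sketch; the replacement layer is arbitrary:

import torch.nn as nn
from modelopt.torch.utils.network import set_submodule

model = nn.Sequential(nn.Linear(4, 4), nn.ReLU())
set_submodule(model, "0", nn.Linear(4, 4, bias=False))  # swaps out model.get_submodule("0")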
+ +
+
+standardize_constructor_args(constructor_args)
+

Standardize a constructor-like tuple.

+
+
Parameters:
+

constructor_args (Callable | Tuple) –

+
+
Return type:
+

Tuple[Callable, Tuple, Dict]

+
+
+
+ +
+
+standardize_model_args(model_or_fw_or_sig, args, use_kwargs=False)
+

Standardize model arguments according to torch.onnx.export.

+
+
Parameters:
+
    +
  • model_or_fw_or_sig (Module | Callable | Signature) – A nn.Module, its forward method, or its forward method’s signature.

  • +
  • args (Any | Tuple) – Refer to the dummy_input parameter in +mtn.profile().

  • +
  • use_kwargs – Affects the return value, see below. For use_kwargs==False, the returned +args are also compatible with torch.onnx.export.

  • +
+
+
Returns:
+

Standardized model args that can be used in model.forward() in the same standardized +way no matter how they were provided, see below for more info.

+
+
Return type:
+

Tuple

+
+
+
    +
  • If use_kwargs == False, the returned args can be used as

    +
    args = standardize_model_args(model, args, use_kwargs=False)
    +model(*args)
    +
    +
    +
  • +
  • If use_kwargs == True, the returned args can be used as

    +
    args = standardize_model_args(model, args, use_kwargs=True)
    +model.forward(*args[:-1], **args[-1])
    +
    +
    +
  • +
+
+

Warning

+

If use_kwargs == False the model’s forward() method cannot contain keyword-only +arguments (e.g. forward(..., *, kw_only_args)) without default values and you must not +provide them in args.

+
+
+

Warning

+

If use_kwargs == False you must not provide variable keyword arguments in args that +are processed via variable keyword arguments in the model’s forward() method +(e.g. forward(..., **kwargs)).

+
+
+ +
+
+standardize_model_like_tuple(model)
+

Standardize a model-like tuple.

+
+
Parameters:
+

model (Module | Type[Module] | Tuple | Callable) –

+
+
Return type:
+

Tuple[Type[Module], Tuple, Dict]

+
+
+
+ +
+
+standardize_named_model_args(model_or_fw_or_sig, args)
+

Standardize model arguments according to torch.onnx.export and give them a name.

+
+
Parameters:
+
    +
  • model_or_fw_or_sig (Module | Callable | Signature) – A nn.Module, its forward method, or its forward method’s signature.

  • +
• args (Any | Tuple) – A tuple of args/kwargs or torch.Tensor fed into the model’s forward() method.

  • +
+
+
Return type:
+

Tuple[Dict[str, Any], Set[str]]

+
+
+
+
Returns: A tuple (args_normalized, args_with_default) where
+
args_normalized is a dictionary of ordered model args where the key represents a unique

serialized string based on the argument’s name in the function signature and the value contains the actual argument,

+
+
args_with_default is a set indicating whether the argument was retrieved from the default

value in the function signature of the model’s forward() method or whether the +argument exactly corresponds to the default value.

+
+
+
+
+
+

Note

+

See standardize_model_args() for +more info as well.

+
+
+ +
+
+unwrap_model(model, warn=False, raise_error=False, msg='')
+

Unwrap a model that is wrapped by supported wrapper module or return original model.

+
+
Parameters:
+
    +
  • model (Module) –

  • +
  • warn (bool) –

  • +
  • raise_error (bool) –

  • +
  • msg (str) –

  • +
+
+
Return type:
+

Module

+
+
+
+ +
+
+zero_grad(model)
+

Set any gradients in the model’s parameters to None.

+
+
Parameters:
+

model (Module) –

+
+
Return type:
+

None

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.utils.perf.html b/reference/generated/modelopt.torch.utils.perf.html new file mode 100644 index 0000000..c35a246 --- /dev/null +++ b/reference/generated/modelopt.torch.utils.perf.html @@ -0,0 +1,253 @@ + + + + + + + perf — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

perf

+

Utility functions for performance measurement.

+

Classes

+ + + + + + +

Timer

A Timer that can be used as a decorator as well.

+

Functions

+ + + + + + + + + + + + +

clear_cuda_cache

Clear the CUDA cache.

get_cuda_memory_stats

Get memory usage of specified GPU in Bytes.

report_memory

Simple GPU memory report.

+
+
+class Timer
+

Bases: ContextDecorator

+

A Timer that can be used as a decorator as well.

+
+
+__init__(name='')
+

Initialize Timer.

+
+ +
+
+start()
+

Start the timer.

+
+ +
+
+stop()
+

End the timer.

+
+
Return type:
+

float

+
+
+
+ +
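A usage sketch based on the documented start()/stop() methods; the decorator form is implied by the ContextDecorator base and is an assumption:

from modelopt.torch.utils.perf import Timer

timer = Timer("forward")
timer.start()
_ = model(inputs)        # placeholder workload
elapsed = timer.stop()   # returns the elapsed time as a float

@Timer("eval")           # decorator form (assumed to time each call)
def evaluate():
    ...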
+ +
+
+clear_cuda_cache()
+

Clear the CUDA cache.

+
+ +
+
+get_cuda_memory_stats(device=None)
+

Get memory usage of specified GPU in Bytes.

+
+ +
+
+report_memory(name='', rank=0)
+

Simple GPU memory report.

+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.utils.random.html b/reference/generated/modelopt.torch.utils.random.html new file mode 100644 index 0000000..ece7d86 --- /dev/null +++ b/reference/generated/modelopt.torch.utils.random.html @@ -0,0 +1,320 @@ + + + + + + + random — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

random

+

Random number generator with a deterministic, synchronized seed for sampling.

+

Functions

+ + + + + + + + + + + + + + + + + + + + + +

centroid

Reduce each element of the seq via torch.prod() and then return the closest seq element (the centroid).

choice

Return a random element from the sequence using a synchronized seed.

original

Return an indicator (None) that can be recognized internally to sample the original choice.

random

Generate a random number from [0, 1) with a deterministic seed.

sample

Sample elements from a given population with a deterministic seed.

shuffle

Shuffle the sequence in-place with a deterministic seed.

+
+
+centroid(seq)
+

Reduce each element of the seq via torch.prod() and then return the closest seq element (the centroid).

+
+
Parameters:
+

seq (Sequence[T]) – Sequence to determine centroid.

+
+
Returns:
+

Centroid of the sequence.

+
+
Return type:
+

T

+
+
+

This function can be used to sample the centroid subnet of a search space via mtn.sample(). The centroid subnet aims to cheaply approximate the median of the search space defined by the model.

+

Example:

+
+

from modelopt.torch.nas import random
import modelopt.torch.nas as mtn

+

# Sample the centroid subnet of a converted model
config = mtn.sample(model, random.centroid)

+
+
+ +
+
+choice(seq)
+

Return a random element from the sequence using a synchronized seed.

+
+
Parameters:
+

seq (Sequence[T]) – Sequence to sample from.

+
+
Returns:
+

Random element from the sequence.

+
+
Return type:
+

T

+
+
+

This function is synchronized across all GPUs and can be used to sample a random subnet from a +search space via mtn.sample() such that the resulting +subnet/configuration is the same across all GPUs.

+

Example:

+
+

from modelopt.torch.nas import random
import modelopt.torch.nas as mtn

+

# Sample a random subnet of a converted model
config = mtn.sample(model, random.choice)

+

# random.choice is also the default option for sample
config = mtn.sample(model)

+
+
+ +
+
+original(seq)
+

Return an indicator (None) that can be recognized internally to sample the original choice.

+
+
Parameters:
+

seq (Sequence[T]) – Sequence of choices from which we want to “choose” the original choice.

+
+
Returns:
+

None indicating to internally select the original choice from the sequence.

+
+
Return type:
+

None

+
+
+

This function can be used to sample the original subnet of a search space via +mtn.sample(). The original subnet corresponds to the +model architecture before the conversion process.

+

Example:

+
+

from modelopt.torch.nas import random
import modelopt.torch.nas as mtn

+

# Sample the original subnet of a converted model
config = mtn.sample(model, random.original)

+
+
+ +
+
+random()
+

Generate a random number from [0, 1) with a deterministic seed.

+
+
Return type:
+

float

+
+
+
+ +
+
+sample(*args, **kwargs)
+

Sample elements from a given population with a deterministic seed.

+
+ +
+
+shuffle(seq)
+

Shuffle the sequence in-place with a deterministic seed.

+
+
Parameters:
+

seq (MutableSequence[Any]) –

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + diff --git a/reference/generated/modelopt.torch.utils.tensor.html b/reference/generated/modelopt.torch.utils.tensor.html new file mode 100644 index 0000000..1c13de2 --- /dev/null +++ b/reference/generated/modelopt.torch.utils.tensor.html @@ -0,0 +1,240 @@ + + + + + + + tensor — Model Optimizer 0.11.2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

tensor

+

Utility functions for PyTorch tensors.

+

Functions

+ + + + + + + + + + + + + + + +

torch_to

Try to recursively move the data to the specified args/kwargs.

torch_detach

Try to recursively detach the data from the computation graph.

torch_to_numpy

Convert torch tensors to numpy arrays.

numpy_to_torch

Convert numpy arrays to torch tensors.

+
+
+numpy_to_torch(np_outputs)
+

Convert numpy arrays to torch tensors.

+
+
Parameters:
+

np_outputs (List[ndarray]) –

+
+
Return type:
+

List[Tensor]

+
+
+
+ +
+
+torch_detach(data)
+

Try to recursively detach the data from the computation graph.

+
+ +
+
+torch_to(data, *args, **kwargs)
+

Try to recursively move the data to the specified args/kwargs.

+
+ +
+
+torch_to_numpy(inputs)
+

Convert torch tensors to numpy arrays.

+
+
Parameters:
+

inputs (List[Tensor]) –

+
+
Return type:
+

List[ndarray]

+
+
+
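A round-trip sketch; support for nested containers in torch_to/torch_detach is inferred from the “recursively” wording, and forwarding of args/kwargs to Tensor.to() is an assumption:

import torch
from modelopt.torch.utils.tensor import numpy_to_torch, torch_detach, torch_to, torch_to_numpy

data = {"x": torch.randn(2, 2, requires_grad=True), "y": [torch.zeros(3)]}
data = torch_detach(data)                    # detach every tensor in the structure (assumed)
data = torch_to(data, dtype=torch.float16)   # args/kwargs presumably forwarded to Tensor.to()
arrays = torch_to_numpy([torch.ones(2)])     # List[Tensor] -> List[ndarray]
tensors = numpy_to_torch(arrays)             # List[ndarray] -> List[Tensor]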
+ +
+ + +
+
+ +
+
+
+
+ + + +
50, 53, 65, 69, 82], "o": 3, "linux": 3, "window": [3, 13, 21], "architectur": [3, 7, 40, 48, 49, 101], "x86_64": 3, "aarch64": 3, "win_amd64": [3, 13], "12": [3, 93], "pytorch": [3, 6, 7, 9, 46, 49, 61, 64, 65, 67, 69, 81, 82, 94, 99, 102], "11": [3, 93], "cuda": [3, 62, 81, 93, 100], "its": [3, 7, 44, 48, 49, 53, 60, 61, 65, 77, 81, 91, 99], "depend": [3, 13, 24, 49, 50], "via": [3, 7, 9, 26, 48, 49, 53, 63, 86, 87, 99, 101], "pip": [3, 13], "review": 3, "licens": 3, "term": [3, 7], "ani": [3, 13, 18, 24, 33, 37, 40, 41, 44, 47, 48, 49, 50, 53, 60, 63, 65, 77, 82, 84, 86, 87, 89, 90, 91, 93, 96, 97, 99, 101, 103], "quick": [3, 12, 81], "detail": [3, 4, 6, 8, 9, 10, 11, 18, 33, 53, 61, 65, 77, 81, 84, 91], "instruct": 3, "set": [3, 4, 7, 11, 13, 17, 18, 21, 25, 30, 31, 37, 38, 40, 44, 46, 47, 48, 49, 50, 53, 58, 61, 63, 77, 86, 87, 91, 95, 99], "virtual": 3, "environ": 3, "we": [3, 4, 7, 8, 9, 11, 24, 26, 38, 39, 40, 42, 47, 48, 49, 50, 51, 53, 58, 63, 77, 79, 81, 87, 88, 91, 99, 101], "you": [3, 6, 7, 9, 10, 11, 13, 18, 47, 48, 60, 79, 80, 84, 91, 99, 103], "don": [3, 53, 63, 81], "t": [3, 30, 53, 58, 63, 67, 81, 82, 101], "have": [3, 7, 8, 9, 11, 23, 37, 47, 49, 60, 61, 65, 81, 99, 103], "one": [3, 8, 38, 39, 42, 48, 49, 58, 61, 79, 81, 82, 85], "alreadi": [3, 30, 48], "run": [3, 10, 11, 16, 19, 53, 58, 65, 77, 91, 99], "command": [3, 10], "activ": [3, 8, 9, 26, 38, 39, 49, 50, 60, 65, 70, 81, 87, 99], "conda": 3, "name": [3, 10, 13, 24, 30, 31, 33, 34, 47, 49, 53, 54, 60, 61, 63, 65, 81, 82, 84, 86, 93, 94, 99, 100], "creat": [3, 7, 10, 11, 24, 29, 31, 37, 40, 48, 58, 60, 77, 85, 90, 94], "n": [3, 85], "option": [3, 7, 13, 24, 30, 33, 38, 43, 48, 49, 50, 53, 91, 101], "desir": [3, 4, 31, 48, 91], "version": [3, 11, 21, 32, 39, 44, 49, 74, 76, 81, 82, 93], "By": [3, 10, 13, 27, 81], "default": [3, 10, 11, 13, 21, 27, 33, 38, 40, 47, 49, 50, 53, 58, 60, 69, 77, 81, 82, 84, 85, 89, 90, 91, 99, 101], "latest": 3, "want": [3, 7, 11, 37, 48, 49, 53, 60, 63, 77, 81, 84, 91, 101], "specif": [3, 7, 9, 24, 49, 51, 60, 81, 84, 94], "your": [3, 7, 9, 11, 13, 60, 79, 91], "extra": [3, 13, 19, 77, 99], "index": [3, 77], "url": 3, "http": [3, 17, 18, 19, 21, 40, 67, 81, 99], "download": [3, 10], "org": [3, 67], "whl": 3, "cu118": 3, "identifi": [3, 30], "correct": [3, 61, 65, 75], "partial": [3, 24], "note": [3, 11, 19, 21, 24, 27, 30, 34, 37, 48, 49, 50, 63, 84, 87], "when": [3, 7, 9, 18, 24, 33, 37, 47, 48, 49, 50, 58, 77, 81, 87, 94, 104], "without": [3, 7, 33, 40, 49, 99], "onli": [3, 5, 7, 8, 9, 10, 11, 13, 18, 21, 24, 26, 27, 30, 31, 33, 36, 39, 41, 44, 47, 49, 58, 60, 67, 69, 81, 87, 98, 99], "barebon": 3, "none": [3, 18, 19, 24, 26, 33, 34, 37, 38, 39, 40, 42, 43, 44, 47, 48, 49, 50, 53, 54, 58, 59, 60, 63, 64, 65, 77, 81, 82, 84, 86, 87, 89, 91, 93, 94, 95, 99, 100, 101], "modul": [3, 7, 13, 15, 16, 19, 20, 22, 25, 26, 27, 28, 35, 36, 37, 38, 39, 40, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 56, 58, 60, 61, 62, 63, 64, 65, 66, 69, 70, 71, 73, 75, 76, 77, 78, 79, 80, 82, 83, 84, 86, 88, 91, 92, 93, 94, 96, 99], "work": [3, 8, 10, 24, 37, 79], "appropri": [3, 4, 5, 50], "below": [3, 4, 6, 7, 8, 9, 10, 11, 49, 60, 65, 99], "need": [3, 8, 11, 13, 37, 38, 39, 44, 48, 49, 50, 60, 65, 80, 81, 99], "correctli": [3, 7, 11, 47, 77, 79], "correspond": [3, 48, 49, 50, 60, 63, 86, 99, 101], "_deploi": [3, 13], "addition": [3, 24, 48], "3rd": 3, "parti": [3, 52, 79, 88], "plugin": 3, "third": [3, 52, 79, 88], "packag": [3, 13, 15, 16, 36, 55, 82], "transform": [3, 7, 10, 38, 39, 41], "hf": [3, 13, 19], 
"cach": [3, 9, 11, 17, 18, 100], "dir": [3, 19, 37], "com": [3, 17, 18, 19, 21, 40, 81, 99], "": [3, 4, 5, 6, 7, 9, 10, 11, 15, 16, 21, 26, 37, 47, 48, 49, 51, 59, 63, 67, 81, 86, 91, 94, 97, 99], "quantiz": [3, 12, 13, 16, 21, 24, 26, 27, 30, 31, 32, 38, 39, 40, 41, 42, 43, 45, 51, 56, 58, 59, 61, 63, 64, 65, 66, 68, 70, 71, 72, 73, 74, 75, 76, 77, 79, 80, 81, 82, 94], "compil": [3, 10, 18, 30, 33, 93], "fast": [3, 65], "kernel": [3, 9, 18, 30, 33, 99], "mai": [3, 7, 8, 11, 48, 49, 81, 91, 93, 103], "take": [3, 7, 11, 47, 50, 58, 61, 64, 65, 77, 81, 91, 93, 99], "few": [3, 65, 93], "minut": [3, 9], "subsequ": [3, 7, 53, 93], "much": [3, 11], "faster": [3, 50], "To": [3, 6, 7, 11, 13, 84], "invok": [3, 98], "now": [3, 13, 30, 63], "c": [3, 62, 93], "extens": [3, 19, 81, 93], "ext": 3, "print": [3, 11, 16, 24, 33, 65, 91, 98], "cuda_ext": 3, "cuda_ext_fp8": 3, "techniqu": [4, 5, 7, 8, 11, 13], "mtq": [4, 11, 60, 61, 80, 94], "case": [4, 5, 7, 9, 10, 11, 42, 49, 60], "requir": [4, 5, 7, 9, 11, 13, 39, 63, 65, 77, 93], "configur": [4, 5, 7, 11, 47, 49, 50, 51, 53, 54, 60, 65, 84, 91, 94, 101], "forward": [4, 5, 8, 11, 24, 44, 53, 61, 64, 65, 67, 69, 75, 77, 81, 90, 91, 94, 99], "here": [4, 5, 11, 30, 32, 49, 50, 60, 61, 65, 99], "setup": [4, 5, 11, 61], "get_model": [4, 5, 11], "show": [4, 11, 47, 60, 84], "rough": 4, "how": [4, 9, 19, 46, 50, 79, 81, 99], "loader": [4, 5, 11, 65, 91, 99], "calib_s": [4, 5, 11], "data_load": [4, 5, 7, 11, 65, 91, 99], "get_dataload": [4, 11], "num_sampl": [4, 5, 11, 94], "defin": [4, 5, 7, 8, 11, 30, 39, 46, 51, 53, 61, 67, 81, 86, 101], "forward_loop": [4, 5, 11, 53, 60, 64, 65, 91, 94], "function": [4, 11, 18, 19, 21, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 37, 38, 40, 41, 42, 43, 44, 45, 47, 48, 50, 51, 53, 54, 58, 60, 61, 64, 65, 69, 77, 78, 80, 81, 82, 85, 86, 87, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102], "should": [4, 10, 11, 13, 18, 21, 23, 24, 30, 32, 33, 39, 47, 48, 49, 50, 61, 64, 65, 81, 91], "wrap": [4, 11, 47, 48, 99], "insid": [4, 91], "def": [4, 11, 61, 65], "batch": [4, 9, 11, 16, 17, 18, 26, 34, 49, 65, 71, 91, 94, 99], "int8_smoothquant_cfg": [4, 11, 60], "just": [4, 39, 63, 99], "regular": [4, 7, 8, 11, 33, 46, 49, 50, 86, 91], "evalu": [4, 19, 53, 65], "export": [4, 6, 7, 11, 13, 16, 18, 38, 40, 42, 44, 46, 49, 51, 53, 63, 77, 80, 82, 86, 91, 99], "see": [4, 9, 10, 11, 19, 49, 58, 59, 60, 61, 65, 81, 91, 99], "guid": [4, 5, 11], "more": [4, 5, 6, 7, 9, 11, 18, 46, 47, 49, 60, 61, 65, 67, 77, 81, 91, 99], "next": [4, 5, 23, 24, 86], "step": [4, 5, 7, 10, 48, 51, 53, 60, 77], "about": [4, 5, 7, 46, 60, 77], "usag": [4, 5, 7, 11, 18, 47, 94, 100], "checkout": [4, 5], "out": [4, 5, 7, 32, 79], "featur": [5, 7, 13, 18, 50, 53, 77], "get_train_dataload": 5, "sparsity_config": [5, 7], "collect_func": [5, 7, 91, 99], "lambda": [5, 7], "x": [5, 7, 11, 26, 58, 59, 85, 97, 99], "mode": [5, 13, 33, 47, 48, 53, 60, 81, 82, 84, 91], "driven": [5, 7], "sparse_magnitud": [5, 7, 84, 91], "doe": [5, 7, 9, 27, 33, 48, 49, 50, 63, 65], "pure": [5, 47], "base": [5, 6, 7, 9, 11, 17, 19, 23, 26, 27, 33, 37, 39, 47, 48, 49, 50, 53, 57, 58, 59, 60, 61, 63, 64, 67, 69, 72, 73, 74, 75, 76, 77, 81, 84, 85, 86, 87, 89, 90, 98, 99, 100], "substitut": 5, "iter": [5, 17, 23, 40, 48, 49, 65, 77, 91, 94, 99], "dataset": [5, 65, 94], "hardwar": [6, 7], "simul": [6, 11], "origin": [6, 7, 8, 9, 10, 11, 33, 41, 48, 49, 50, 61, 75, 99, 101], "precis": [6, 9, 10, 11, 33], "test": [6, 16], "best": [6, 10, 17, 53, 85], "trade": 6, "off": 6, "between": [6, 18, 
58], "low": [6, 8, 9, 11, 33], "actual": [6, 9, 48, 49, 50, 53, 99], "speedup": [6, 10, 11], "find": [6, 7, 11, 26, 30, 34, 85], "document": [6, 60, 91], "basic": [6, 7, 23, 29, 32, 49, 53, 81], "concept": [6, 46], "practic": [6, 17], "choos": [6, 60, 91, 101], "right": [6, 60, 63], "beta": 6, "describ": [7, 11, 48, 49, 60, 63, 65, 81, 86, 91], "obtain": 7, "either": [7, 18, 21, 33, 49, 99], "exist": [7, 37, 38, 48, 49], "load": [7, 9, 10, 16, 18, 19, 34, 37, 41, 42, 48, 53, 62, 77, 93, 94], "pre": [7, 11, 30, 43, 53], "re": [7, 49], "mto": [7, 11, 48], "relat": [7, 24, 30, 34], "process": [7, 8, 10, 18, 33, 37, 40, 42, 48, 49, 53, 84, 91, 94, 95, 98, 99, 101], "convert": [7, 11, 13, 16, 18, 32, 33, 38, 39, 40, 41, 44, 45, 48, 49, 53, 63, 84, 86, 91, 98, 99, 101, 102, 104], "dens": [7, 39], "retrain": 7, "simplest": [7, 8, 11], "wai": [7, 8, 11, 49, 99], "return": [7, 11, 17, 19, 21, 23, 24, 26, 30, 31, 32, 33, 34, 37, 38, 39, 40, 41, 42, 43, 44, 47, 48, 49, 50, 53, 54, 58, 59, 61, 63, 64, 65, 77, 81, 82, 85, 86, 89, 90, 91, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102], "dictionari": [7, 10, 11, 23, 24, 26, 34, 47, 48, 49, 53, 60, 61, 64, 65, 81, 84, 91, 99], "specifi": [7, 11, 18, 34, 48, 49, 51, 58, 60, 61, 63, 64, 65, 81, 82, 84, 85, 86, 91, 93, 94, 100, 102], "dataload": [7, 11, 94], "magnitud": [7, 86, 91], "respect": [7, 8, 47, 60, 65, 81], "automodelforcausallm": 7, "from_pretrain": [7, 11], "eleutherai": 7, "gpt": 7, "j": 7, "calib_dataload": 7, "sparse_model": 7, "threshold": 7, "futur": [7, 11, 91], "modelopt_sparse_model": 7, "pth": 7, "along": [7, 8, 81, 82], "mask": [7, 85, 87, 89, 90, 91], "later": [7, 9, 84], "opt": [7, 9, 11, 13, 48], "initi": [7, 11, 17, 23, 24, 26, 31, 48, 49, 50, 58, 59, 61, 69, 75, 77, 80, 81, 87, 94, 99, 100], "unmodifi": [7, 48, 49], "plain": 7, "enforc": [7, 47, 48, 49, 50, 91], "access": [7, 21, 37, 47, 49, 60], "remov": [7, 24, 34, 37, 49, 85, 90, 99], "longer": [7, 11, 42, 49, 80, 91], "dure": [7, 8, 11, 33, 48, 77, 91, 104], "do": [7, 30, 37, 49, 63, 81], "overview": [7, 8, 12], "well": [7, 9, 11, 49, 84, 99, 100], "terminologi": 7, "fraction": 7, "zero": [7, 31, 58, 81, 85], "broadli": 7, "categor": [7, 21], "randomli": 7, "distribut": [7, 13], "across": [7, 8, 37, 39, 42, 49, 77, 101], "matrix": [7, 85, 90], "flexibl": 7, "lead": 7, "poor": 7, "util": [7, 13, 19, 21, 23, 24, 29, 30, 31, 32, 37, 38, 41, 42, 43, 44, 48, 51, 61, 64, 90, 93, 94, 95, 96, 97, 98, 99, 100, 102], "other": [7, 8, 13, 19, 33, 37, 42, 49, 64, 81], "hand": 7, "effici": 7, "exploit": 7, "higher": [7, 18], "math": [7, 8], "throughput": [7, 9, 10], "usual": [7, 48, 49, 53], "special": [7, 11, 49, 81], "grain": [7, 8], "block": [7, 9, 11, 26, 38, 43, 49, 81, 91, 101], "contigu": [7, 41, 42], "element": [7, 24, 32, 85, 97, 99, 101], "most": [7, 8, 9, 33], "nonzero": 7, "due": [7, 11], "implement": [7, 8, 30, 37, 46, 49, 53, 67, 69, 77, 81], "benefit": 7, "bandwidth": [7, 8, 9], "smaller": 7, "than": [7, 11, 19, 49, 81], "core": [7, 13], "deliv": 7, "multipli": [7, 81], "oper": [7, 8, 9, 13, 21, 33, 34, 97], "argument": [7, 21, 48, 49, 50, 53, 58, 60, 64, 65, 81, 91, 93, 94, 99], "allow": [7, 10, 11, 13, 49, 53], "On": 7, "amper": [7, 9], "four": 7, "two": [7, 10, 32, 34, 38, 40, 81, 99], "There": [7, 81], "mani": [7, 33, 81], "commonli": [7, 8], "approach": 7, "largest": 7, "retain": [7, 82], "rest": 7, "simpl": [7, 10, 11, 69, 81, 100], "brain": 7, "surgeon": 7, "better": [7, 10], "consist": [8, 19, 47, 49], "found": [8, 24, 47, 49], "topic": 8, "width": [8, 17, 18], "valu": [8, 
10, 32, 34, 39, 47, 49, 50, 59, 60, 63, 64, 65, 75, 77, 81, 82, 86, 87, 91, 99], "integ": [8, 9, 26, 58, 59, 77, 81], "sign": [8, 32, 77, 81], "mantissa": [8, 81], "float": [8, 17, 26, 32, 38, 39, 50, 53, 58, 65, 81, 90, 97, 98, 99, 100, 101], "point": [8, 9, 11, 31, 81], "expon": [8, 81], "FOR": 8, "explan": 8, "unscal": 8, "map": [8, 24, 30, 31, 40, 44, 60, 61, 65, 81], "rang": [8, 50, 67, 77, 81, 82], "share": [8, 34, 37, 40], "same": [8, 13, 18, 33, 34, 37, 39, 48, 49, 81, 84, 91, 99, 101], "calcul": [8, 43, 53, 81, 82, 85], "divid": 8, "common": [8, 11, 19, 30, 32, 34, 41], "whole": [8, 51, 77], "global": [8, 31, 59], "channel": [8, 9, 10, 49, 58, 81, 82, 99], "separ": [8, 11, 48, 58], "fix": [8, 13, 30, 51, 77, 91], "dimens": [8, 26, 32, 34, 82], "typic": [8, 9, 10, 11, 33], "gptq": 8, "stai": 8, "high": [8, 9, 17, 91], "help": [8, 27, 99], "constrain": 8, "scenario": [8, 9], "comput": [8, 9, 11, 26, 50, 58, 77, 81, 82, 85, 91, 96, 97, 102], "potenti": 8, "adjust": [8, 11, 43, 61, 64], "maxim": [8, 53, 85], "max": [8, 9, 11, 17, 18, 26, 39, 50, 60, 64, 67, 69, 81, 97], "which": [8, 11, 18, 24, 30, 46, 47, 48, 50, 53, 58, 60, 61, 64, 65, 67, 75, 81, 82, 84, 85, 99, 101], "maximum": [8, 59, 82, 94], "unchang": [8, 49], "round": [8, 26, 39], "nearest": [8, 26], "entropi": [8, 33, 58], "view": 8, "updat": [8, 11, 26, 38, 44, 47, 48, 49, 61, 63, 77, 86, 91], "loss": [8, 26, 91], "compar": [8, 99], "must": [8, 11, 26, 48, 49, 50, 63, 81, 82, 99], "backward": [8, 13, 24, 40, 44, 67, 81], "pass": [8, 11, 37, 49, 64, 67, 77, 81, 82, 99], "straight": [8, 81], "estim": [8, 13, 50, 81], "ste": 8, "clip": [8, 33, 67, 77, 81], "behind": 8, "explicit": [8, 10, 33], "graph": [8, 13, 24, 30, 31, 34, 102], "represent": [8, 40, 63, 86], "qdq": [8, 10, 24, 27, 30, 31, 33], "node": [8, 10, 11, 13, 18, 24, 30, 31, 33, 34, 40], "network": [8, 37, 77, 90], "three": 9, "primari": 9, "compon": 9, "context": [9, 18, 39, 49, 75, 80, 81, 82, 98], "small": [9, 11], "often": [9, 11, 30], "bound": [9, 18, 53, 69], "In": [9, 10, 49, 60, 81, 99], "limit": [9, 18], "regim": 9, "give": [9, 33, 99], "superior": 9, "improv": 9, "serv": 9, "16": [9, 23, 50], "densiti": 9, "becom": 9, "crucial": 9, "consequ": [9, 49], "lower": [9, 33, 47, 69], "choic": [9, 49, 50, 101], "suggest": 9, "priorit": [9, 47], "caus": [9, 30], "veri": 9, "littl": 9, "strong": 9, "meet": [9, 82], "could": [9, 11, 16, 17, 49, 50], "try": [9, 102], "earlier": [9, 84], "sq": 9, "might": [9, 11, 48, 49, 81, 99], "toler": 9, "tabl": [9, 60], "summar": 9, "tradeoff": 9, "consid": [9, 21], "medium": 9, "min": [9, 50, 67, 69, 93, 97], "50": 9, "ada": 9, "hopper": 9, "variant": [9, 19, 32], "w4a16": [9, 60], "wise": [9, 11, 24], "25": 9, "ten": 9, "w4a8": [9, 39, 60], "impact": 9, "measur": [9, 41, 100], "10": [9, 11], "popular": 9, "ll": 9, "subject": [9, 21], "togeth": [10, 11, 39, 48, 49], "eq": 10, "kei": [10, 11, 41, 47, 48, 49, 60, 64, 65, 81, 84, 91, 99], "advantag": [10, 11], "offer": [10, 11, 16], "non": [10, 19, 21, 24, 30, 48, 49, 85, 99], "expert": [10, 11, 38, 39], "white": 10, "box": 10, "design": 10, "custom": [10, 19, 31, 47, 49, 60, 61], "vision": 10, "new": [10, 13, 31, 47, 49, 63, 82], "rule": [10, 11, 24, 47, 49, 87], "real": [10, 11], "6": [10, 26], "9": [10, 26, 44], "prefer": 10, "link": [10, 13, 41], "done": [10, 11, 32, 91], "random": [10, 13, 23, 34], "imag": [10, 99], "numpi": [10, 23, 31, 33, 34, 59, 102], "multi": [10, 37], "arrai": [10, 31, 32, 33, 59, 102], "calib_data": 10, "np": [10, 41, 58], "randn": 10, "batch_siz": [10, 
34, 94], "h": [10, 85], "w": [10, 26, 49, 85], "npy": 10, "dict": [10, 17, 18, 23, 24, 30, 31, 33, 34, 37, 40, 41, 44, 47, 48, 49, 51, 53, 60, 61, 63, 64, 65, 77, 81, 84, 86, 89, 90, 91, 97, 99], "match": [10, 11, 24, 30, 47, 60, 61, 65, 84, 96], "input_nam": [10, 24, 34], "shape": [10, 17, 21, 23, 31, 34, 39, 42, 59, 77, 81], "input_name2": 10, "shape2": 10, "savez": 10, "npz": [10, 40, 44], "moq": 10, "calibration_data": [10, 23, 33], "calibration_data_path": 10, "onnx_path": [10, 23, 29, 33, 34], "output_path": [10, 33], "quant": [10, 33, 63], "quantize_mod": [10, 26, 33], "altern": 10, "line": 10, "m": [10, 81, 85], "path": [10, 13, 17, 18, 19, 23, 24, 33, 34, 37, 40, 42, 44, 93], "output": [10, 11, 16, 17, 18, 24, 30, 31, 33, 34, 38, 49, 77, 81], "calibraton": 10, "tool": [10, 19, 21, 33, 46], "insert": [10, 11, 31, 33, 48], "friendli": [10, 33], "chang": [10, 13, 21, 48, 59], "behavior": [10, 48, 49, 60, 65, 67], "tweak": 10, "param": 10, "op_types_to_quant": [10, 21, 33], "op_types_to_exclud": [10, 33], "trtexec": 10, "usr": 10, "src": [10, 24], "bin": [10, 58], "previou": 10, "saveengin": 10, "check": [10, 11, 19, 24, 34, 42, 47, 48, 49, 53, 54, 79, 82, 96, 99], "report": [10, 100], "latenc": [10, 53], "field": [10, 39, 41, 47, 60, 84], "replac": [10, 11, 29, 49, 60, 61, 65, 77, 82], "flag": [10, 18, 33, 48], "implicit": 10, "refactor": 11, "pytorch_quant": 11, "nativ": [11, 13, 30], "hug": 11, "face": [11, 65], "fake": [11, 48, 77, 81], "mean": [11, 18, 40, 91, 97], "cover": 11, "128": [11, 58, 60, 81], "512": [11, 23, 94], "sampl": [11, 17, 23, 49, 94, 99, 101], "callabl": [11, 48, 50, 53, 60, 61, 63, 64, 65, 86, 91, 94, 99], "own": [11, 42, 49, 60, 79, 81], "order": [11, 19, 48, 49, 50, 84, 99], "collect": [11, 19, 24, 30, 37, 56, 58, 59, 90], "statist": [11, 56, 65, 77, 82], "around": 11, "select": [11, 21, 26, 33, 36, 49, 64, 85, 101], "look": [11, 30, 47, 49], "verifi": 11, "place": [11, 30, 47, 48, 49, 91, 101], "let": [11, 49], "summari": [11, 65], "successfulli": 11, "print_quantization_summari": 11, "normal": [11, 21, 27, 50, 71, 73, 91, 99], "flow": [11, 21], "sample_input": 11, "onnx_fil": 11, "direct": 11, "recov": 11, "resourc": [11, 18], "directli": [11, 49, 77, 81], "frozen": [11, 87], "int8_default_cfg": [11, 60], "calib_set": 11, "rate": 11, "durat": 11, "train_load": 11, "schedul": 11, "epoch": 11, "even": [11, 26], "less": 11, "suffici": [11, 27], "resum": [11, 46], "modelopt_st": [11, 48, 77], "pt": [11, 48], "trainer": 11, "save_model": 11, "restor": [11, 41, 46, 48, 61, 63, 86], "restore_from_modelopt_st": [11, 48], "un": [11, 61], "load_state_dict": [11, 48], "under": [11, 18, 40], "hood": 11, "linear": [11, 21, 24, 33, 34, 38, 39, 41, 74, 75, 82, 84], "conv": [11, 30, 33], "patch": [11, 25, 28, 49], "instanc": [11, 17, 41, 48, 49, 54, 60, 61, 65, 73, 77, 81, 91, 94], "quantdescriptor": [11, 58, 59, 61, 77, 81], "paramet": [11, 13, 17, 18, 19, 21, 23, 24, 26, 29, 30, 31, 32, 33, 34, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 53, 54, 56, 58, 59, 61, 64, 65, 69, 77, 81, 82, 85, 86, 87, 89, 90, 91, 93, 94, 95, 96, 97, 98, 99, 101, 102], "axi": [11, 58, 59, 60, 61, 77, 81, 82], "tensor_qu": [11, 61, 72, 74, 75, 77], "nn": [11, 48, 49, 53, 58, 61, 74, 76, 77, 84, 99], "descriptor": [11, 63, 81, 86], "quant_desc": [11, 77], "num_bit": [11, 38, 58, 59, 60, 61, 77, 81], "unsign": [11, 32, 58, 59, 77, 81], "true": [11, 17, 26, 34, 39, 40, 42, 44, 48, 49, 53, 58, 60, 61, 69, 77, 81, 82, 96, 99], "quant_x": 11, "disabl": [11, 13, 65, 77, 82], "who": 11, 
"wildcard": [11, 60, 61, 65], "filter": [11, 60, 61, 65], "copi": [11, 13, 21, 60], "quant_cfg": [11, 60, 61, 65, 94], "bmm": 11, "output_quant": [11, 38, 75], "howev": [11, 49], "regist": [11, 13, 47, 49, 50, 61, 81, 90], "them": [11, 32, 39, 42, 46, 49, 77, 99], "handl": [11, 13, 48, 49, 52, 79, 88], "unsupport": 11, "subclass": [11, 49], "kv": [11, 17], "attent": [11, 38, 39], "layernorm": [11, 38, 39, 61], "class": [11, 17, 19, 23, 26, 27, 37, 39, 47, 48, 49, 50, 51, 53, 56, 57, 58, 59, 61, 63, 67, 69, 72, 73, 74, 75, 76, 77, 81, 85, 86, 87, 89, 90, 99, 100], "quantlayernorm": [11, 61], "__init__": [11, 17, 19, 23, 26, 27, 37, 39, 48, 49, 50, 53, 58, 59, 61, 69, 77, 81, 100], "self": [11, 48, 49, 50, 61, 75], "normalized_shap": [11, 61], "super": [11, 61], "_setup": [11, 49, 61], "input_quant": [11, 60, 61, 65, 75], "weight_quant": [11, 58, 60, 61, 65, 75], "anywher": 11, "f": [11, 48, 61], "layer_norm": [11, 61], "bia": [11, 39, 61, 81], "ep": [11, 39, 61], "so": [11, 36, 39, 47, 49, 58, 59], "instanti": [11, 19], "attribut": [11, 49, 60, 61, 65, 77, 81, 99], "code": [11, 32, 40, 63, 79, 86, 101], "original_cl": [11, 49, 61], "quantized_cl": [11, 61], "fold": [11, 49, 65], "avoid": [11, 104], "repeat": [11, 97], "inferec": 11, "fold_weight": [11, 65], "quantized_model": 11, "user_evaluate_func": 11, "instal": [12, 13, 82], "sparsiti": [12, 13, 18, 84, 85, 86, 87, 88, 89, 91], "changelog": 12, "contact": 12, "u": 12, "faq": 12, "break": 13, "wa": [13, 53, 99], "renam": 13, "ammo": 13, "full": [13, 18, 37, 38, 39, 47, 48], "product": 13, "being": [13, 49], "deprec": [13, 58, 78, 80, 98], "inference_gpu": 13, "arg": [13, 19, 48, 49, 53, 75, 77, 78, 80, 87, 98, 99, 101, 102], "model_config_export": 13, "torch_to_tensorrt_llm_checkpoint": [13, 18, 40], "experiment": [13, 64, 77, 81], "sat": 13, "chain": [13, 63], "set_data_parallel_group": [13, 95], "set_tensor_parallel_group": [13, 95], "multipl": [13, 16, 42, 43, 48, 49, 60, 77, 99], "modif": [13, 46, 48], "float8": 13, "fsdp": 13, "fulli": [13, 49], "shard": 13, "ad": [13, 48, 49, 77, 81, 91], "releas": 13, "wheel": 13, "submodul": [13, 49, 84], "bug": 13, "compat": [13, 38, 40, 44, 48, 49, 99], "issu": [13, 99, 103, 104], "dynam": [13, 25, 27, 47, 54, 81, 87, 91, 99], "dim": 13, "opset": 13, "neg": 13, "pb": 13, "tmp": [13, 40], "folder": 13, "tensorrt_llm": [16, 17, 18, 19, 39, 40, 44], "stage": [16, 77], "top": [16, 27, 48, 49], "build_tensorrt_llm": [16, 18], "pretrained_config": [16, 18], "pretrained_config_json_path": 16, "engine_dir": [16, 17, 18], "max_input_len": [16, 17, 18], "max_output_len": [16, 18], "max_batch_s": [16, 18], "max_beam_width": [16, 17, 18], "max_num_beam": 16, "num_build_work": [16, 18], "offlin": 16, "built": [16, 18], "host_context": 16, "token": [16, 17, 18, 19, 94], "num_beam": 16, "long": 16, "input_text": 16, "wrapper": [17, 48, 77, 99], "over": [17, 26, 34, 47, 49, 53, 65, 73, 94], "level": [17, 24, 49, 91], "runner": 17, "hlapi": 17, "profil": [17, 99], "valid": [17, 34, 42, 47, 49], "kv_cache_config": 17, "tokenizerbas": 17, "int": [17, 18, 26, 31, 34, 37, 38, 39, 40, 42, 43, 44, 49, 50, 54, 58, 77, 81, 82, 85, 94, 95, 97, 99], "blob": [17, 18, 19, 21, 40, 81, 99], "main": [17, 18, 19, 21, 27, 40, 77], "doc": [17, 18, 40, 47, 81], "sourc": [17, 18, 40, 63, 79, 86, 93], "perf": 17, "md": [17, 18, 40, 81], "generate_text": 17, "prompt": [17, 18], "max_new_token": 17, "temperatur": 17, "0": [17, 18, 26, 34, 39, 40, 44, 58, 60, 61, 77, 81, 99, 100, 101], "keep_input_prompt": 17, "text": [17, 19, 94], 
"string": [17, 48, 58, 61, 64, 81, 85, 91, 98, 99], "length": [17, 18, 50, 82, 94], "bool": [17, 18, 24, 26, 32, 33, 34, 38, 39, 40, 42, 47, 48, 49, 53, 54, 58, 63, 85, 86, 95, 96, 99], "prommpt": 17, "2d": [17, 72], "beam": [17, 18], "properti": [17, 19, 37, 39, 47, 48, 49, 50, 53, 59, 63, 77, 81, 86, 89, 90], "get": [17, 37, 47, 49, 51, 53, 77, 81, 85, 89, 90, 94, 95, 99, 100], "200": 18, "max_num_token": 18, "enable_spars": 18, "fals": [18, 26, 33, 34, 39, 40, 47, 48, 49, 58, 59, 60, 61, 69, 77, 81, 96, 99, 104], "max_prompt_embedding_table_s": 18, "target": [18, 37, 40, 41, 42, 48, 49, 94, 99], "sequenc": [18, 21, 31, 32, 50, 51, 96, 101], "search": [18, 46, 48, 49, 53, 54, 86, 89, 90, 91, 101], "phase": 18, "count": [18, 99], "ones": [18, 85], "been": [18, 48, 65], "fall": 18, "inflight": 18, "alloc": 18, "perf_best_practic": 18, "worker": 18, "concern": 18, "increas": 18, "num": [18, 98], "At": 18, "lost": 18, "higer": 18, "cpu": [18, 42, 58], "conserv": 18, "switch": 18, "trt": [18, 31, 44], "With": 18, "tactic": 18, "spars": [18, 84, 85, 86, 87, 89, 90, 91], "significantli": [18, 91], "prepend": 18, "concaten": 18, "embed": [18, 38, 39, 42, 58], "multimod": 18, "build_tensorrt_llm_rank": 18, "customsentencepiecetoken": 19, "pretrainedtoken": [19, 94], "sentencepiecetoken": 19, "make": [19, 32, 42, 49, 82], "nemo_exampl": 19, "sh": 19, "kwarg": [19, 47, 48, 49, 64, 75, 77, 78, 80, 87, 98, 99, 101, 102], "constructor": [19, 99], "legaci": 19, "batch_decod": 19, "id": [19, 41], "introduc": 19, "batch_encode_plu": 19, "ignor": [19, 81], "decod": [19, 38, 39, 40], "mmethod": 19, "encod": 19, "return_tensor": 19, "max_length": 19, "eos_token": 19, "eos_token_id": 19, "pad_token": 19, "pad_token_id": 19, "get_nemo_token": 19, "tokenizer_cfg_path": 19, "logic": [19, 32, 38, 44, 53], "get_nmt_token": 19, "nlp": 19, "tokenizer_util": 19, "py": [19, 21, 40, 99], "get_tokenzi": 19, "tokenizer_dir_or_path": 19, "subpackag": [20, 22, 35], "op": [21, 27, 30, 33, 48], "get_quantizable_op_typ": 21, "_configure_ort": 21, "suppli": [21, 81], "is_binary_op": 21, "whether": [21, 24, 31, 32, 37, 38, 40, 48, 49, 50, 53, 63, 81, 91, 95, 99], "binari": 21, "is_control_flow_op": 21, "control": 21, "categori": 21, "is_conversion_op": 21, "is_copy_op": 21, "is_default_quantizable_op_by_ort": 21, "ort": [21, 27, 28, 29], "nodes_to_quant": [21, 30, 33], "microsoft": 21, "onnxruntim": [21, 29], "registri": [21, 47, 48, 49], "is_fusible_reduction_op": 21, "reduct": 21, "fusibl": [21, 30], "myelin": 21, "is_generator_op": 21, "is_irregular_mem_access_op": 21, "irreggular": 21, "mem": 21, "is_linear_op": 21, "is_modifier_op": 21, "modifi": [21, 24, 27, 31, 34, 47, 48, 49, 87], "is_multiclass_op": 21, "multiclass": 21, "is_non_reshape_copy_op": 21, "reshap": [21, 85], "is_normalization_op": 21, "is_pointwise_or_elementwise_op": 21, "pointwis": [21, 30], "elementwis": 21, "is_pooling_or_window_op": 21, "pool": [21, 76], "is_recurrent_op": 21, "recurr": 21, "is_selection_op": 21, "is_sequence_op": 21, "is_shape_op": 21, "is_unary_op": 21, "unari": 21, "calibrationdataprovid": 23, "calibrationdataread": [23, 26], "intial": [23, 37], "ndarrai": [23, 26, 31, 32, 33, 34, 41, 44, 81, 102], "ex": [23, 24], "64": [23, 42], "timestep": 23, "encoder_hidden_st": 23, "768": 23, "1024": [23, 60], "get_next": 23, "reader": 23, "randomdataprovid": 23, "placement": 24, "build_non_residual_input_map": 24, "residu": [24, 30], "add": [24, 30, 33, 48, 99], "assum": [24, 38, 99], "subgraph": [24, 30], "convolut": [24, 72], "sum": [24, 
85], "anoth": [24, 48, 49], "constant": [24, 31, 38, 41, 44, 81], "becaus": [24, 26, 44, 93, 99], "occur": [24, 38, 44], "modern": 24, "convnet": 24, "connect": 24, "v": [24, 39, 99], "classify_partition_nod": 24, "partit": [24, 33], "outsid": 24, "algo": 24, "info": [24, 47, 99], "dst": 24, "tupl": [24, 30, 34, 37, 38, 40, 48, 49, 50, 53, 54, 58, 59, 63, 81, 82, 85, 86, 90, 91, 97, 99], "filter_quantizable_kgen_head": 24, "cask_fusible_partit": 24, "kgen_partit": 24, "quantizable_op_typ": [24, 30], "kgen": [24, 30], "head": 24, "cask": [24, 30], "get_fusible_backbon": 24, "backbon": [24, 30], "fuse": [24, 30], "bn": 24, "relu": 24, "some": [24, 30, 32, 38, 44, 51, 59, 67, 81], "tri": 24, "those": [24, 30, 81, 84], "biasadd": 24, "constmul": 24, "start": [24, 41, 100], "has_const_input": 24, "has_path_typ": 24, "path_typ": 24, "is_forward": 24, "wild_card_typ": 24, "path_nod": 24, "wrt": 24, "travers": [24, 30], "wild": 24, "card": 24, "skip": [24, 49, 58, 60], "accumul": 24, "is_const_input": 24, "const": 24, "foldabl": 24, "print_stat": 24, "verbos": [24, 33, 91], "stat": [24, 53, 91, 97], "remove_partial_input_qdq": 24, "no_quantize_input": 24, "mark": 24, "onnx_graphsurgeon": 25, "explicitli": [25, 47, 49], "patch_gs_modul": 25, "graphsurgeon": [25, 31], "woq": 26, "write": [26, 33, 37, 79], "back": [26, 46, 49], "disk": 26, "awqcliphelp": 26, "object": [26, 31, 37, 39, 48, 49, 50, 72, 74, 75, 77, 81, 99], "helper": [26, 32], "block_siz": [26, 60, 77, 81], "alpha_step": [26, 60], "05": [26, 39], "alpha": 26, "5": [26, 30], "55": 26, "65": 26, "7": 26, "75": 26, "85": 26, "95": 26, "min_alpha": 26, "update_best_param": 26, "dq_tensor": 26, "dequant": [26, 31, 81], "find_scal": 26, "quant_tensor": 26, "quantize_int4": 26, "onnx_model": [26, 34], "calibration_data_read": 26, "use_external_data_format": [26, 33], "gemm_io_typ": 26, "gemm": [26, 33], "modelproto": [26, 34], "googl": [26, 31, 81], "protobuf": [26, 31], "intern": [26, 31, 49, 101], "enum_type_wrapp": [26, 31], "enumtypewrapp": [26, 31], "0x7f7a18433710": [26, 31], "quantize_int4_awq_clip": 26, "data_read": 26, "k": [26, 39], "quantize_int4_rtn": 26, "dq_onli": [26, 31], "rtn": 26, "ab": [26, 67], "q": [26, 31, 39], "round_to_even": 26, "denot": 26, "ti": 26, "alwai": 26, "cin": 26, "plug": 26, "rh": 26, "y": [26, 99], "broken": 26, "addit": [27, 47, 48, 49, 53, 81, 91], "qdqconvtranspos": 27, "qdqoperatorbas": 27, "convtranspos": [27, 33], "onnx_quant": 27, "onnx_nod": 27, "init": 27, "qdqnormal": 27, "intend": [27, 49], "contain": [28, 46, 47, 48, 49, 53, 63, 77, 91, 99], "patch_ort_modul": 28, "shoul": 29, "ort_client": 29, "create_inference_sess": 29, "inferencesess": 29, "find_fusible_partit": 30, "partitioned_nod": 30, "non_residual_input": 30, "matmul": [30, 33], "find_hardcoded_pattern": 30, "tail": 30, "mtl_v1": 30, "reducesum": 30, "div": 30, "mul": [30, 33], "sub": [30, 46, 53], "pow": 30, "sqrt": 30, "find_layer_norm_partit": 30, "norm": 30, "find_mha_partit": 30, "mha": 30, "softmax": 30, "least": [30, 47], "find_non_quantizable_partitions_from_pattern": 30, "certain": [30, 48, 49], "counterpart": [30, 49, 65], "expect": [30, 44], "find_quantizable_nod": 30, "yet": [30, 77], "get_skiped_output_lay": 30, "paritially_quantizable_nod": 30, "dq": 31, "insert_dq_nod": 31, "quantized_weight": 31, "insert_qdq_nod": 31, "weight_map": 31, "make_gs_dequantize_nod": 31, "_basename_": 31, "make_gs_dequantize_output": 31, "variabl": [31, 34, 77, 99], "repres": [31, 49, 50, 81, 99], "make_gs_quantize_nod": 31, 
"make_gs_quantize_output": 31, "make_gs_quantized_weight": 31, "wq": 31, "make_gs_scal": 31, "make_gs_zp": 31, "use_trt_qdq_op": 31, "pack_float32_to_4bit_optim": 32, "float32": [32, 77, 81], "4bit": 32, "pack": [32, 41], "everi": 32, "concecut": 32, "byte": [32, 34, 100], "pack_float32_to_4bit": 32, "mainli": 32, "reli": 32, "move": [32, 58, 77, 102], "therebi": 32, "remain": [32, 49], "ceil": 32, "farrai": 32, "calib": [33, 77], "boost": 33, "But": [33, 58], "aka": 33, "drop": 33, "averagepool": 33, "batchnorm": 33, "globalaveragepool": 33, "maxpool": 33, "calibration_method": 33, "nodes_to_exclud": 33, "keep_intermediate_fil": 33, "minmax": 33, "indic": [33, 48, 50, 53, 63, 81, 91, 99, 101], "express": [33, 84], "exclud": 33, "conv__224": 33, "conv__252": 33, "keep": 33, "intermedi": 33, "filenam": 33, "suffix": [33, 98], "throughout": 33, "One": [33, 58, 81], "int4_rtn": 33, "int4_rtn_dq": 33, "int4_rtn_trt": 33, "int4_rtn_trt_dq": 33, "int4_awq_clip": 33, "int4_awq_clip_trt": 33, "model_nam": [33, 39], "duplicate_shared_linear_weight": 34, "duplic": [34, 49, 85], "thei": [34, 49, 60, 65, 84, 91, 99], "graphproto": 34, "find_lowest_common_ancestor": 34, "node1": 34, "node2": 34, "lowest": 34, "ancestor": 34, "second": 34, "lca": 34, "distanc": 34, "gen_random_input": 34, "get_all_input_nam": 34, "get_batch_s": 34, "assert": [34, 48], "fail": 34, "get_batch_size_from_byt": 34, "onnx_byt": 34, "get_child_nod": 34, "consum": 34, "get_input_nam": 34, "external_inputs_onli": 34, "extern": 34, "external_input_nam": 34, "initializer_nam": 34, "get_input_names_from_byt": 34, "model_byt": 34, "get_input_shap": 34, "get_input_shapes_from_byt": 34, "get_node_nam": 34, "get_node_names_from_byt": 34, "get_output_nam": 34, "get_output_names_from_byt": 34, "get_output_shap": 34, "get_parent_nod": 34, "get_variable_input": 34, "is_valid_onnx_model": 34, "file_path": 34, "name_onnx_nod": 34, "assign": [34, 49], "statu": 34, "randomize_weight": 34, "randomize_weights_onnx_byt": 34, "seed": [34, 101], "remove_weights_data": 34, "raw": 34, "save_onnx": 34, "save_as_external_data": 34, "save_onnx_bytes_to_dir": 34, "onnx_dir": 34, "onnx_nam": 34, "validate_batch_s": 34, "equal": [34, 50], "validate_onnx": 34, "els": [34, 38], "far": [36, 59], "nfsworkspac": 37, "workspac": [37, 40], "storag": 37, "nf": [37, 40], "modifit": 37, "involv": 37, "commun": [37, 40], "nor": 37, "barrier": [37, 95], "respons": 37, "synchron": [37, 77, 95, 101], "serial": [37, 81, 99], "workspace_path": [37, 40, 42], "postprocess": [37, 40, 64], "cross": [37, 40], "sharedmemori": 37, "clean": [37, 77], "is_initi": 37, "read_configs_and_weights_from_rank": 37, "target_rank": 37, "write_configs_and_weight": 37, "config_json": 37, "get_configs_parallel": 37, "gather": [37, 65], "shm": 37, "nullabl": 37, "sync": 37, "yield": [37, 40, 48, 49, 54, 77, 81, 91], "empti": [37, 42, 81, 84, 99], "destroi": [37, 91], "consumpt": 37, "get_group": 37, "get_rank": 37, "safe": 37, "get_tensors_parallel": 37, "get_world_s": 37, "world": 37, "model_config": [38, 40, 41, 42, 44], "empir": [38, 44], "except": [38, 44, 77, 81, 84, 98], "build_attention_config": 38, "model_metadata_config": 38, "ext_config": 38, "decoderlayerconfig": [38, 39], "attentionconfig": [38, 39], "build_decoder_config": 38, "build_embedding_config": 38, "normalization_const": 38, "embeddingconfig": [38, 39], "build_layernorm_config": 38, "layernormconfig": [38, 39], "build_linear_config": 38, "linear_typ": [38, 39], "linearconfig": [38, 39, 41], "build_mlp_config": 38, "mlp": 
[38, 39], "mlpconfig": [38, 39], "build_moe_config": 38, "moe": [38, 60], "moeconfig": [38, 39], "build_qkv": 38, "qkv_modul": 38, "qkv": [38, 39, 41], "qkvconfig": [38, 39, 41], "build_stacked_expert": 38, "experts_weight_1": 38, "experts_weight_2": 38, "check_model_compat": 38, "module_list": 38, "And": [38, 39, 77], "posit": [38, 81], "assembl": 38, "modulelist": 38, "final": [38, 49, 53], "get_activation_scaling_factor": 38, "get_kv_cache_dtyp": 38, "kv_cach": 38, "get_kv_cache_scaling_factor": 38, "get_prequant_scaling_factor": 38, "prequant": [38, 39], "get_scaling_factor": 38, "tensorquant": [38, 60, 61, 65, 75, 77], "get_transformer_lay": 38, "root": [38, 48], "get_weight_block_s": 38, "get_weight_scaling_factor": 38, "get_weight_scaling_factor_2": 38, "secondari": 38, "is_attent": 38, "is_decoder_list": 38, "is_embed": 38, "is_layernorm": 38, "is_linear": 38, "is_mlp": 38, "is_mo": 38, "kv_cache_scaling_factor": 39, "kv_cache_dtyp": 39, "rotary_dim": 39, "inf": 39, "clip_qkv": 39, "input_layernorm": 39, "mlp_layernorm": 39, "post_layernorm": 39, "num_attention_head": 39, "attention_head_s": 39, "num_kv_head": 39, "max_position_embed": 39, "rotary_pct": 39, "use_alibi": 39, "new_decoder_architectur": 39, "parallel_attent": 39, "apply_residual_connection_post_layernorm": 39, "use_cach": 39, "rope_ratio": 39, "seq_length": 39, "rotary_bas": 39, "partial_rotary_factor": 39, "moe_num_expert": 39, "moe_top_k": 39, "moe_tp_mod": 39, "moe_renorm_mod": 39, "alibi_bias_max": 39, "residual_layernorm": 39, "residual_mlp": 39, "ffn_hidden_size_loc": 39, "ffn": 39, "hidden": 39, "hidden_s": 39, "local_vocab_s": 39, "vocab_s": 39, "expertconfig": 39, "fc": 39, "proj": 39, "layernorm_typ": 39, "1e": 39, "column": [39, 82], "activation_scaling_factor": 39, "weights_scaling_factor": [39, 41], "weights_scaling_factor_2": 39, "prequant_scaling_factor": 39, "awq_block_s": 39, "gate": [39, 60], "hidden_act": 39, "merged_fc1_g": 39, "mixtur": 39, "router": [39, 60], "modelconfig": [39, 41, 42, 44], "inform": [39, 48, 77, 85, 91], "pipeline_parallel": 39, "float16": [39, 40], "tensor_parallel": 39, "vocab_embed": 39, "position_embed": 39, "ln_emb": 39, "factori": 39, "ln_f": 39, "lm_head": [39, 41, 42, 60, 84], "share_embedding_t": 39, "num_key_value_head": 39, "vocab_size_pad": 39, "pad": [39, 41, 42, 99], "merg": [39, 40, 41, 42, 43], "concat": 39, "fit": 39, "quanitz": 39, "weight_scaling_factor_2": 39, "export_npz": 40, "naive_fp8_quant": 40, "use_nfs_workspac": 40, "split": [40, 41, 42, 58], "manual": 40, "old": 40, "naiv": 40, "nest": [40, 41], "pretrainedconfig": 40, "modeling_util": 40, "uniqu": [40, 49, 85, 99], "tensorrt_llm_config": [40, 44], "from_quantized_weight": 41, "torch_dtyp": 41, "merge_fc1_g": 41, "merge_qkv": 41, "model_config_from_dict": 41, "d": [41, 77], "model_config_to_dict": 41, "naive_quant": 41, "debug": 41, "pack_linear_weight": 41, "pad_weight": 41, "tp_size": [41, 44], "restore_model_config": 41, "recurs": [41, 49, 54, 61, 102], "split_config_and_weight": 41, "prefix": [41, 77, 81], "to_quantized_weight": 41, "check_weight_shape_valid": 42, "training_tensor_parallel": 42, "tp": [42, 44], "recurisv": 42, "pad_embedding_lm_head": 42, "padding_factor": 42, "postprocess_model_config": 42, "training_pipeline_parallel": 42, "pp": 42, "item": [42, 47], "postprocess_tensor": 42, "force_cpu": 42, "force_contigu": 42, "force_non_view": 42, "get_weights_scaling_factor": 43, "group_siz": 43, "facotr": 43, "resmooth_and_get_scal": 43, "merged_weight": 43, "pre_quant_scal": [43, 77], 
"avg_pre_quant_scal": 43, "resmooth": 43, "averag": 43, "weight_scaling_factor": 43, "convert_to_tensorrt_llm_config": 44, "tp_size_overwrit": 44, "overwrit": [44, 49, 84], "builder": 44, "unshard": 44, "is_tensorrt_llm_0_8_or_9": 44, "weights_to_npz": 44, "convert_to_transformer_engin": 45, "transformers_engin": 45, "purpos": [46, 81], "infrastructur": 46, "ingest": 46, "procedur": [46, 47, 53], "manag": [46, 48, 49, 80, 82], "individu": [46, 49, 77, 91], "wihin": 46, "pydant": 47, "basemodel": 47, "modeloptconfig": [47, 60, 84], "modeloptbaseconfig": [47, 48, 60, 63, 84, 86], "our": 47, "extend": [47, 49], "capabl": 47, "easier": [47, 58], "manipul": 47, "alia": [47, 50, 72, 74, 76, 81], "get_field_name_from_kei": 47, "alias": 47, "possibl": [47, 49, 85], "itemsview": 47, "keysview": 47, "model_dump": 47, "dump": 47, "warn": [47, 99], "model_dump_json": 47, "valuesview": 47, "modeloptbaserul": 47, "what": 47, "govern": 47, "classmethod": [47, 48, 49], "customize_rul": 47, "construct": [47, 49, 81], "accord": [47, 49, 67, 91, 99], "get_rule_typ": 47, "wrapped_onli": 47, "typealia": 47, "validate_rul": 47, "cl": 47, "unwrap": [47, 48, 99], "modeloptbaseruleconfig": [47, 84], "made": 47, "register_default": 47, "extra_default": 47, "unregister_default": 47, "unregist": [47, 61], "modeloptfield": 47, "pydanticundefin": 47, "get_kwargs_for_create_model_with_rul": 47, "default_rul": 47, "create_model": 47, "auto": 47, "relev": 47, "rule_field": 47, "docstr": 47, "pertain": 47, "myruleconfig": 47, "get_create_model_kwargs_for_rule_model": 47, "sparsemagnitudeconfig": [47, 84, 91], "conveni": 47, "sinc": [47, 58, 63], "autodoc": 47, "workaround": 47, "burden": 47, "standard": [48, 49, 50, 53, 91, 99], "interfac": [48, 51, 53, 89], "histori": [48, 53], "modeloptstatemanag": 48, "correspondig": 48, "task": [48, 53, 65], "init_st": 48, "add_mod": 48, "_state": 48, "therefor": [48, 49], "recal": 48, "_modedescriptor": [48, 63, 86, 91], "check_mod": 48, "propos": 48, "static": [48, 67, 75, 77, 81], "get_config_class": 48, "has_stat": 48, "trivial": 48, "is_convert": 48, "is_root": 48, "rais": [48, 59, 69, 81, 82, 103], "detect": 48, "last_mod": 48, "last": [48, 81, 99], "modes_with_st": 48, "transfer_state_dict": 48, "model_from": 48, "model_to": [48, 99], "transfer": [48, 58], "update_last_state_before_new_mod": 48, "update_last_state_before_sav": 48, "apply_mod": 48, "form": [48, 99], "model_cl": 48, "quantizemodedescriptor": [48, 63], "_moderegistrycl": 48, "retriev": [48, 99], "error": [48, 63, 78, 80, 98], "bias": 48, "model_weight": 48, "pathlik": 48, "binaryio": 48, "locat": [48, 61], "distributeddataparallel": 48, "previous": [48, 86], "hparam": [49, 54], "dynamicmodul": [49, 50, 75, 87, 99], "famili": 49, "searchabl": 49, "unit": [49, 51, 99], "space": [49, 53, 54, 101], "candid": 49, "dynamicconv2d": 49, "callback": [49, 50], "out_channel": 49, "upon": 49, "temporari": [49, 77], "ensur": [49, 99], "expos": 49, "outermost": 49, "child": [49, 58], "dynamiclinear": 49, "inherit": 49, "__class__": 49, "henc": [49, 87], "simultan": 49, "inject": 49, "rigoruo": 49, "fashion": 49, "vanilla": 49, "still": 49, "sever": 49, "mechan": 49, "parent": 49, "mutual": 49, "exlus": 49, "append": 49, "dyanmic": 49, "anymor": 49, "affect": [49, 99], "simpli": 49, "underli": 49, "revert": 49, "kept": 49, "until": [49, 99], "resultign": 49, "extra_repr": [49, 77], "sure": 49, "__dict__": [49, 81], "heavili": 49, "temporarili": 49, "again": 49, "afterward": 49, "force_assign": 49, "forc": 49, "overwritt": 49, 
"buffer": [49, 77], "circumst": 49, "freez": 49, "restrict": 49, "tbe": 49, "orgin": 49, "although": [49, 50], "get_hparam": 49, "get_paramet": 49, "scalabl": 49, "overriden": 49, "out_features_ratio": 49, "system": 49, "keyword": [49, 58, 93, 99], "_dmregistrycl": 49, "fly": 49, "leav": 49, "intact": 49, "some_dynamic_modul": 49, "named_hparam": [49, 54], "accordingli": [49, 61], "symbol": [49, 50, 81], "reset_dynamic_attribut": 49, "interf": 49, "getattr": 49, "setattr": 49, "delattr": 49, "exit": 49, "dynamicspac": 49, "hyperparamet": [49, 50, 53], "hp": 49, "parameter_nam": 49, "subnet": [49, 53, 86, 101], "convert_to_dynam": 49, "dm_registri": 49, "result": [49, 101], "is_configur": [49, 50, 54], "is_dynam": [49, 54], "named_dynamic_modul": 49, "strict": [49, 77], "exact": 49, "ident": 50, "activeslic": 50, "union": 50, "slice": 50, "longtensor": 50, "importanceestim": 50, "active_slic": 50, "sort": 50, "enforce_ord": 50, "32": [50, 81], "equival": 50, "_order": 50, "todo": 50, "ever": [50, 63], "cycl": 50, "detector": 50, "among": 50, "1d": [50, 72, 85], "in_channel": 50, "conv2d": [50, 72, 81, 84], "score": [50, 53, 91], "associ": 50, "notion": 50, "is_sort": 50, "sortabl": 50, "register_import": 50, "importance_estim": 50, "prune": 51, "prepar": [51, 90, 94], "constitut": 51, "arbitrari": 51, "whenev": 53, "conjunct": [53, 104], "entrypoint": [53, 63, 86], "basesearch": [53, 86, 89], "abc": 53, "overrid": 53, "after_search": [53, 90], "before_search": [53, 90], "constraint": 53, "construct_forward_loop": 53, "silent": 53, "runnabl": 53, "default_search_config": [53, 89, 90], "abstract": [53, 57, 81], "default_state_dict": [53, 89], "dummy_input": [53, 99], "eval_scor": 53, "has_scor": 53, "load_search_checkpoint": 53, "reset_search": 53, "reset": [53, 58, 59, 77], "begin": 53, "run_search": [53, 89], "sanitize_search_config": [53, 89], "sanit": [53, 89], "save_search_checkpoint": 53, "prunabl": 53, "net": [53, 99], "score_func": 53, "satisfi": [53, 93], "upper": [53, 69], "metric": 53, "flop": 53, "convent": [53, 81], "search_space_s": 54, "determin": [56, 101], "histogramcalibr": 58, "_calibr": [58, 59], "unifi": 58, "compute_amax": [58, 59, 77], "percentil": 58, "mse": 58, "boolean": [58, 59, 69, 77, 81, 82], "num_bin": 58, "2048": [58, 60], "grow_method": 58, "skip_zero": 58, "torch_hist": 58, "histc": 58, "stride": 58, "start_bin": 58, "99": 58, "amax": [58, 59, 64, 77, 81, 82], "100": 58, "calibrate_weight": 58, "perchannel": 58, "ideal": 58, "would": [58, 91], "collector": 58, "haven": 58, "decoupl": 58, "decid": [58, 81], "NOT": [58, 81], "everyth": 58, "neuron": 58, "absolut": [59, 81, 82], "maxcalibr": 59, "track": 59, "calib_desc": 59, "maxcalibdescriptor": 59, "readonli": [59, 77], "plot": 59, "track_amax": 59, "runtimeerror": 59, "definit": [60, 65], "cnn": 60, "fp8_default_cfg": 60, "int4_awq_cfg": 60, "w4a8_awq_beta_cfg": 60, "against": [60, 61, 65], "sequentialquant": [60, 61, 75, 77], "sequenti": [60, 77, 84], "block_sparse_mo": 60, "int4_blockwise_weight_only_cfg": 60, "awq_lit": [60, 64], "awq_ful": [60, 64], "max_co_batch_s": [60, 64], "awq_clip": [60, 64], "These": 60, "custom_int4_awq_cfg": 60, "deepcopi": 60, "quantizeconfig": 60, "null": [60, 84], "replace_quant_modul": 61, "set_quantizer_attribut": 61, "quant_model": 61, "wildcard_or_filter_func": [61, 65], "finegrain": 61, "set_from_attribute_dict": [61, 77], "set_quantizer_by_cfg": [61, 65], "quantizeexportmodedescriptor": 63, "placehold": [63, 78, 80], "throw": [63, 78, 80], "properli": 63, 
"config_class": [63, 86], "is_export_mod": [63, 86], "inspect": [63, 86], "export_mod": [63, 86], "next_mod": [63, 86], "immedi": 63, "update_for_new_mod": [63, 86], "update_for_sav": [63, 86], "pair": 64, "4096": 64, "postprocess_amax": 64, "post_process_fn": 64, "disable_quant": 65, "enable_quant": 65, "print_quant_summari": 65, "anyth": 65, "entir": 65, "subsampl": 65, "clipfunct": 67, "univers": [67, 81], "clamp": [67, 69], "scalar": 67, "doesn": [67, 82], "broadcast": [67, 81], "genar": 67, "gradient": [67, 81, 82, 91, 99], "ibm": 67, "pact": 67, "paper": [67, 91], "arxiv": 67, "1805": 67, "06085": 67, "tensorflow": [67, 81, 99], "clip_by_valu": 67, "ctx": [67, 81], "grad_output": [67, 81], "clip_value_min": [67, 69], "clip_value_max": [67, 69], "learn_min": 69, "learn_max": 69, "similar": [69, 77], "valueerror": [69, 81, 82], "conv1d": 72, "quantconv1d": 72, "quantconv2d": 72, "conv3d": 72, "quantconv3d": 72, "convtranspose1d": 72, "quantconvtranspose1d": 72, "convtranspose2d": 72, "quantconvtranspose2d": 72, "convtranspose3d": 72, "quantconvtranspose3d": 72, "_legacyquantlinearconvbasemixin": [72, 74], "default_quant_desc_weight": [72, 74, 75], "scaledquantdescriptor": [72, 74, 75, 77, 81], "3d": [72, 73], "transpos": 72, "quantinstancenorm1d": 73, "_legacyquantinputbasemixin": [73, 76], "instancenorm1d": 73, "quantinstancenorm2d": 73, "instancenorm2d": 73, "4d": 73, "quantinstancenorm3d": 73, "instancenorm3d": 73, "5d": 73, "quantlinear": 74, "quantinputbas": 75, "default_quant_desc_input": 75, "default_quant_desc_output": 75, "quantlinearconvbas": 75, "initialize_quantizer_with_dummy_st": 75, "dummi": 75, "devic": [75, 94, 99, 100], "quantize_weight": 75, "adaptiveavgpool1d": 76, "quantadaptiveavgpool1d": 76, "adaptiveavgpool2d": 76, "quantadaptiveavgpool2d": 76, "adaptiveavgpool3d": 76, "quantadaptiveavgpool3d": 76, "avgpool1d": 76, "quantavgpool1d": 76, "avgpool2d": 76, "quantavgpool2d": 76, "avgpool3d": 76, "quantavgpool3d": 76, "maxpool1d": 76, "quantmaxpool1d": 76, "maxpool2d": 76, "quantmaxpool2d": 76, "maxpool3d": 76, "quantmaxpool3d": 76, "container": 77, "get_modelopt_st": 77, "meta": [77, 86], "replace_sequential_quantizer_with_single_quant": 77, "indx": 77, "attribute_dict": 77, "tensor_quantizer_iter": 77, "itself": 77, "fake_tensor_qu": 77, "if_quant": 77, "bodi": 77, "if_clip": 77, "if_calib": 77, "Not": 77, "probabl": 77, "fake_qu": [77, 81], "step_siz": 77, "mutabl": 77, "clean_up_after_set_from_modelopt_st": 77, "set_from_modelopt_st": 77, "bypass": 77, "neither": 77, "disable_calib": 77, "disable_clip": 77, "disable_qu": 77, "enable_calib": 77, "enable_clip": 77, "enable_qu": 77, "export_amax": 77, "output_dtyp": [77, 81], "init_learn_amax": 77, "is_en": 77, "load_calib_amax": 77, "necessari": [77, 87], "maxbound": 77, "narrow_rang": [77, 81], "symmetr": [77, 81], "reset_amax": 77, "sync_amax_across_distributed_group": 77, "parallel_group": 77, "distributedprocessgroup": [77, 95], "freeze_paramet": 78, "group_paramet": 78, "match_paramet": 78, "quant_weight_inplac": 78, "apex": 79, "deactiv": [80, 84, 99], "enable_onnx_export": 80, "fakeaffinetensorquantfunct": 81, "affin": 81, "gemmlowp": 81, "style": 81, "shift": 81, "master": [81, 95, 98, 99], "reason": 81, "cancel": 81, "come": 81, "penalti": 81, "grad_input": 81, "min_rang": 81, "max_rang": 81, "As": 81, "granular": [81, 82], "faketensorquantfunct": 81, "tensorquantfunct": 81, "legacyfaketensorquantfunct": 81, "comment": 81, "scalede4m3funct": 81, "e4m3fi": 81, "emul": 81, "fpx": 81, "seem": 81, "nice": 81, 
"thing": 81, "ax": 81, "input_tensor": [81, 82], "kcr": 81, "quant_axi": 81, "scale_bit": 81, "scheme": 81, "learn_amax": 81, "learnabl": 81, "scale_amax": 81, "experi": 81, "calib_method": 81, "histogram": 81, "protect": 81, "_": 81, "exactli": [81, 99], "get_block_quant_axes_and_s": 81, "interpret": [81, 99], "127": 81, "grad_scal": 81, "though": 81, "natur": 81, "int32": 81, "255": 81, "scaled_e4m3_abstract": 81, "scaled_e4m3": 81, "export_torch_mod": 82, "is_quant": 82, "is_quantized_column_parallel_linear": 82, "is_quantized_layer_with_weight": 82, "is_quantized_row_parallel_linear": 82, "row": 82, "is_torch_library_support": 82, "exce": 82, "reduce_amax": 82, "keepdim": 82, "unless": 82, "entri": 82, "never": 82, "meant": 82, "deprect": 82, "sens": 82, "unknown": 82, "replace_funct": 82, "new_func": 82, "exportsparseconfig": [84, 86], "export_spars": [84, 86], "sparsegptconfig": [84, 91], "sparse_gpt": 84, "sparseconv2dconfig": 84, "shown": 84, "glob": 84, "unnest": 84, "short": 84, "sparselinearconfig": 84, "inspir": 85, "magnitudesearch": 85, "basesparsesearch": [85, 89, 90], "searcher": [85, 90], "compute_valid_1d_pattern": 85, "vector": 85, "permut": 85, "create_asp_mask": 85, "m4n2_1d": 85, "booltensor": [85, 87], "fill": 85, "ratio": 85, "get_nmprune_info": 85, "mat": 85, "mn_1d_best": 85, "reshape_1d": 85, "dimension": 85, "hw": 85, "exportsparsemodedescriptor": 86, "sparsegptmodedescriptor": 86, "sparsemagnitudemodedescriptor": 86, "search_algorithm": 86, "convert_sparse_model": 86, "restore_export_spars": 86, "restore_sparse_model": 86, "update_sparse_metadata": 86, "sparsemodul": 87, "set_mask": 87, "sparsegptsearch": 90, "hessian": [90, 91], "artifcat": 90, "hook": 90, "create_sgpt_mask": 90, "invert": 90, "hessian_damp": 90, "invers": 90, "finish": 91, "approxim": [91, 101], "carefulli": 91, "runtim": 91, "cannot": [91, 99], "fewer": 91, "run_forward_loop": [91, 99], "thu": 91, "cpp": 93, "load_cpp_extens": 93, "cuda_version_specifi": 93, "fail_msg": 93, "load_kwarg": 93, "instantan": 93, "create_forward_loop": 94, "dataset_nam": 94, "cnn_dailymail": 94, "max_sample_length": 94, "tailor": 94, "feed": [94, 99], "predict": 94, "preprocess": 94, "suitabl": 94, "pretrainedtokenizerfast": 94, "get_dataset_dataload": 94, "tokniz": 94, "instancn": 94, "hugginfac": 94, "backend": 95, "get_data_parallel_group": 95, "get_tensor_parallel_group": 95, "is_mast": 95, "processgroup": 95, "list_closest_to_median": 97, "closest": [97, 101], "val": 97, "avg": 97, "std": 97, "val2list": 97, "repeat_tim": 97, "val2tupl": 97, "min_len": 97, "idx_repeat": 97, "deprecatederror": 98, "notimplementederror": 98, "no_stdout": 98, "silenc": 98, "stdout": 98, "num2hrb": 98, "big": 98, "human": 98, "readabl": 98, "print_rank_0": 98, "compare_dict": 99, "dict1": 99, "dict2": 99, "unmatch": 99, "get_model_attribut": 99, "get_module_devic": 99, "get_same_pad": 99, "kernel_s": 99, "init_model_from_model_lik": 99, "model_cls_or_cal": 99, "is_channels_last": 99, "is_parallel": 99, "make_divis": 99, "divisor": 99, "min_val": 99, "taken": 99, "tf": 99, "repo": 99, "divis": 99, "seen": 99, "research": 99, "slim": 99, "mobilenet": 99, "target_model": 99, "layout": 99, "param_num": 99, "trainable_onli": 99, "1000000": 99, "trainabl": 99, "1e6": 99, "million": 99, "param_num_from_forward": 99, "circumv": 99, "appear": 99, "remove_bn": 99, "max_it": 99, "progress_bar": 99, "infiinit": 99, "exhaust": 99, "z": 99, "label": 99, "descript": 99, "progress": 99, "bar": 99, "set_submodul": 99, "target_submodul": 99, 
"complement": 99, "get_submodul": 99, "standardize_constructor_arg": 99, "constructor_arg": 99, "standardize_model_arg": 99, "model_or_fw_or_sig": 99, "use_kwarg": 99, "signatur": 99, "mtn": [99, 101], "matter": 99, "were": 99, "kw_only_arg": 99, "standardize_model_like_tupl": 99, "standardize_named_model_arg": 99, "args_norm": 99, "args_with_default": 99, "unwrap_model": 99, "raise_error": 99, "msg": 99, "zero_grad": 99, "timer": 100, "contextdecor": 100, "decor": 100, "stop": 100, "clear_cuda_cach": 100, "clear": 100, "get_cuda_memory_stat": 100, "report_memori": 100, "determinist": 101, "centroid": 101, "seq": 101, "prod": 101, "aim": 101, "cheapli": 101, "median": 101, "na": 101, "recogn": 101, "popul": 101, "shuffl": 101, "mutablesequ": 101, "numpy_to_torch": 102, "np_output": 102, "torch_detach": 102, "detach": 102, "torch_to": 102, "torch_to_numpi": 102, "question": 103}, "objects": {"modelopt": [[15, 0, 0, "-", "deploy"], [20, 0, 0, "-", "onnx"], [35, 0, 0, "-", "torch"]], "modelopt.deploy": [[16, 0, 0, "-", "llm"]], "modelopt.deploy.llm": [[17, 0, 0, "-", "generate"], [18, 0, 0, "-", "model_config_trt"], [19, 0, 0, "-", "nemo_utils"]], "modelopt.deploy.llm.generate": [[17, 1, 1, "", "LLM"]], "modelopt.deploy.llm.generate.LLM": [[17, 2, 1, "", "__init__"], [17, 2, 1, "", "generate_text"], [17, 3, 1, "", "max_beam_width"], [17, 3, 1, "", "max_input_len"]], "modelopt.deploy.llm.model_config_trt": [[18, 4, 1, "", "build_tensorrt_llm"], [18, 4, 1, "", "build_tensorrt_llm_rank"]], "modelopt.deploy.llm.nemo_utils": [[19, 1, 1, "", "CustomSentencePieceTokenizer"], [19, 4, 1, "", "get_nemo_tokenizer"], [19, 4, 1, "", "get_tokenzier"]], "modelopt.deploy.llm.nemo_utils.CustomSentencePieceTokenizer": [[19, 2, 1, "", "__init__"], [19, 2, 1, "", "batch_decode"], [19, 2, 1, "", "batch_encode_plus"], [19, 2, 1, "", "decode"], [19, 2, 1, "", "encode"], [19, 3, 1, "", "eos_token"], [19, 3, 1, "", "eos_token_id"], [19, 3, 1, "", "pad_token"], [19, 3, 1, "", "pad_token_id"]], "modelopt.onnx": [[21, 0, 0, "-", "op_types"], [22, 0, 0, "-", "quantization"], [34, 0, 0, "-", "utils"]], "modelopt.onnx.op_types": [[21, 4, 1, "", "get_quantizable_op_types"], [21, 4, 1, "", "is_binary_op"], [21, 4, 1, "", "is_control_flow_op"], [21, 4, 1, "", "is_conversion_op"], [21, 4, 1, "", "is_copy_op"], [21, 4, 1, "", "is_default_quantizable_op_by_ort"], [21, 4, 1, "", "is_fusible_reduction_op"], [21, 4, 1, "", "is_generator_op"], [21, 4, 1, "", "is_irregular_mem_access_op"], [21, 4, 1, "", "is_linear_op"], [21, 4, 1, "", "is_modifier_op"], [21, 4, 1, "", "is_multiclass_op"], [21, 4, 1, "", "is_non_reshape_copy_op"], [21, 4, 1, "", "is_normalization_op"], [21, 4, 1, "", "is_pointwise_or_elementwise_op"], [21, 4, 1, "", "is_pooling_or_window_op"], [21, 4, 1, "", "is_recurrent_op"], [21, 4, 1, "", "is_selection_op"], [21, 4, 1, "", "is_sequence_op"], [21, 4, 1, "", "is_shape_op"], [21, 4, 1, "", "is_unary_op"]], "modelopt.onnx.quantization": [[23, 0, 0, "-", "calib_utils"], [24, 0, 0, "-", "graph_utils"], [25, 0, 0, "-", "gs_patching"], [26, 0, 0, "-", "int4"], [27, 0, 0, "-", "operators"], [28, 0, 0, "-", "ort_patching"], [29, 0, 0, "-", "ort_utils"], [30, 0, 0, "-", "partitioning"], [31, 0, 0, "-", "qdq_utils"], [32, 0, 0, "-", "quant_utils"], [33, 0, 0, "-", "quantize"]], "modelopt.onnx.quantization.calib_utils": [[23, 1, 1, "", "CalibrationDataProvider"], [23, 1, 1, "", "RandomDataProvider"]], "modelopt.onnx.quantization.calib_utils.CalibrationDataProvider": [[23, 2, 1, "", "__init__"], [23, 2, 1, "", "get_next"]], 
"modelopt.onnx.quantization.calib_utils.RandomDataProvider": [[23, 2, 1, "", "__init__"], [23, 2, 1, "", "get_next"]], "modelopt.onnx.quantization.graph_utils": [[24, 4, 1, "", "build_non_residual_input_map"], [24, 4, 1, "", "classify_partition_nodes"], [24, 4, 1, "", "filter_quantizable_kgen_heads"], [24, 4, 1, "", "get_fusible_backbone"], [24, 4, 1, "", "has_const_input"], [24, 4, 1, "", "has_path_type"], [24, 4, 1, "", "is_const_input"], [24, 4, 1, "", "print_stat"], [24, 4, 1, "", "remove_partial_input_qdq"]], "modelopt.onnx.quantization.gs_patching": [[25, 4, 1, "", "patch_gs_modules"]], "modelopt.onnx.quantization.int4": [[26, 1, 1, "", "AWQClipHelper"], [26, 4, 1, "", "dq_tensor"], [26, 4, 1, "", "find_scales"], [26, 4, 1, "", "quant_tensor"], [26, 4, 1, "", "quantize_int4"], [26, 4, 1, "", "quantize_int4_awq_clip"], [26, 4, 1, "", "quantize_int4_rtn"], [26, 4, 1, "", "rtn"]], "modelopt.onnx.quantization.int4.AWQClipHelper": [[26, 2, 1, "", "__init__"], [26, 5, 1, "", "alpha_step"], [26, 5, 1, "", "alphas"], [26, 5, 1, "", "min_alpha"], [26, 2, 1, "", "update_best_params"]], "modelopt.onnx.quantization.operators": [[27, 1, 1, "", "QDQConvTranspose"], [27, 1, 1, "", "QDQNormalization"]], "modelopt.onnx.quantization.operators.QDQConvTranspose": [[27, 2, 1, "", "__init__"], [27, 2, 1, "", "quantize"]], "modelopt.onnx.quantization.operators.QDQNormalization": [[27, 2, 1, "", "__init__"], [27, 2, 1, "", "quantize"]], "modelopt.onnx.quantization.ort_patching": [[28, 4, 1, "", "patch_ort_modules"]], "modelopt.onnx.quantization.ort_utils": [[29, 4, 1, "", "create_inference_session"]], "modelopt.onnx.quantization.partitioning": [[30, 4, 1, "", "find_fusible_partitions"], [30, 4, 1, "", "find_hardcoded_patterns"], [30, 4, 1, "", "find_layer_norm_partitions"], [30, 4, 1, "", "find_mha_partitions"], [30, 4, 1, "", "find_non_quantizable_partitions_from_patterns"], [30, 4, 1, "", "find_quantizable_nodes"], [30, 4, 1, "", "get_skiped_output_layers"]], "modelopt.onnx.quantization.qdq_utils": [[31, 4, 1, "", "insert_dq_nodes"], [31, 4, 1, "", "insert_qdq_nodes"], [31, 4, 1, "", "make_gs_dequantize_node"], [31, 4, 1, "", "make_gs_dequantize_output"], [31, 4, 1, "", "make_gs_quantize_node"], [31, 4, 1, "", "make_gs_quantize_output"], [31, 4, 1, "", "make_gs_quantized_weight"], [31, 4, 1, "", "make_gs_scale"], [31, 4, 1, "", "make_gs_zp"], [31, 4, 1, "", "use_trt_qdq_ops"]], "modelopt.onnx.quantization.quant_utils": [[32, 4, 1, "", "pack_float32_to_4bit_optimized"]], "modelopt.onnx.quantization.quantize": [[33, 4, 1, "", "quantize"]], "modelopt.onnx.utils": [[34, 4, 1, "", "duplicate_shared_linear_weights"], [34, 4, 1, "", "find_lowest_common_ancestor"], [34, 4, 1, "", "gen_random_inputs"], [34, 4, 1, "", "get_all_input_names"], [34, 4, 1, "", "get_batch_size"], [34, 4, 1, "", "get_batch_size_from_bytes"], [34, 4, 1, "", "get_child_nodes"], [34, 4, 1, "", "get_input_names"], [34, 4, 1, "", "get_input_names_from_bytes"], [34, 4, 1, "", "get_input_shapes"], [34, 4, 1, "", "get_input_shapes_from_bytes"], [34, 4, 1, "", "get_node_names"], [34, 4, 1, "", "get_node_names_from_bytes"], [34, 4, 1, "", "get_output_names"], [34, 4, 1, "", "get_output_names_from_bytes"], [34, 4, 1, "", "get_output_shapes"], [34, 4, 1, "", "get_parent_nodes"], [34, 4, 1, "", "get_variable_inputs"], [34, 4, 1, "", "is_valid_onnx_model"], [34, 4, 1, "", "name_onnx_nodes"], [34, 4, 1, "", "randomize_weights"], [34, 4, 1, "", "randomize_weights_onnx_bytes"], [34, 4, 1, "", "remove_weights_data"], [34, 4, 1, "", "save_onnx"], [34, 4, 
1, "", "save_onnx_bytes_to_dir"], [34, 4, 1, "", "validate_batch_size"], [34, 4, 1, "", "validate_onnx"]], "modelopt.torch": [[36, 0, 0, "-", "export"], [46, 0, 0, "-", "opt"], [55, 0, 0, "-", "quantization"], [83, 0, 0, "-", "sparsity"], [92, 0, 0, "-", "utils"]], "modelopt.torch.export": [[37, 0, 0, "-", "distribute"], [38, 0, 0, "-", "layer_utils"], [39, 0, 0, "-", "model_config"], [40, 0, 0, "-", "model_config_export"], [41, 0, 0, "-", "model_config_utils"], [42, 0, 0, "-", "postprocess"], [43, 0, 0, "-", "scaling_factor_utils"], [44, 0, 0, "-", "tensorrt_llm_utils"], [45, 0, 0, "-", "transformer_engine"]], "modelopt.torch.export.distribute": [[37, 1, 1, "", "NFSWorkspace"], [37, 4, 1, "", "barrier"], [37, 4, 1, "", "get_configs_parallel"], [37, 4, 1, "", "get_group"], [37, 4, 1, "", "get_rank"], [37, 4, 1, "", "get_tensors_parallel"], [37, 4, 1, "", "get_world_size"]], "modelopt.torch.export.distribute.NFSWorkspace": [[37, 2, 1, "", "__init__"], [37, 3, 1, "", "is_initialized"], [37, 2, 1, "", "read_configs_and_weights_from_rank"], [37, 2, 1, "", "write_configs_and_weights"]], "modelopt.torch.export.layer_utils": [[38, 4, 1, "", "build_attention_config"], [38, 4, 1, "", "build_decoder_config"], [38, 4, 1, "", "build_embedding_config"], [38, 4, 1, "", "build_layernorm_config"], [38, 4, 1, "", "build_linear_config"], [38, 4, 1, "", "build_mlp_config"], [38, 4, 1, "", "build_moe_config"], [38, 4, 1, "", "build_qkv"], [38, 4, 1, "", "build_stacked_experts"], [38, 4, 1, "", "check_model_compatibility"], [38, 4, 1, "", "get_activation_scaling_factor"], [38, 4, 1, "", "get_kv_cache_dtype"], [38, 4, 1, "", "get_kv_cache_scaling_factor"], [38, 4, 1, "", "get_prequant_scaling_factor"], [38, 4, 1, "", "get_scaling_factor"], [38, 4, 1, "", "get_transformer_layers"], [38, 4, 1, "", "get_weight_block_size"], [38, 4, 1, "", "get_weight_scaling_factor"], [38, 4, 1, "", "get_weight_scaling_factor_2"], [38, 4, 1, "", "is_attention"], [38, 4, 1, "", "is_decoder_list"], [38, 4, 1, "", "is_embedding"], [38, 4, 1, "", "is_layernorm"], [38, 4, 1, "", "is_linear"], [38, 4, 1, "", "is_mlp"], [38, 4, 1, "", "is_moe"]], "modelopt.torch.export.model_config": [[39, 1, 1, "", "AttentionConfig"], [39, 1, 1, "", "DecoderLayerConfig"], [39, 1, 1, "", "EmbeddingConfig"], [39, 1, 1, "", "ExpertConfig"], [39, 1, 1, "", "LayernormConfig"], [39, 1, 1, "", "LinearConfig"], [39, 1, 1, "", "MLPConfig"], [39, 1, 1, "", "MOEConfig"], [39, 1, 1, "", "ModelConfig"], [39, 1, 1, "", "QKVConfig"]], "modelopt.torch.export.model_config.AttentionConfig": [[39, 2, 1, "", "__init__"], [39, 5, 1, "", "clip_qkv"], [39, 5, 1, "", "dense"], [39, 5, 1, "", "kv_cache_dtype"], [39, 5, 1, "", "kv_cache_scaling_factor"], [39, 5, 1, "", "qkv"], [39, 5, 1, "", "rotary_dim"]], "modelopt.torch.export.model_config.DecoderLayerConfig": [[39, 2, 1, "", "__init__"], [39, 5, 1, "", "alibi_bias_max"], [39, 5, 1, "", "apply_residual_connection_post_layernorm"], [39, 5, 1, "", "attention"], [39, 5, 1, "", "attention_head_size"], [39, 5, 1, "", "decoder_type"], [39, 3, 1, "", "ffn_hidden_size_local"], [39, 3, 1, "", "hidden_size"], [39, 5, 1, "", "input_layernorm"], [39, 5, 1, "", "max_position_embeddings"], [39, 5, 1, "", "mlp"], [39, 5, 1, "", "mlp_layernorm"], [39, 5, 1, "", "model_name"], [39, 5, 1, "", "moe_num_experts"], [39, 5, 1, "", "moe_renorm_mode"], [39, 5, 1, "", "moe_top_k"], [39, 5, 1, "", "moe_tp_mode"], [39, 5, 1, "", "new_decoder_architecture"], [39, 5, 1, "", "num_attention_heads"], [39, 5, 1, "", "num_kv_heads"], [39, 5, 1, "", 
"parallel_attention"], [39, 5, 1, "", "partial_rotary_factor"], [39, 5, 1, "", "post_layernorm"], [39, 5, 1, "", "quantization"], [39, 5, 1, "", "residual_layernorm"], [39, 5, 1, "", "residual_mlp"], [39, 5, 1, "", "rope_ratio"], [39, 5, 1, "", "rotary_base"], [39, 5, 1, "", "rotary_pct"], [39, 5, 1, "", "seq_length"], [39, 5, 1, "", "use_alibi"], [39, 5, 1, "", "use_cache"]], "modelopt.torch.export.model_config.EmbeddingConfig": [[39, 2, 1, "", "__init__"], [39, 3, 1, "", "hidden_size"], [39, 3, 1, "", "local_vocab_size"], [39, 5, 1, "", "weight"]], "modelopt.torch.export.model_config.ExpertConfig": [[39, 2, 1, "", "__init__"], [39, 5, 1, "", "fc"], [39, 5, 1, "", "proj"]], "modelopt.torch.export.model_config.LayernormConfig": [[39, 2, 1, "", "__init__"], [39, 5, 1, "", "bias"], [39, 5, 1, "", "eps"], [39, 5, 1, "", "layernorm_type"], [39, 5, 1, "", "weight"]], "modelopt.torch.export.model_config.LinearConfig": [[39, 2, 1, "", "__init__"], [39, 5, 1, "", "activation_scaling_factor"], [39, 5, 1, "", "awq_block_size"], [39, 5, 1, "", "bias"], [39, 5, 1, "", "linear_type"], [39, 5, 1, "", "prequant_scaling_factor"], [39, 5, 1, "", "weight"], [39, 5, 1, "", "weights_scaling_factor"], [39, 5, 1, "", "weights_scaling_factor_2"]], "modelopt.torch.export.model_config.MLPConfig": [[39, 2, 1, "", "__init__"], [39, 5, 1, "", "fc"], [39, 5, 1, "", "gate"], [39, 5, 1, "", "hidden_act"], [39, 5, 1, "", "merged_fc1_gate"], [39, 5, 1, "", "proj"]], "modelopt.torch.export.model_config.MOEConfig": [[39, 2, 1, "", "__init__"], [39, 5, 1, "", "experts"], [39, 3, 1, "", "fc"], [39, 5, 1, "", "hidden_act"], [39, 5, 1, "", "router"]], "modelopt.torch.export.model_config.ModelConfig": [[39, 2, 1, "", "__init__"], [39, 5, 1, "", "dtype"], [39, 3, 1, "", "hidden_act"], [39, 3, 1, "", "hidden_size"], [39, 5, 1, "", "layers"], [39, 5, 1, "", "lm_head"], [39, 5, 1, "", "ln_embed"], [39, 5, 1, "", "ln_f"], [39, 3, 1, "", "max_position_embeddings"], [39, 3, 1, "", "num_attention_heads"], [39, 3, 1, "", "num_kv_heads"], [39, 5, 1, "", "pipeline_parallel"], [39, 5, 1, "", "position_embedding"], [39, 5, 1, "", "quantization"], [39, 5, 1, "", "rank"], [39, 5, 1, "", "share_embedding_table"], [39, 5, 1, "", "tensor_parallel"], [39, 5, 1, "", "version"], [39, 5, 1, "", "vocab_embedding"], [39, 5, 1, "", "vocab_size"], [39, 3, 1, "", "vocab_size_padded"]], "modelopt.torch.export.model_config.QKVConfig": [[39, 2, 1, "", "__init__"], [39, 3, 1, "", "activation_scaling_factor"], [39, 3, 1, "", "awq_block_size"], [39, 3, 1, "", "bias"], [39, 5, 1, "", "k"], [39, 3, 1, "", "prequant_scaling_factor"], [39, 5, 1, "", "q"], [39, 5, 1, "", "v"], [39, 3, 1, "", "weight"], [39, 3, 1, "", "weights_scaling_factor"], [39, 3, 1, "", "weights_scaling_factor_2"]], "modelopt.torch.export.model_config_export": [[40, 4, 1, "", "export_tensorrt_llm_checkpoint"], [40, 4, 1, "", "torch_to_tensorrt_llm_checkpoint"]], "modelopt.torch.export.model_config_utils": [[41, 4, 1, "", "from_quantized_weight"], [41, 4, 1, "", "merge_fc1_gate"], [41, 4, 1, "", "merge_qkv"], [41, 4, 1, "", "model_config_from_dict"], [41, 4, 1, "", "model_config_to_dict"], [41, 4, 1, "", "naive_quantization"], [41, 4, 1, "", "pack_linear_weights"], [41, 4, 1, "", "pad_weights"], [41, 4, 1, "", "restore_model_config"], [41, 4, 1, "", "split_config_and_weights"], [41, 4, 1, "", "to_quantized_weight"]], "modelopt.torch.export.postprocess": [[42, 4, 1, "", "check_weight_shape_valid"], [42, 4, 1, "", "pad_embedding_lm_head"], [42, 4, 1, "", "postprocess_model_config"], [42, 4, 1, "", 
"postprocess_tensors"]], "modelopt.torch.export.scaling_factor_utils": [[43, 4, 1, "", "get_weights_scaling_factor"], [43, 4, 1, "", "resmooth_and_get_scale"]], "modelopt.torch.export.tensorrt_llm_utils": [[44, 4, 1, "", "convert_to_tensorrt_llm_config"], [44, 4, 1, "", "is_tensorrt_llm_0_8_or_9"], [44, 4, 1, "", "weights_to_npz"]], "modelopt.torch.export.transformer_engine": [[45, 4, 1, "", "convert_to_transformer_engine"]], "modelopt.torch.opt": [[47, 0, 0, "-", "config"], [48, 0, 0, "-", "conversion"], [49, 0, 0, "-", "dynamic"], [50, 0, 0, "-", "hparam"], [51, 0, 0, "-", "mode"], [52, 0, 0, "-", "plugins"], [53, 0, 0, "-", "searcher"], [54, 0, 0, "-", "utils"]], "modelopt.torch.opt.config": [[47, 6, 1, "", "ModeloptBaseConfig"], [47, 6, 1, "", "ModeloptBaseRule"], [47, 6, 1, "", "ModeloptBaseRuleConfig"], [47, 4, 1, "", "ModeloptField"], [47, 4, 1, "", "get_kwargs_for_create_model_with_rules"]], "modelopt.torch.opt.config.ModeloptBaseConfig": [[47, 2, 1, "", "get"], [47, 2, 1, "", "get_field_name_from_key"], [47, 2, 1, "", "items"], [47, 2, 1, "", "keys"], [47, 2, 1, "", "model_dump"], [47, 2, 1, "", "model_dump_json"], [47, 2, 1, "", "update"], [47, 2, 1, "", "values"]], "modelopt.torch.opt.config.ModeloptBaseRule": [[47, 2, 1, "", "customize_rule"], [47, 2, 1, "", "get_rule_type"], [47, 2, 1, "", "validate_rule"]], "modelopt.torch.opt.config.ModeloptBaseRuleConfig": [[47, 2, 1, "", "register_default"], [47, 2, 1, "", "unregister_default"]], "modelopt.torch.opt.conversion": [[48, 1, 1, "", "ModeloptStateManager"], [48, 4, 1, "", "apply_mode"], [48, 4, 1, "", "modelopt_state"], [48, 4, 1, "", "restore"], [48, 4, 1, "", "restore_from_modelopt_state"], [48, 4, 1, "", "save"]], "modelopt.torch.opt.conversion.ModeloptStateManager": [[48, 2, 1, "", "__init__"], [48, 2, 1, "", "add_mode"], [48, 2, 1, "", "check_mode"], [48, 2, 1, "", "get_config_class"], [48, 3, 1, "", "has_state"], [48, 2, 1, "", "is_converted"], [48, 3, 1, "", "last_mode"], [48, 2, 1, "", "load_state_dict"], [48, 2, 1, "", "modes_with_states"], [48, 2, 1, "", "state_dict"], [48, 2, 1, "", "transfer_state_dict"], [48, 2, 1, "", "update_last_state_before_new_mode"], [48, 2, 1, "", "update_last_state_before_save"]], "modelopt.torch.opt.dynamic": [[49, 1, 1, "", "DynamicModule"], [49, 1, 1, "", "DynamicSpace"]], "modelopt.torch.opt.dynamic.DynamicModule": [[49, 2, 1, "", "__init__"], [49, 2, 1, "", "convert"], [49, 2, 1, "", "export"], [49, 2, 1, "", "extra_repr"], [49, 2, 1, "", "force_assign"], [49, 2, 1, "", "freeze"], [49, 2, 1, "", "get_hparam"], [49, 2, 1, "", "modify"], [49, 2, 1, "", "named_hparams"], [49, 3, 1, "", "original_cls"], [49, 2, 1, "", "reset_dynamic_attributes"]], "modelopt.torch.opt.dynamic.DynamicSpace": [[49, 2, 1, "", "__init__"], [49, 2, 1, "", "config"], [49, 2, 1, "", "convert_to_dynamic"], [49, 2, 1, "", "export"], [49, 2, 1, "", "get_hparam"], [49, 2, 1, "", "is_configurable"], [49, 2, 1, "", "is_dynamic"], [49, 2, 1, "", "named_dynamic_modules"], [49, 2, 1, "", "named_hparams"], [49, 2, 1, "", "select"], [49, 2, 1, "", "size"]], "modelopt.torch.opt.hparam": [[50, 1, 1, "", "Hparam"]], "modelopt.torch.opt.hparam.Hparam": [[50, 5, 1, "", "ActiveSlice"], [50, 5, 1, "", "Importance"], [50, 5, 1, "", "ImportanceEstimator"], [50, 2, 1, "", "__init__"], [50, 3, 1, "", "active"], [50, 3, 1, "", "active_slice"], [50, 3, 1, "", "choices"], [50, 2, 1, "", "enforce_order"], [50, 3, 1, "", "importance"], [50, 3, 1, "", "is_configurable"], [50, 3, 1, "", "is_sortable"], [50, 3, 1, "", "max"], [50, 3, 1, "", 
"min"], [50, 3, 1, "", "original"], [50, 2, 1, "", "register_importance"]], "modelopt.torch.opt.searcher": [[53, 1, 1, "", "BaseSearcher"]], "modelopt.torch.opt.searcher.BaseSearcher": [[53, 2, 1, "", "__init__"], [53, 2, 1, "", "after_search"], [53, 2, 1, "", "before_search"], [53, 5, 1, "", "config"], [53, 5, 1, "", "constraints"], [53, 2, 1, "", "construct_forward_loop"], [53, 3, 1, "", "default_search_config"], [53, 3, 1, "", "default_state_dict"], [53, 5, 1, "", "dummy_input"], [53, 2, 1, "", "eval_score"], [53, 5, 1, "", "forward_loop"], [53, 3, 1, "", "has_score"], [53, 2, 1, "", "load_search_checkpoint"], [53, 5, 1, "", "model"], [53, 2, 1, "", "reset_search"], [53, 2, 1, "", "run_search"], [53, 2, 1, "", "sanitize_search_config"], [53, 2, 1, "", "save_search_checkpoint"], [53, 2, 1, "", "search"], [53, 2, 1, "", "state_dict"]], "modelopt.torch.opt.utils": [[54, 4, 1, "", "is_configurable"], [54, 4, 1, "", "is_dynamic"], [54, 4, 1, "", "named_hparams"], [54, 4, 1, "", "search_space_size"]], "modelopt.torch.quantization": [[56, 0, 0, "-", "calib"], [60, 0, 0, "-", "config"], [61, 0, 0, "-", "conversion"], [62, 0, 0, "-", "extensions"], [63, 0, 0, "-", "mode"], [64, 0, 0, "-", "model_calib"], [65, 0, 0, "-", "model_quant"], [66, 0, 0, "-", "nn"], [78, 0, 0, "-", "optim"], [79, 0, 0, "-", "plugins"], [80, 0, 0, "-", "quant_modules"], [81, 0, 0, "-", "tensor_quant"], [82, 0, 0, "-", "utils"]], "modelopt.torch.quantization.calib": [[57, 0, 0, "-", "calibrator"], [58, 0, 0, "-", "histogram"], [59, 0, 0, "-", "max"]], "modelopt.torch.quantization.calib.histogram": [[58, 1, 1, "", "HistogramCalibrator"], [58, 4, 1, "", "calibrate_weights"]], "modelopt.torch.quantization.calib.histogram.HistogramCalibrator": [[58, 2, 1, "", "__init__"], [58, 2, 1, "", "collect"], [58, 2, 1, "", "compute_amax"], [58, 2, 1, "", "reset"]], "modelopt.torch.quantization.calib.max": [[59, 1, 1, "", "MaxCalibrator"]], "modelopt.torch.quantization.calib.max.MaxCalibrator": [[59, 2, 1, "", "__init__"], [59, 3, 1, "", "amaxs"], [59, 2, 1, "", "collect"], [59, 2, 1, "", "compute_amax"], [59, 2, 1, "", "reset"]], "modelopt.torch.quantization.config": [[60, 6, 1, "", "QuantizeConfig"]], "modelopt.torch.quantization.config.QuantizeConfig": [[60, 7, 1, "", "algorithm"], [60, 7, 1, "", "quant_cfg"]], "modelopt.torch.quantization.conversion": [[61, 4, 1, "", "register"], [61, 4, 1, "", "replace_quant_module"], [61, 4, 1, "", "set_quantizer_attribute"], [61, 4, 1, "", "set_quantizer_by_cfg"], [61, 4, 1, "", "unregister"]], "modelopt.torch.quantization.mode": [[63, 1, 1, "", "QuantizeExportModeDescriptor"], [63, 1, 1, "", "QuantizeModeDescriptor"]], "modelopt.torch.quantization.mode.QuantizeExportModeDescriptor": [[63, 3, 1, "", "config_class"], [63, 3, 1, "", "convert"], [63, 3, 1, "", "is_export_mode"], [63, 3, 1, "", "name"], [63, 3, 1, "", "restore"]], "modelopt.torch.quantization.mode.QuantizeModeDescriptor": [[63, 3, 1, "", "config_class"], [63, 3, 1, "", "convert"], [63, 3, 1, "", "export_mode"], [63, 3, 1, "", "name"], [63, 3, 1, "", "next_modes"], [63, 3, 1, "", "restore"], [63, 3, 1, "", "update_for_new_mode"], [63, 3, 1, "", "update_for_save"]], "modelopt.torch.quantization.model_calib": [[64, 4, 1, "", "calibrate"], [64, 4, 1, "", "postprocess_amax"]], "modelopt.torch.quantization.model_quant": [[65, 4, 1, "", "disable_quantizer"], [65, 4, 1, "", "enable_quantizer"], [65, 4, 1, "", "fold_weight"], [65, 4, 1, "", "print_quant_summary"], [65, 4, 1, "", "quantize"]], "modelopt.torch.quantization.nn": [[67, 0, 0, "-", 
"functional"], [68, 0, 0, "-", "modules"]], "modelopt.torch.quantization.nn.functional": [[67, 1, 1, "", "ClipFunction"]], "modelopt.torch.quantization.nn.functional.ClipFunction": [[67, 2, 1, "", "backward"], [67, 2, 1, "", "forward"]], "modelopt.torch.quantization.nn.modules": [[69, 0, 0, "-", "clip"], [70, 0, 0, "-", "quant_activations"], [71, 0, 0, "-", "quant_batchnorm"], [72, 0, 0, "-", "quant_conv"], [73, 0, 0, "-", "quant_instancenorm"], [74, 0, 0, "-", "quant_linear"], [75, 0, 0, "-", "quant_module"], [76, 0, 0, "-", "quant_pooling"], [77, 0, 0, "-", "tensor_quantizer"]], "modelopt.torch.quantization.nn.modules.clip": [[69, 1, 1, "", "Clip"]], "modelopt.torch.quantization.nn.modules.clip.Clip": [[69, 2, 1, "", "__init__"], [69, 2, 1, "", "forward"]], "modelopt.torch.quantization.nn.modules.quant_conv": [[72, 5, 1, "", "Conv1d"], [72, 5, 1, "", "Conv2d"], [72, 5, 1, "", "Conv3d"], [72, 5, 1, "", "ConvTranspose1d"], [72, 5, 1, "", "ConvTranspose2d"], [72, 5, 1, "", "ConvTranspose3d"], [72, 1, 1, "", "QuantConv1d"], [72, 1, 1, "", "QuantConv2d"], [72, 1, 1, "", "QuantConv3d"], [72, 1, 1, "", "QuantConvTranspose1d"], [72, 1, 1, "", "QuantConvTranspose2d"], [72, 1, 1, "", "QuantConvTranspose3d"]], "modelopt.torch.quantization.nn.modules.quant_conv.QuantConv1d": [[72, 5, 1, "", "default_quant_desc_weight"]], "modelopt.torch.quantization.nn.modules.quant_conv.QuantConv2d": [[72, 5, 1, "", "default_quant_desc_weight"]], "modelopt.torch.quantization.nn.modules.quant_conv.QuantConv3d": [[72, 5, 1, "", "default_quant_desc_weight"]], "modelopt.torch.quantization.nn.modules.quant_conv.QuantConvTranspose1d": [[72, 5, 1, "", "default_quant_desc_weight"]], "modelopt.torch.quantization.nn.modules.quant_conv.QuantConvTranspose2d": [[72, 5, 1, "", "default_quant_desc_weight"]], "modelopt.torch.quantization.nn.modules.quant_conv.QuantConvTranspose3d": [[72, 5, 1, "", "default_quant_desc_weight"]], "modelopt.torch.quantization.nn.modules.quant_instancenorm": [[73, 1, 1, "", "QuantInstanceNorm1d"], [73, 1, 1, "", "QuantInstanceNorm2d"], [73, 1, 1, "", "QuantInstanceNorm3d"]], "modelopt.torch.quantization.nn.modules.quant_linear": [[74, 5, 1, "", "Linear"], [74, 1, 1, "", "QuantLinear"]], "modelopt.torch.quantization.nn.modules.quant_linear.QuantLinear": [[74, 5, 1, "", "default_quant_desc_weight"]], "modelopt.torch.quantization.nn.modules.quant_module": [[75, 1, 1, "", "QuantInputBase"], [75, 1, 1, "", "QuantLinearConvBase"]], "modelopt.torch.quantization.nn.modules.quant_module.QuantInputBase": [[75, 5, 1, "", "default_quant_desc_input"], [75, 5, 1, "", "default_quant_desc_output"], [75, 2, 1, "", "forward"], [75, 5, 1, "", "input_quantizer"], [75, 5, 1, "", "output_quantizer"]], "modelopt.torch.quantization.nn.modules.quant_module.QuantLinearConvBase": [[75, 5, 1, "", "default_quant_desc_weight"], [75, 2, 1, "", "forward"], [75, 2, 1, "", "initialize_quantizer_with_dummy_states"], [75, 2, 1, "", "quantize_weight"], [75, 5, 1, "", "weight_quantizer"]], "modelopt.torch.quantization.nn.modules.quant_pooling": [[76, 5, 1, "", "AdaptiveAvgPool1d"], [76, 5, 1, "", "AdaptiveAvgPool2d"], [76, 5, 1, "", "AdaptiveAvgPool3d"], [76, 5, 1, "", "AvgPool1d"], [76, 5, 1, "", "AvgPool2d"], [76, 5, 1, "", "AvgPool3d"], [76, 5, 1, "", "MaxPool1d"], [76, 5, 1, "", "MaxPool2d"], [76, 5, 1, "", "MaxPool3d"], [76, 1, 1, "", "QuantAdaptiveAvgPool1d"], [76, 1, 1, "", "QuantAdaptiveAvgPool2d"], [76, 1, 1, "", "QuantAdaptiveAvgPool3d"], [76, 1, 1, "", "QuantAvgPool1d"], [76, 1, 1, "", "QuantAvgPool2d"], [76, 1, 1, "", 
"QuantAvgPool3d"], [76, 1, 1, "", "QuantMaxPool1d"], [76, 1, 1, "", "QuantMaxPool2d"], [76, 1, 1, "", "QuantMaxPool3d"]], "modelopt.torch.quantization.nn.modules.tensor_quantizer": [[77, 1, 1, "", "SequentialQuantizer"], [77, 1, 1, "", "TensorQuantizer"]], "modelopt.torch.quantization.nn.modules.tensor_quantizer.SequentialQuantizer": [[77, 2, 1, "", "__init__"], [77, 2, 1, "", "disable"], [77, 2, 1, "", "get_modelopt_state"], [77, 2, 1, "", "replace_sequential_quantizer_with_single_quantizer"], [77, 2, 1, "", "set_from_attribute_dict"], [77, 2, 1, "", "tensor_quantizer_iterator"]], "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer": [[77, 2, 1, "", "__init__"], [77, 3, 1, "", "amax"], [77, 3, 1, "", "axis"], [77, 3, 1, "", "block_sizes"], [77, 2, 1, "", "clean_up_after_set_from_modelopt_state"], [77, 2, 1, "", "disable"], [77, 2, 1, "", "disable_calib"], [77, 2, 1, "", "disable_clip"], [77, 2, 1, "", "disable_quant"], [77, 2, 1, "", "enable"], [77, 2, 1, "", "enable_calib"], [77, 2, 1, "", "enable_clip"], [77, 2, 1, "", "enable_quant"], [77, 2, 1, "", "export_amax"], [77, 2, 1, "", "extra_repr"], [77, 3, 1, "", "fake_quant"], [77, 2, 1, "", "forward"], [77, 2, 1, "", "get_modelopt_state"], [77, 2, 1, "", "init_learn_amax"], [77, 3, 1, "", "is_enabled"], [77, 2, 1, "", "load_calib_amax"], [77, 3, 1, "", "maxbound"], [77, 3, 1, "", "narrow_range"], [77, 3, 1, "", "num_bits"], [77, 3, 1, "", "pre_quant_scale"], [77, 2, 1, "", "reset_amax"], [77, 3, 1, "", "scale"], [77, 2, 1, "", "set_from_attribute_dict"], [77, 2, 1, "", "set_from_modelopt_state"], [77, 3, 1, "", "step_size"], [77, 2, 1, "", "sync_amax_across_distributed_group"], [77, 3, 1, "", "unsigned"]], "modelopt.torch.quantization.optim": [[78, 4, 1, "", "freeze_parameters"], [78, 4, 1, "", "group_parameters"], [78, 4, 1, "", "match_parameters"], [78, 4, 1, "", "quant_weight_inplace"]], "modelopt.torch.quantization.quant_modules": [[80, 4, 1, "", "deactivate"], [80, 4, 1, "", "enable_onnx_export"], [80, 4, 1, "", "initialize"]], "modelopt.torch.quantization.tensor_quant": [[81, 1, 1, "", "FakeAffineTensorQuantFunction"], [81, 1, 1, "", "FakeTensorQuantFunction"], [81, 1, 1, "", "LegacyFakeTensorQuantFunction"], [81, 5, 1, "", "QuantDescriptor"], [81, 1, 1, "", "ScaledE4M3Function"], [81, 1, 1, "", "ScaledQuantDescriptor"], [81, 1, 1, "", "TensorQuantFunction"], [81, 4, 1, "", "scaled_e4m3_abstract"]], "modelopt.torch.quantization.tensor_quant.FakeAffineTensorQuantFunction": [[81, 2, 1, "", "backward"], [81, 2, 1, "", "forward"]], "modelopt.torch.quantization.tensor_quant.FakeTensorQuantFunction": [[81, 2, 1, "", "backward"], [81, 2, 1, "", "forward"], [81, 2, 1, "", "symbolic"]], "modelopt.torch.quantization.tensor_quant.LegacyFakeTensorQuantFunction": [[81, 2, 1, "", "backward"], [81, 2, 1, "", "forward"]], "modelopt.torch.quantization.tensor_quant.ScaledE4M3Function": [[81, 2, 1, "", "backward"], [81, 2, 1, "", "forward"], [81, 2, 1, "", "symbolic"]], "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor": [[81, 2, 1, "", "__init__"], [81, 3, 1, "", "amax"], [81, 3, 1, "", "axis"], [81, 3, 1, "", "block_sizes"], [81, 3, 1, "", "calib_method"], [81, 2, 1, "", "dict"], [81, 3, 1, "", "fake_quant"], [81, 2, 1, "", "get_block_quant_axes_and_sizes"], [81, 3, 1, "", "learn_amax"], [81, 3, 1, "", "name"], [81, 3, 1, "", "narrow_range"], [81, 3, 1, "", "num_bits"], [81, 3, 1, "", "scale_amax"], [81, 3, 1, "", "unsigned"]], "modelopt.torch.quantization.tensor_quant.TensorQuantFunction": [[81, 2, 1, "", 
"backward"], [81, 2, 1, "", "forward"], [81, 2, 1, "", "symbolic"]], "modelopt.torch.quantization.utils": [[82, 4, 1, "", "export_torch_mode"], [82, 4, 1, "", "is_quantized"], [82, 4, 1, "", "is_quantized_column_parallel_linear"], [82, 4, 1, "", "is_quantized_layer_with_weight"], [82, 4, 1, "", "is_quantized_row_parallel_linear"], [82, 4, 1, "", "is_torch_library_supported"], [82, 4, 1, "", "reduce_amax"], [82, 4, 1, "", "replace_function"]], "modelopt.torch.sparsity": [[84, 0, 0, "-", "config"], [85, 0, 0, "-", "magnitude"], [86, 0, 0, "-", "mode"], [87, 0, 0, "-", "module"], [88, 0, 0, "-", "plugins"], [89, 0, 0, "-", "searcher"], [90, 0, 0, "-", "sparsegpt"], [91, 0, 0, "-", "sparsification"]], "modelopt.torch.sparsity.config": [[84, 6, 1, "", "ExportSparseConfig"], [84, 6, 1, "", "SparseGPTConfig"], [84, 6, 1, "", "SparseMagnitudeConfig"]], "modelopt.torch.sparsity.config.SparseGPTConfig": [[84, 7, 1, "", "nn_conv2d"], [84, 7, 1, "", "nn_linear"]], "modelopt.torch.sparsity.config.SparseMagnitudeConfig": [[84, 7, 1, "", "nn_conv2d"], [84, 7, 1, "", "nn_linear"]], "modelopt.torch.sparsity.magnitude": [[85, 1, 1, "", "MagnitudeSearcher"], [85, 4, 1, "", "compute_valid_1d_patterns"], [85, 4, 1, "", "create_asp_mask"], [85, 4, 1, "", "fill"], [85, 4, 1, "", "get_nmprune_info"], [85, 4, 1, "", "m4n2_1d"], [85, 4, 1, "", "mn_1d_best"], [85, 4, 1, "", "reshape_1d"]], "modelopt.torch.sparsity.mode": [[86, 1, 1, "", "ExportSparseModeDescriptor"], [86, 1, 1, "", "SparseGPTModeDescriptor"], [86, 1, 1, "", "SparseMagnitudeModeDescriptor"], [86, 4, 1, "", "convert_sparse_model"], [86, 4, 1, "", "export_sparse"], [86, 4, 1, "", "restore_export_sparse"], [86, 4, 1, "", "restore_sparse_model"], [86, 4, 1, "", "update_sparse_metadata"]], "modelopt.torch.sparsity.mode.ExportSparseModeDescriptor": [[86, 3, 1, "", "config_class"], [86, 3, 1, "", "convert"], [86, 3, 1, "", "is_export_mode"], [86, 3, 1, "", "name"], [86, 3, 1, "", "restore"]], "modelopt.torch.sparsity.mode.SparseGPTModeDescriptor": [[86, 3, 1, "", "config_class"], [86, 3, 1, "", "name"], [86, 3, 1, "", "search_algorithm"]], "modelopt.torch.sparsity.mode.SparseMagnitudeModeDescriptor": [[86, 3, 1, "", "config_class"], [86, 3, 1, "", "convert"], [86, 3, 1, "", "export_mode"], [86, 3, 1, "", "name"], [86, 3, 1, "", "next_modes"], [86, 3, 1, "", "restore"], [86, 3, 1, "", "search_algorithm"], [86, 3, 1, "", "update_for_new_mode"], [86, 3, 1, "", "update_for_save"]], "modelopt.torch.sparsity.module": [[87, 1, 1, "", "SparseModule"]], "modelopt.torch.sparsity.module.SparseModule": [[87, 2, 1, "", "modify"], [87, 2, 1, "", "set_mask"]], "modelopt.torch.sparsity.searcher": [[89, 1, 1, "", "BaseSparseSearcher"]], "modelopt.torch.sparsity.searcher.BaseSparseSearcher": [[89, 3, 1, "", "default_search_config"], [89, 3, 1, "", "default_state_dict"], [89, 2, 1, "", "run_search"], [89, 2, 1, "", "sanitize_search_config"]], "modelopt.torch.sparsity.sparsegpt": [[90, 1, 1, "", "SparseGPTSearcher"], [90, 4, 1, "", "create_sgpt_mask"], [90, 4, 1, "", "invert"], [90, 4, 1, "", "prepare"]], "modelopt.torch.sparsity.sparsegpt.SparseGPTSearcher": [[90, 2, 1, "", "after_search"], [90, 2, 1, "", "before_search"], [90, 3, 1, "", "default_search_config"]], "modelopt.torch.sparsity.sparsification": [[91, 4, 1, "", "export"], [91, 4, 1, "", "sparsify"]], "modelopt.torch.utils": [[93, 0, 0, "-", "cpp_extension"], [94, 0, 0, "-", "dataset_utils"], [95, 0, 0, "-", "distributed"], [96, 0, 0, "-", "graph"], [97, 0, 0, "-", "list"], [98, 0, 0, "-", "logging"], [99, 0, 0, "-", 
"network"], [100, 0, 0, "-", "perf"], [101, 0, 0, "-", "random"], [102, 0, 0, "-", "tensor"]], "modelopt.torch.utils.cpp_extension": [[93, 4, 1, "", "load_cpp_extension"]], "modelopt.torch.utils.dataset_utils": [[94, 4, 1, "", "create_forward_loop"], [94, 4, 1, "", "get_dataset_dataloader"]], "modelopt.torch.utils.distributed": [[95, 4, 1, "", "backend"], [95, 4, 1, "", "barrier"], [95, 4, 1, "", "get_data_parallel_group"], [95, 4, 1, "", "get_tensor_parallel_group"], [95, 4, 1, "", "is_master"], [95, 4, 1, "", "rank"], [95, 4, 1, "", "set_data_parallel_group"], [95, 4, 1, "", "set_tensor_parallel_group"], [95, 4, 1, "", "size"]], "modelopt.torch.utils.graph": [[96, 4, 1, "", "match"]], "modelopt.torch.utils.list": [[97, 4, 1, "", "list_closest_to_median"], [97, 4, 1, "", "stats"], [97, 4, 1, "", "val2list"], [97, 4, 1, "", "val2tuple"]], "modelopt.torch.utils.logging": [[98, 8, 1, "", "DeprecatedError"], [98, 4, 1, "", "no_stdout"], [98, 4, 1, "", "num2hrb"], [98, 4, 1, "", "print_rank_0"]], "modelopt.torch.utils.network": [[99, 4, 1, "", "compare_dict"], [99, 4, 1, "", "get_model_attributes"], [99, 4, 1, "", "get_module_device"], [99, 4, 1, "", "get_same_padding"], [99, 4, 1, "", "init_model_from_model_like"], [99, 4, 1, "", "is_channels_last"], [99, 4, 1, "", "is_parallel"], [99, 4, 1, "", "make_divisible"], [99, 4, 1, "", "model_to"], [99, 4, 1, "", "param_num"], [99, 4, 1, "", "param_num_from_forward"], [99, 4, 1, "", "remove_bn"], [99, 4, 1, "", "run_forward_loop"], [99, 4, 1, "", "set_submodule"], [99, 4, 1, "", "standardize_constructor_args"], [99, 4, 1, "", "standardize_model_args"], [99, 4, 1, "", "standardize_model_like_tuple"], [99, 4, 1, "", "standardize_named_model_args"], [99, 4, 1, "", "unwrap_model"], [99, 4, 1, "", "zero_grad"]], "modelopt.torch.utils.perf": [[100, 1, 1, "", "Timer"], [100, 4, 1, "", "clear_cuda_cache"], [100, 4, 1, "", "get_cuda_memory_stats"], [100, 4, 1, "", "report_memory"]], "modelopt.torch.utils.perf.Timer": [[100, 2, 1, "", "__init__"], [100, 2, 1, "", "start"], [100, 2, 1, "", "stop"]], "modelopt.torch.utils.random": [[101, 4, 1, "", "centroid"], [101, 4, 1, "", "choice"], [101, 4, 1, "", "original"], [101, 4, 1, "", "random"], [101, 4, 1, "", "sample"], [101, 4, 1, "", "shuffle"]], "modelopt.torch.utils.tensor": [[102, 4, 1, "", "numpy_to_torch"], [102, 4, 1, "", "torch_detach"], [102, 4, 1, "", "torch_to"], [102, 4, 1, "", "torch_to_numpy"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:property", "4": "py:function", "5": "py:attribute", "6": "py:pydantic_model", "7": "py:pydantic_field", "8": "py:exception"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "property", "Python property"], "4": ["py", "function", "Python function"], "5": ["py", "attribute", "Python attribute"], "6": ["py", "pydantic_model", "Python model"], "7": ["py", "pydantic_field", "Python field"], "8": ["py", "exception", "Python exception"]}, "titleterms": {"tensorrt": [0, 2], "llm": [0, 16], "deploy": [0, 4, 12], "export": [0, 36], "quantiz": [0, 2, 4, 6, 8, 9, 10, 11, 22, 33, 55, 60], "model": [0, 2, 3, 4, 5, 7, 10, 11, 12, 13], "support": [0, 12], "matrix": 0, "checkpoint": 0, "convert": 0, "all": 1, "modelopt": [1, 12, 14], "exampl": [1, 12], "overview": 2, "nvidia": 2, "optim": [2, 3, 12, 13, 78], "techniqu": 2, "sparsiti": [2, 5, 7, 83], "instal": 3, "system": 3, "requir": [3, 10], "check": 3, "quick": [4, 5], "start": [4, 5, 12], "ptq": [4, 
10, 11], "pytorch": [4, 5, 11], "post": [5, 7, 10, 11], "train": [5, 7, 8, 10, 11], "sparsif": [5, 7, 91], "pt": 5, "introduct": 7, "save": 7, "restor": 7, "spars": 7, "concept": [7, 8], "structur": 7, "unstructur": 7, "n": 7, "m": 7, "algorithm": [7, 8], "basic": 8, "precis": 8, "format": [8, 60], "scale": 8, "factor": 8, "block": 8, "calibr": [8, 10, 57], "awar": [8, 11], "qat": [8, 11], "more": 8, "read": 8, "best": 9, "practic": 9, "choos": 9, "right": 9, "method": 9, "onnx": [10, 20], "beta": 10, "appli": [10, 11], "prepar": 10, "dataset": 10, "call": 10, "function": [10, 67], "deploi": [10, 15], "compar": 10, "perform": 10, "store": 11, "load": 11, "advanc": 11, "topic": 11, "tensorquant": 11, "custom": 11, "config": [11, 47, 60, 84], "modul": [11, 68, 87], "placement": 11, "fast": 11, "evalu": 11, "welcom": 12, "document": 12, "get": 12, "guid": 12, "refer": 12, "changelog": 13, "0": 13, "11": 13, "2024": 13, "05": 13, "07": 13, "api": 14, "gener": 17, "model_config_trt": 18, "nemo_util": 19, "op_typ": 21, "calib_util": 23, "graph_util": 24, "gs_patch": 25, "int4": 26, "oper": 27, "ort_patch": 28, "ort_util": 29, "partit": 30, "qdq_util": 31, "quant_util": 32, "util": [34, 54, 82, 92], "torch": 35, "distribut": [37, 95], "layer_util": 38, "model_config": 39, "model_config_export": 40, "model_config_util": 41, "postprocess": 42, "scaling_factor_util": 43, "tensorrt_llm_util": 44, "transformer_engin": 45, "opt": 46, "convers": [48, 61], "dynam": 49, "hparam": 50, "mode": [51, 63, 86], "plugin": [52, 79, 88], "searcher": [53, 89], "calib": 56, "histogram": 58, "max": 59, "extens": 62, "model_calib": 64, "model_qu": 65, "nn": 66, "clip": 69, "quant_activ": 70, "quant_batchnorm": 71, "quant_conv": 72, "quant_instancenorm": 73, "quant_linear": 74, "quant_modul": [75, 80], "quant_pool": 76, "tensor_quant": 77, "tensor_qu": 81, "magnitud": 85, "sparsegpt": 90, "cpp_extens": 93, "dataset_util": 94, "graph": 96, "list": 97, "log": 98, "network": 99, "perf": 100, "random": 101, "tensor": 102, "contact": 103, "u": 103, "faq": 104, "1": 104, "potenti": 104, "memori": 104, "leak": 104, "fsdp": 104, "use_orig_param": 104, "true": 104}, "envversion": {"sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "nbsphinx": 4, "sphinx": 60}, "alltitles": {"TensorRT-LLM Deployment": [[0, "tensorrt-llm-deployment"]], "Export Quantized Model": [[0, "export-quantized-model"]], "Model support matrix for the TensorRT-LLM checkpoint export": [[0, "id1"]], "Convert to TensorRT-LLM": [[0, "convert-to-tensorrt-llm"]], "All ModelOpt Examples": [[1, "all-modelopt-examples"]], "Overview": [[2, "overview"]], "NVIDIA TensorRT Model Optimizer": [[2, "nvidia-tensorrt-model-optimizer"]], "Techniques": [[2, "techniques"]], "Quantization": [[2, "quantization"], [4, "quantization"], [6, "quantization"]], "Sparsity": [[2, "sparsity"], [5, "sparsity"], [7, "sparsity"]], "Installation": [[3, "installation"]], "System requirements": [[3, "system-requirements"]], "Install Model Optimizer": [[3, "install-model-optimizer"]], "Check installation": [[3, "check-installation"]], "Quick Start: Quantization": [[4, "quick-start-quantization"]], "PTQ for PyTorch models": [[4, "ptq-for-pytorch-models"]], "Deployment": [[4, "deployment"], [12, null]], "Quick Start: Sparsity": [[5, "quick-start-sparsity"]], "Post-Training 
Sparsification (PTS) for PyTorch models": [[5, "post-training-sparsification-pts-for-pytorch-models"]], "Introduction": [[7, "introduction"]], "Post-Training Sparsification": [[7, "post-training-sparsification"]], "Save and restore the sparse model": [[7, "save-and-restore-the-sparse-model"]], "Sparsity Concepts": [[7, "sparsity-concepts"]], "Structured and Unstructured Sparsity": [[7, "structured-and-unstructured-sparsity"]], "N:M Sparsity": [[7, "n-m-sparsity"]], "Sparsification algorithm": [[7, "sparsification-algorithm"]], "Basic Concepts": [[8, "basic-concepts"]], "Precision format": [[8, "precision-format"]], "Scaling factor": [[8, "scaling-factor"]], "Block format": [[8, "block-format"]], "Calibration algorithm": [[8, "calibration-algorithm"]], "Quantization-aware training (QAT)": [[8, "quantization-aware-training-qat"]], "More Readings": [[8, "more-readings"]], "Best practices to choose the right quantization methods": [[9, "best-practices-to-choose-the-right-quantization-methods"]], "ONNX Quantization (Beta)": [[10, "onnx-quantization-beta"]], "Requirements": [[10, "requirements"]], "Apply Post Training Quantization (PTQ)": [[10, "apply-post-training-quantization-ptq"], [11, "apply-post-training-quantization-ptq"]], "Prepare calibration dataset": [[10, "prepare-calibration-dataset"]], "Call PTQ function": [[10, "call-ptq-function"]], "Deploy Quantized ONNX Model": [[10, "deploy-quantized-onnx-model"]], "Compare the performance": [[10, "compare-the-performance"]], "PyTorch Quantization": [[11, "pytorch-quantization"]], "Quantization-aware Training (QAT)": [[11, "quantization-aware-training-qat"]], "Storing and loading quantized model": [[11, "storing-and-loading-quantized-model"]], "Advanced Topics": [[11, "advanced-topics"]], "TensorQuantizer": [[11, "tensorquantizer"]], "Customize quantizer config": [[11, "customize-quantizer-config"]], "Custom quantized module and quantizer placement": [[11, "custom-quantized-module-and-quantizer-placement"]], "Fast evaluation": [[11, "fast-evaluation"]], "Welcome to Model Optimizer (ModelOpt) documentation!": [[12, "welcome-to-model-optimizer-modelopt-documentation"]], "Getting Started": [[12, null]], "Optimization Guides": [[12, null]], "Examples": [[12, null]], "Reference": [[12, null]], "Support": [[12, null]], "Model Optimizer Changelog": [[13, "model-optimizer-changelog"]], "0.11 (2024-05-07)": [[13, "id1"]], "modelopt API": [[14, "modelopt-api"]], "deploy": [[15, "deploy"]], "llm": [[16, "llm"]], "generate": [[17, "generate"]], "model_config_trt": [[18, "model-config-trt"]], "nemo_utils": [[19, "nemo-utils"]], "onnx": [[20, "onnx"]], "op_types": [[21, "op-types"]], "quantization": [[22, "quantization"], [55, "quantization"]], "calib_utils": [[23, "calib-utils"]], "graph_utils": [[24, "graph-utils"]], "gs_patching": [[25, "gs-patching"]], "int4": [[26, "int4"]], "operators": [[27, "operators"]], "ort_patching": [[28, "ort-patching"]], "ort_utils": [[29, "ort-utils"]], "partitioning": [[30, "partitioning"]], "qdq_utils": [[31, "qdq-utils"]], "quant_utils": [[32, "quant-utils"]], "quantize": [[33, "quantize"]], "utils": [[34, "utils"], [54, "utils"], [82, "utils"], [92, "utils"]], "torch": [[35, "torch"]], "export": [[36, "export"]], "distribute": [[37, "distribute"]], "layer_utils": [[38, "layer-utils"]], "model_config": [[39, "model-config"]], "model_config_export": [[40, "model-config-export"]], "model_config_utils": [[41, "model-config-utils"]], "postprocess": [[42, "postprocess"]], "scaling_factor_utils": [[43, "scaling-factor-utils"]], 
"tensorrt_llm_utils": [[44, "tensorrt-llm-utils"]], "transformer_engine": [[45, "transformer-engine"]], "opt": [[46, "opt"]], "config": [[47, "config"], [60, "config"], [84, "config"]], "conversion": [[48, "conversion"], [61, "conversion"]], "dynamic": [[49, "dynamic"]], "hparam": [[50, "hparam"]], "mode": [[51, "mode"], [63, "mode"], [86, "mode"]], "plugins": [[52, "plugins"], [79, "plugins"], [88, "plugins"]], "searcher": [[53, "searcher"], [89, "searcher"]], "calib": [[56, "calib"]], "calibrator": [[57, "calibrator"]], "histogram": [[58, "histogram"]], "max": [[59, "max"]], "Quantization Formats": [[60, "quantization-formats"]], "Quantization Configs": [[60, "quantization-configs"]], "extensions": [[62, "extensions"]], "model_calib": [[64, "model-calib"]], "model_quant": [[65, "model-quant"]], "nn": [[66, "nn"]], "functional": [[67, "functional"]], "modules": [[68, "modules"]], "clip": [[69, "clip"]], "quant_activations": [[70, "quant-activations"]], "quant_batchnorm": [[71, "quant-batchnorm"]], "quant_conv": [[72, "quant-conv"]], "quant_instancenorm": [[73, "quant-instancenorm"]], "quant_linear": [[74, "quant-linear"]], "quant_module": [[75, "quant-module"]], "quant_pooling": [[76, "quant-pooling"]], "tensor_quantizer": [[77, "tensor-quantizer"]], "optim": [[78, "optim"]], "quant_modules": [[80, "quant-modules"]], "tensor_quant": [[81, "tensor-quant"]], "sparsity": [[83, "sparsity"]], "magnitude": [[85, "magnitude"]], "module": [[87, "module"]], "sparsegpt": [[90, "sparsegpt"]], "sparsification": [[91, "sparsification"]], "cpp_extension": [[93, "cpp-extension"]], "dataset_utils": [[94, "dataset-utils"]], "distributed": [[95, "distributed"]], "graph": [[96, "graph"]], "list": [[97, "list"]], "logging": [[98, "logging"]], "network": [[99, "network"]], "perf": [[100, "perf"]], "random": [[101, "random"]], "tensor": [[102, "tensor"]], "Contact us": [[103, "contact-us"]], "FAQs": [[104, "faqs"]], "1. 
Potential memory leak for FSDP with use_orig_params=True": [[104, "potential-memory-leak-for-fsdp-with-use-orig-params-true"]]}, "indexentries": {"modelopt.deploy": [[15, "module-modelopt.deploy"]], "module": [[15, "module-modelopt.deploy"], [16, "module-modelopt.deploy.llm"], [17, "module-modelopt.deploy.llm.generate"], [18, "module-modelopt.deploy.llm.model_config_trt"], [19, "module-modelopt.deploy.llm.nemo_utils"], [20, "module-modelopt.onnx"], [21, "module-modelopt.onnx.op_types"], [22, "module-modelopt.onnx.quantization"], [23, "module-modelopt.onnx.quantization.calib_utils"], [24, "module-modelopt.onnx.quantization.graph_utils"], [25, "module-modelopt.onnx.quantization.gs_patching"], [26, "module-modelopt.onnx.quantization.int4"], [27, "module-modelopt.onnx.quantization.operators"], [28, "module-modelopt.onnx.quantization.ort_patching"], [29, "module-modelopt.onnx.quantization.ort_utils"], [30, "module-modelopt.onnx.quantization.partitioning"], [31, "module-modelopt.onnx.quantization.qdq_utils"], [32, "module-modelopt.onnx.quantization.quant_utils"], [33, "module-modelopt.onnx.quantization.quantize"], [34, "module-modelopt.onnx.utils"], [35, "module-modelopt.torch"], [36, "module-modelopt.torch.export"], [37, "module-modelopt.torch.export.distribute"], [38, "module-modelopt.torch.export.layer_utils"], [39, "module-modelopt.torch.export.model_config"], [40, "module-modelopt.torch.export.model_config_export"], [41, "module-modelopt.torch.export.model_config_utils"], [42, "module-modelopt.torch.export.postprocess"], [43, "module-modelopt.torch.export.scaling_factor_utils"], [44, "module-modelopt.torch.export.tensorrt_llm_utils"], [45, "module-modelopt.torch.export.transformer_engine"], [46, "module-modelopt.torch.opt"], [47, "module-modelopt.torch.opt.config"], [48, "module-modelopt.torch.opt.conversion"], [49, "module-modelopt.torch.opt.dynamic"], [50, "module-modelopt.torch.opt.hparam"], [51, "module-modelopt.torch.opt.mode"], [52, "module-modelopt.torch.opt.plugins"], [53, "module-modelopt.torch.opt.searcher"], [54, "module-modelopt.torch.opt.utils"], [55, "module-modelopt.torch.quantization"], [56, "module-modelopt.torch.quantization.calib"], [57, "module-modelopt.torch.quantization.calib.calibrator"], [58, "module-modelopt.torch.quantization.calib.histogram"], [59, "module-modelopt.torch.quantization.calib.max"], [60, "module-modelopt.torch.quantization.config"], [61, "module-modelopt.torch.quantization.conversion"], [62, "module-modelopt.torch.quantization.extensions"], [63, "module-modelopt.torch.quantization.mode"], [64, "module-modelopt.torch.quantization.model_calib"], [65, "module-modelopt.torch.quantization.model_quant"], [66, "module-modelopt.torch.quantization.nn"], [67, "module-modelopt.torch.quantization.nn.functional"], [68, "module-modelopt.torch.quantization.nn.modules"], [69, "module-modelopt.torch.quantization.nn.modules.clip"], [70, "module-modelopt.torch.quantization.nn.modules.quant_activations"], [71, "module-modelopt.torch.quantization.nn.modules.quant_batchnorm"], [72, "module-modelopt.torch.quantization.nn.modules.quant_conv"], [73, "module-modelopt.torch.quantization.nn.modules.quant_instancenorm"], [74, "module-modelopt.torch.quantization.nn.modules.quant_linear"], [75, "module-modelopt.torch.quantization.nn.modules.quant_module"], [76, "module-modelopt.torch.quantization.nn.modules.quant_pooling"], [77, "module-modelopt.torch.quantization.nn.modules.tensor_quantizer"], [78, "module-modelopt.torch.quantization.optim"], [79, 
"module-modelopt.torch.quantization.plugins"], [80, "module-modelopt.torch.quantization.quant_modules"], [81, "module-modelopt.torch.quantization.tensor_quant"], [82, "module-modelopt.torch.quantization.utils"], [83, "module-modelopt.torch.sparsity"], [84, "module-modelopt.torch.sparsity.config"], [85, "module-modelopt.torch.sparsity.magnitude"], [86, "module-modelopt.torch.sparsity.mode"], [87, "module-modelopt.torch.sparsity.module"], [88, "module-modelopt.torch.sparsity.plugins"], [89, "module-modelopt.torch.sparsity.searcher"], [90, "module-modelopt.torch.sparsity.sparsegpt"], [91, "module-modelopt.torch.sparsity.sparsification"], [92, "module-modelopt.torch.utils"], [93, "module-modelopt.torch.utils.cpp_extension"], [94, "module-modelopt.torch.utils.dataset_utils"], [95, "module-modelopt.torch.utils.distributed"], [96, "module-modelopt.torch.utils.graph"], [97, "module-modelopt.torch.utils.list"], [98, "module-modelopt.torch.utils.logging"], [99, "module-modelopt.torch.utils.network"], [100, "module-modelopt.torch.utils.perf"], [101, "module-modelopt.torch.utils.random"], [102, "module-modelopt.torch.utils.tensor"]], "modelopt.deploy.llm": [[16, "module-modelopt.deploy.llm"]], "llm (class in modelopt.deploy.llm.generate)": [[17, "modelopt.deploy.llm.generate.LLM"]], "__init__() (llm method)": [[17, "modelopt.deploy.llm.generate.LLM.__init__"]], "generate_text() (llm method)": [[17, "modelopt.deploy.llm.generate.LLM.generate_text"]], "max_beam_width (llm property)": [[17, "modelopt.deploy.llm.generate.LLM.max_beam_width"]], "max_input_len (llm property)": [[17, "modelopt.deploy.llm.generate.LLM.max_input_len"]], "modelopt.deploy.llm.generate": [[17, "module-modelopt.deploy.llm.generate"]], "build_tensorrt_llm() (in module modelopt.deploy.llm.model_config_trt)": [[18, "modelopt.deploy.llm.model_config_trt.build_tensorrt_llm"]], "build_tensorrt_llm_rank() (in module modelopt.deploy.llm.model_config_trt)": [[18, "modelopt.deploy.llm.model_config_trt.build_tensorrt_llm_rank"]], "modelopt.deploy.llm.model_config_trt": [[18, "module-modelopt.deploy.llm.model_config_trt"]], "customsentencepiecetokenizer (class in modelopt.deploy.llm.nemo_utils)": [[19, "modelopt.deploy.llm.nemo_utils.CustomSentencePieceTokenizer"]], "__init__() (customsentencepiecetokenizer method)": [[19, "modelopt.deploy.llm.nemo_utils.CustomSentencePieceTokenizer.__init__"]], "batch_decode() (customsentencepiecetokenizer method)": [[19, "modelopt.deploy.llm.nemo_utils.CustomSentencePieceTokenizer.batch_decode"]], "batch_encode_plus() (customsentencepiecetokenizer method)": [[19, "modelopt.deploy.llm.nemo_utils.CustomSentencePieceTokenizer.batch_encode_plus"]], "decode() (customsentencepiecetokenizer method)": [[19, "modelopt.deploy.llm.nemo_utils.CustomSentencePieceTokenizer.decode"]], "encode() (customsentencepiecetokenizer method)": [[19, "modelopt.deploy.llm.nemo_utils.CustomSentencePieceTokenizer.encode"]], "eos_token (customsentencepiecetokenizer property)": [[19, "modelopt.deploy.llm.nemo_utils.CustomSentencePieceTokenizer.eos_token"]], "eos_token_id (customsentencepiecetokenizer property)": [[19, "modelopt.deploy.llm.nemo_utils.CustomSentencePieceTokenizer.eos_token_id"]], "get_nemo_tokenizer() (in module modelopt.deploy.llm.nemo_utils)": [[19, "modelopt.deploy.llm.nemo_utils.get_nemo_tokenizer"]], "get_tokenzier() (in module modelopt.deploy.llm.nemo_utils)": [[19, "modelopt.deploy.llm.nemo_utils.get_tokenzier"]], "modelopt.deploy.llm.nemo_utils": [[19, "module-modelopt.deploy.llm.nemo_utils"]], "pad_token 
(customsentencepiecetokenizer property)": [[19, "modelopt.deploy.llm.nemo_utils.CustomSentencePieceTokenizer.pad_token"]], "pad_token_id (customsentencepiecetokenizer property)": [[19, "modelopt.deploy.llm.nemo_utils.CustomSentencePieceTokenizer.pad_token_id"]], "modelopt.onnx": [[20, "module-modelopt.onnx"]], "get_quantizable_op_types() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.get_quantizable_op_types"]], "is_binary_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_binary_op"]], "is_control_flow_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_control_flow_op"]], "is_conversion_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_conversion_op"]], "is_copy_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_copy_op"]], "is_default_quantizable_op_by_ort() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_default_quantizable_op_by_ort"]], "is_fusible_reduction_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_fusible_reduction_op"]], "is_generator_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_generator_op"]], "is_irregular_mem_access_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_irregular_mem_access_op"]], "is_linear_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_linear_op"]], "is_modifier_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_modifier_op"]], "is_multiclass_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_multiclass_op"]], "is_non_reshape_copy_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_non_reshape_copy_op"]], "is_normalization_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_normalization_op"]], "is_pointwise_or_elementwise_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_pointwise_or_elementwise_op"]], "is_pooling_or_window_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_pooling_or_window_op"]], "is_recurrent_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_recurrent_op"]], "is_selection_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_selection_op"]], "is_sequence_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_sequence_op"]], "is_shape_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_shape_op"]], "is_unary_op() (in module modelopt.onnx.op_types)": [[21, "modelopt.onnx.op_types.is_unary_op"]], "modelopt.onnx.op_types": [[21, "module-modelopt.onnx.op_types"]], "modelopt.onnx.quantization": [[22, "module-modelopt.onnx.quantization"]], "calibrationdataprovider (class in modelopt.onnx.quantization.calib_utils)": [[23, "modelopt.onnx.quantization.calib_utils.CalibrationDataProvider"]], "randomdataprovider (class in modelopt.onnx.quantization.calib_utils)": [[23, "modelopt.onnx.quantization.calib_utils.RandomDataProvider"]], "__init__() (calibrationdataprovider method)": [[23, "modelopt.onnx.quantization.calib_utils.CalibrationDataProvider.__init__"]], "__init__() (randomdataprovider method)": [[23, "modelopt.onnx.quantization.calib_utils.RandomDataProvider.__init__"]], "get_next() (calibrationdataprovider method)": [[23, "modelopt.onnx.quantization.calib_utils.CalibrationDataProvider.get_next"]], "get_next() (randomdataprovider method)": [[23, 
"modelopt.onnx.quantization.calib_utils.RandomDataProvider.get_next"]], "modelopt.onnx.quantization.calib_utils": [[23, "module-modelopt.onnx.quantization.calib_utils"]], "build_non_residual_input_map() (in module modelopt.onnx.quantization.graph_utils)": [[24, "modelopt.onnx.quantization.graph_utils.build_non_residual_input_map"]], "classify_partition_nodes() (in module modelopt.onnx.quantization.graph_utils)": [[24, "modelopt.onnx.quantization.graph_utils.classify_partition_nodes"]], "filter_quantizable_kgen_heads() (in module modelopt.onnx.quantization.graph_utils)": [[24, "modelopt.onnx.quantization.graph_utils.filter_quantizable_kgen_heads"]], "get_fusible_backbone() (in module modelopt.onnx.quantization.graph_utils)": [[24, "modelopt.onnx.quantization.graph_utils.get_fusible_backbone"]], "has_const_input() (in module modelopt.onnx.quantization.graph_utils)": [[24, "modelopt.onnx.quantization.graph_utils.has_const_input"]], "has_path_type() (in module modelopt.onnx.quantization.graph_utils)": [[24, "modelopt.onnx.quantization.graph_utils.has_path_type"]], "is_const_input() (in module modelopt.onnx.quantization.graph_utils)": [[24, "modelopt.onnx.quantization.graph_utils.is_const_input"]], "modelopt.onnx.quantization.graph_utils": [[24, "module-modelopt.onnx.quantization.graph_utils"]], "print_stat() (in module modelopt.onnx.quantization.graph_utils)": [[24, "modelopt.onnx.quantization.graph_utils.print_stat"]], "remove_partial_input_qdq() (in module modelopt.onnx.quantization.graph_utils)": [[24, "modelopt.onnx.quantization.graph_utils.remove_partial_input_qdq"]], "modelopt.onnx.quantization.gs_patching": [[25, "module-modelopt.onnx.quantization.gs_patching"]], "patch_gs_modules() (in module modelopt.onnx.quantization.gs_patching)": [[25, "modelopt.onnx.quantization.gs_patching.patch_gs_modules"]], "awqcliphelper (class in modelopt.onnx.quantization.int4)": [[26, "modelopt.onnx.quantization.int4.AWQClipHelper"]], "__init__() (awqcliphelper method)": [[26, "modelopt.onnx.quantization.int4.AWQClipHelper.__init__"]], "alpha_step (awqcliphelper attribute)": [[26, "modelopt.onnx.quantization.int4.AWQClipHelper.alpha_step"]], "alphas (awqcliphelper attribute)": [[26, "modelopt.onnx.quantization.int4.AWQClipHelper.alphas"]], "dq_tensor() (in module modelopt.onnx.quantization.int4)": [[26, "modelopt.onnx.quantization.int4.dq_tensor"]], "find_scales() (in module modelopt.onnx.quantization.int4)": [[26, "modelopt.onnx.quantization.int4.find_scales"]], "min_alpha (awqcliphelper attribute)": [[26, "modelopt.onnx.quantization.int4.AWQClipHelper.min_alpha"]], "modelopt.onnx.quantization.int4": [[26, "module-modelopt.onnx.quantization.int4"]], "quant_tensor() (in module modelopt.onnx.quantization.int4)": [[26, "modelopt.onnx.quantization.int4.quant_tensor"]], "quantize_int4() (in module modelopt.onnx.quantization.int4)": [[26, "modelopt.onnx.quantization.int4.quantize_int4"]], "quantize_int4_awq_clip() (in module modelopt.onnx.quantization.int4)": [[26, "modelopt.onnx.quantization.int4.quantize_int4_awq_clip"]], "quantize_int4_rtn() (in module modelopt.onnx.quantization.int4)": [[26, "modelopt.onnx.quantization.int4.quantize_int4_rtn"]], "rtn() (in module modelopt.onnx.quantization.int4)": [[26, "modelopt.onnx.quantization.int4.rtn"]], "update_best_params() (awqcliphelper method)": [[26, "modelopt.onnx.quantization.int4.AWQClipHelper.update_best_params"]], "qdqconvtranspose (class in modelopt.onnx.quantization.operators)": [[27, "modelopt.onnx.quantization.operators.QDQConvTranspose"]], 
"qdqnormalization (class in modelopt.onnx.quantization.operators)": [[27, "modelopt.onnx.quantization.operators.QDQNormalization"]], "__init__() (qdqconvtranspose method)": [[27, "modelopt.onnx.quantization.operators.QDQConvTranspose.__init__"]], "__init__() (qdqnormalization method)": [[27, "modelopt.onnx.quantization.operators.QDQNormalization.__init__"]], "modelopt.onnx.quantization.operators": [[27, "module-modelopt.onnx.quantization.operators"]], "quantize() (qdqconvtranspose method)": [[27, "modelopt.onnx.quantization.operators.QDQConvTranspose.quantize"]], "quantize() (qdqnormalization method)": [[27, "modelopt.onnx.quantization.operators.QDQNormalization.quantize"]], "modelopt.onnx.quantization.ort_patching": [[28, "module-modelopt.onnx.quantization.ort_patching"]], "patch_ort_modules() (in module modelopt.onnx.quantization.ort_patching)": [[28, "modelopt.onnx.quantization.ort_patching.patch_ort_modules"]], "create_inference_session() (in module modelopt.onnx.quantization.ort_utils)": [[29, "modelopt.onnx.quantization.ort_utils.create_inference_session"]], "modelopt.onnx.quantization.ort_utils": [[29, "module-modelopt.onnx.quantization.ort_utils"]], "find_fusible_partitions() (in module modelopt.onnx.quantization.partitioning)": [[30, "modelopt.onnx.quantization.partitioning.find_fusible_partitions"]], "find_hardcoded_patterns() (in module modelopt.onnx.quantization.partitioning)": [[30, "modelopt.onnx.quantization.partitioning.find_hardcoded_patterns"]], "find_layer_norm_partitions() (in module modelopt.onnx.quantization.partitioning)": [[30, "modelopt.onnx.quantization.partitioning.find_layer_norm_partitions"]], "find_mha_partitions() (in module modelopt.onnx.quantization.partitioning)": [[30, "modelopt.onnx.quantization.partitioning.find_mha_partitions"]], "find_non_quantizable_partitions_from_patterns() (in module modelopt.onnx.quantization.partitioning)": [[30, "modelopt.onnx.quantization.partitioning.find_non_quantizable_partitions_from_patterns"]], "find_quantizable_nodes() (in module modelopt.onnx.quantization.partitioning)": [[30, "modelopt.onnx.quantization.partitioning.find_quantizable_nodes"]], "get_skiped_output_layers() (in module modelopt.onnx.quantization.partitioning)": [[30, "modelopt.onnx.quantization.partitioning.get_skiped_output_layers"]], "modelopt.onnx.quantization.partitioning": [[30, "module-modelopt.onnx.quantization.partitioning"]], "insert_dq_nodes() (in module modelopt.onnx.quantization.qdq_utils)": [[31, "modelopt.onnx.quantization.qdq_utils.insert_dq_nodes"]], "insert_qdq_nodes() (in module modelopt.onnx.quantization.qdq_utils)": [[31, "modelopt.onnx.quantization.qdq_utils.insert_qdq_nodes"]], "make_gs_dequantize_node() (in module modelopt.onnx.quantization.qdq_utils)": [[31, "modelopt.onnx.quantization.qdq_utils.make_gs_dequantize_node"]], "make_gs_dequantize_output() (in module modelopt.onnx.quantization.qdq_utils)": [[31, "modelopt.onnx.quantization.qdq_utils.make_gs_dequantize_output"]], "make_gs_quantize_node() (in module modelopt.onnx.quantization.qdq_utils)": [[31, "modelopt.onnx.quantization.qdq_utils.make_gs_quantize_node"]], "make_gs_quantize_output() (in module modelopt.onnx.quantization.qdq_utils)": [[31, "modelopt.onnx.quantization.qdq_utils.make_gs_quantize_output"]], "make_gs_quantized_weight() (in module modelopt.onnx.quantization.qdq_utils)": [[31, "modelopt.onnx.quantization.qdq_utils.make_gs_quantized_weight"]], "make_gs_scale() (in module modelopt.onnx.quantization.qdq_utils)": [[31, 
"modelopt.onnx.quantization.qdq_utils.make_gs_scale"]], "make_gs_zp() (in module modelopt.onnx.quantization.qdq_utils)": [[31, "modelopt.onnx.quantization.qdq_utils.make_gs_zp"]], "modelopt.onnx.quantization.qdq_utils": [[31, "module-modelopt.onnx.quantization.qdq_utils"]], "use_trt_qdq_ops() (in module modelopt.onnx.quantization.qdq_utils)": [[31, "modelopt.onnx.quantization.qdq_utils.use_trt_qdq_ops"]], "modelopt.onnx.quantization.quant_utils": [[32, "module-modelopt.onnx.quantization.quant_utils"]], "pack_float32_to_4bit_optimized() (in module modelopt.onnx.quantization.quant_utils)": [[32, "modelopt.onnx.quantization.quant_utils.pack_float32_to_4bit_optimized"]], "modelopt.onnx.quantization.quantize": [[33, "module-modelopt.onnx.quantization.quantize"]], "quantize() (in module modelopt.onnx.quantization.quantize)": [[33, "modelopt.onnx.quantization.quantize.quantize"]], "duplicate_shared_linear_weights() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.duplicate_shared_linear_weights"]], "find_lowest_common_ancestor() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.find_lowest_common_ancestor"]], "gen_random_inputs() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.gen_random_inputs"]], "get_all_input_names() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_all_input_names"]], "get_batch_size() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_batch_size"]], "get_batch_size_from_bytes() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_batch_size_from_bytes"]], "get_child_nodes() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_child_nodes"]], "get_input_names() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_input_names"]], "get_input_names_from_bytes() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_input_names_from_bytes"]], "get_input_shapes() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_input_shapes"]], "get_input_shapes_from_bytes() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_input_shapes_from_bytes"]], "get_node_names() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_node_names"]], "get_node_names_from_bytes() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_node_names_from_bytes"]], "get_output_names() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_output_names"]], "get_output_names_from_bytes() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_output_names_from_bytes"]], "get_output_shapes() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_output_shapes"]], "get_parent_nodes() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_parent_nodes"]], "get_variable_inputs() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.get_variable_inputs"]], "is_valid_onnx_model() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.is_valid_onnx_model"]], "modelopt.onnx.utils": [[34, "module-modelopt.onnx.utils"]], "name_onnx_nodes() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.name_onnx_nodes"]], "randomize_weights() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.randomize_weights"]], "randomize_weights_onnx_bytes() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.randomize_weights_onnx_bytes"]], "remove_weights_data() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.remove_weights_data"]], "save_onnx() (in module modelopt.onnx.utils)": [[34, 
"modelopt.onnx.utils.save_onnx"]], "save_onnx_bytes_to_dir() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.save_onnx_bytes_to_dir"]], "validate_batch_size() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.validate_batch_size"]], "validate_onnx() (in module modelopt.onnx.utils)": [[34, "modelopt.onnx.utils.validate_onnx"]], "modelopt.torch": [[35, "module-modelopt.torch"]], "modelopt.torch.export": [[36, "module-modelopt.torch.export"]], "nfsworkspace (class in modelopt.torch.export.distribute)": [[37, "modelopt.torch.export.distribute.NFSWorkspace"]], "__init__() (nfsworkspace method)": [[37, "modelopt.torch.export.distribute.NFSWorkspace.__init__"]], "barrier() (in module modelopt.torch.export.distribute)": [[37, "modelopt.torch.export.distribute.barrier"]], "get_configs_parallel() (in module modelopt.torch.export.distribute)": [[37, "modelopt.torch.export.distribute.get_configs_parallel"]], "get_group() (in module modelopt.torch.export.distribute)": [[37, "modelopt.torch.export.distribute.get_group"]], "get_rank() (in module modelopt.torch.export.distribute)": [[37, "modelopt.torch.export.distribute.get_rank"]], "get_tensors_parallel() (in module modelopt.torch.export.distribute)": [[37, "modelopt.torch.export.distribute.get_tensors_parallel"]], "get_world_size() (in module modelopt.torch.export.distribute)": [[37, "modelopt.torch.export.distribute.get_world_size"]], "is_initialized (nfsworkspace property)": [[37, "modelopt.torch.export.distribute.NFSWorkspace.is_initialized"]], "modelopt.torch.export.distribute": [[37, "module-modelopt.torch.export.distribute"]], "read_configs_and_weights_from_rank() (nfsworkspace method)": [[37, "modelopt.torch.export.distribute.NFSWorkspace.read_configs_and_weights_from_rank"]], "write_configs_and_weights() (nfsworkspace method)": [[37, "modelopt.torch.export.distribute.NFSWorkspace.write_configs_and_weights"]], "build_attention_config() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.build_attention_config"]], "build_decoder_config() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.build_decoder_config"]], "build_embedding_config() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.build_embedding_config"]], "build_layernorm_config() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.build_layernorm_config"]], "build_linear_config() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.build_linear_config"]], "build_mlp_config() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.build_mlp_config"]], "build_moe_config() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.build_moe_config"]], "build_qkv() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.build_qkv"]], "build_stacked_experts() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.build_stacked_experts"]], "check_model_compatibility() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.check_model_compatibility"]], "get_activation_scaling_factor() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.get_activation_scaling_factor"]], "get_kv_cache_dtype() (in module modelopt.torch.export.layer_utils)": [[38, 
"modelopt.torch.export.layer_utils.get_kv_cache_dtype"]], "get_kv_cache_scaling_factor() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.get_kv_cache_scaling_factor"]], "get_prequant_scaling_factor() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.get_prequant_scaling_factor"]], "get_scaling_factor() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.get_scaling_factor"]], "get_transformer_layers() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.get_transformer_layers"]], "get_weight_block_size() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.get_weight_block_size"]], "get_weight_scaling_factor() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.get_weight_scaling_factor"]], "get_weight_scaling_factor_2() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.get_weight_scaling_factor_2"]], "is_attention() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.is_attention"]], "is_decoder_list() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.is_decoder_list"]], "is_embedding() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.is_embedding"]], "is_layernorm() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.is_layernorm"]], "is_linear() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.is_linear"]], "is_mlp() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.is_mlp"]], "is_moe() (in module modelopt.torch.export.layer_utils)": [[38, "modelopt.torch.export.layer_utils.is_moe"]], "modelopt.torch.export.layer_utils": [[38, "module-modelopt.torch.export.layer_utils"]], "attentionconfig (class in modelopt.torch.export.model_config)": [[39, "modelopt.torch.export.model_config.AttentionConfig"]], "decoderlayerconfig (class in modelopt.torch.export.model_config)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig"]], "embeddingconfig (class in modelopt.torch.export.model_config)": [[39, "modelopt.torch.export.model_config.EmbeddingConfig"]], "expertconfig (class in modelopt.torch.export.model_config)": [[39, "modelopt.torch.export.model_config.ExpertConfig"]], "layernormconfig (class in modelopt.torch.export.model_config)": [[39, "modelopt.torch.export.model_config.LayernormConfig"]], "linearconfig (class in modelopt.torch.export.model_config)": [[39, "modelopt.torch.export.model_config.LinearConfig"]], "mlpconfig (class in modelopt.torch.export.model_config)": [[39, "modelopt.torch.export.model_config.MLPConfig"]], "moeconfig (class in modelopt.torch.export.model_config)": [[39, "modelopt.torch.export.model_config.MOEConfig"]], "modelconfig (class in modelopt.torch.export.model_config)": [[39, "modelopt.torch.export.model_config.ModelConfig"]], "qkvconfig (class in modelopt.torch.export.model_config)": [[39, "modelopt.torch.export.model_config.QKVConfig"]], "__init__() (attentionconfig method)": [[39, "modelopt.torch.export.model_config.AttentionConfig.__init__"]], "__init__() (decoderlayerconfig method)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.__init__"]], "__init__() (embeddingconfig method)": [[39, "modelopt.torch.export.model_config.EmbeddingConfig.__init__"]], 
"__init__() (expertconfig method)": [[39, "modelopt.torch.export.model_config.ExpertConfig.__init__"]], "__init__() (layernormconfig method)": [[39, "modelopt.torch.export.model_config.LayernormConfig.__init__"]], "__init__() (linearconfig method)": [[39, "modelopt.torch.export.model_config.LinearConfig.__init__"]], "__init__() (mlpconfig method)": [[39, "modelopt.torch.export.model_config.MLPConfig.__init__"]], "__init__() (moeconfig method)": [[39, "modelopt.torch.export.model_config.MOEConfig.__init__"]], "__init__() (modelconfig method)": [[39, "modelopt.torch.export.model_config.ModelConfig.__init__"]], "__init__() (qkvconfig method)": [[39, "modelopt.torch.export.model_config.QKVConfig.__init__"]], "activation_scaling_factor (linearconfig attribute)": [[39, "modelopt.torch.export.model_config.LinearConfig.activation_scaling_factor"]], "activation_scaling_factor (qkvconfig property)": [[39, "modelopt.torch.export.model_config.QKVConfig.activation_scaling_factor"]], "alibi_bias_max (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.alibi_bias_max"]], "apply_residual_connection_post_layernorm (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.apply_residual_connection_post_layernorm"]], "attention (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.attention"]], "attention_head_size (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.attention_head_size"]], "awq_block_size (linearconfig attribute)": [[39, "modelopt.torch.export.model_config.LinearConfig.awq_block_size"]], "awq_block_size (qkvconfig property)": [[39, "modelopt.torch.export.model_config.QKVConfig.awq_block_size"]], "bias (layernormconfig attribute)": [[39, "modelopt.torch.export.model_config.LayernormConfig.bias"]], "bias (linearconfig attribute)": [[39, "modelopt.torch.export.model_config.LinearConfig.bias"]], "bias (qkvconfig property)": [[39, "modelopt.torch.export.model_config.QKVConfig.bias"]], "clip_qkv (attentionconfig attribute)": [[39, "modelopt.torch.export.model_config.AttentionConfig.clip_qkv"]], "decoder_type (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.decoder_type"]], "dense (attentionconfig attribute)": [[39, "modelopt.torch.export.model_config.AttentionConfig.dense"]], "dtype (modelconfig attribute)": [[39, "modelopt.torch.export.model_config.ModelConfig.dtype"]], "eps (layernormconfig attribute)": [[39, "modelopt.torch.export.model_config.LayernormConfig.eps"]], "experts (moeconfig attribute)": [[39, "modelopt.torch.export.model_config.MOEConfig.experts"]], "fc (expertconfig attribute)": [[39, "modelopt.torch.export.model_config.ExpertConfig.fc"]], "fc (mlpconfig attribute)": [[39, "modelopt.torch.export.model_config.MLPConfig.fc"]], "fc (moeconfig property)": [[39, "modelopt.torch.export.model_config.MOEConfig.fc"]], "ffn_hidden_size_local (decoderlayerconfig property)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.ffn_hidden_size_local"]], "gate (mlpconfig attribute)": [[39, "modelopt.torch.export.model_config.MLPConfig.gate"]], "hidden_act (mlpconfig attribute)": [[39, "modelopt.torch.export.model_config.MLPConfig.hidden_act"]], "hidden_act (moeconfig attribute)": [[39, "modelopt.torch.export.model_config.MOEConfig.hidden_act"]], "hidden_act (modelconfig property)": [[39, "modelopt.torch.export.model_config.ModelConfig.hidden_act"]], "hidden_size (decoderlayerconfig 
property)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.hidden_size"]], "hidden_size (embeddingconfig property)": [[39, "modelopt.torch.export.model_config.EmbeddingConfig.hidden_size"]], "hidden_size (modelconfig property)": [[39, "modelopt.torch.export.model_config.ModelConfig.hidden_size"]], "input_layernorm (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.input_layernorm"]], "k (qkvconfig attribute)": [[39, "modelopt.torch.export.model_config.QKVConfig.k"]], "kv_cache_dtype (attentionconfig attribute)": [[39, "modelopt.torch.export.model_config.AttentionConfig.kv_cache_dtype"]], "kv_cache_scaling_factor (attentionconfig attribute)": [[39, "modelopt.torch.export.model_config.AttentionConfig.kv_cache_scaling_factor"]], "layernorm_type (layernormconfig attribute)": [[39, "modelopt.torch.export.model_config.LayernormConfig.layernorm_type"]], "layers (modelconfig attribute)": [[39, "modelopt.torch.export.model_config.ModelConfig.layers"]], "linear_type (linearconfig attribute)": [[39, "modelopt.torch.export.model_config.LinearConfig.linear_type"]], "lm_head (modelconfig attribute)": [[39, "modelopt.torch.export.model_config.ModelConfig.lm_head"]], "ln_embed (modelconfig attribute)": [[39, "modelopt.torch.export.model_config.ModelConfig.ln_embed"]], "ln_f (modelconfig attribute)": [[39, "modelopt.torch.export.model_config.ModelConfig.ln_f"]], "local_vocab_size (embeddingconfig property)": [[39, "modelopt.torch.export.model_config.EmbeddingConfig.local_vocab_size"]], "max_position_embeddings (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.max_position_embeddings"]], "max_position_embeddings (modelconfig property)": [[39, "modelopt.torch.export.model_config.ModelConfig.max_position_embeddings"]], "merged_fc1_gate (mlpconfig attribute)": [[39, "modelopt.torch.export.model_config.MLPConfig.merged_fc1_gate"]], "mlp (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.mlp"]], "mlp_layernorm (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.mlp_layernorm"]], "model_name (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.model_name"]], "modelopt.torch.export.model_config": [[39, "module-modelopt.torch.export.model_config"]], "moe_num_experts (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.moe_num_experts"]], "moe_renorm_mode (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.moe_renorm_mode"]], "moe_top_k (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.moe_top_k"]], "moe_tp_mode (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.moe_tp_mode"]], "new_decoder_architecture (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.new_decoder_architecture"]], "num_attention_heads (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.num_attention_heads"]], "num_attention_heads (modelconfig property)": [[39, "modelopt.torch.export.model_config.ModelConfig.num_attention_heads"]], "num_kv_heads (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.num_kv_heads"]], "num_kv_heads (modelconfig property)": [[39, "modelopt.torch.export.model_config.ModelConfig.num_kv_heads"]], 
"parallel_attention (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.parallel_attention"]], "partial_rotary_factor (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.partial_rotary_factor"]], "pipeline_parallel (modelconfig attribute)": [[39, "modelopt.torch.export.model_config.ModelConfig.pipeline_parallel"]], "position_embedding (modelconfig attribute)": [[39, "modelopt.torch.export.model_config.ModelConfig.position_embedding"]], "post_layernorm (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.post_layernorm"]], "prequant_scaling_factor (linearconfig attribute)": [[39, "modelopt.torch.export.model_config.LinearConfig.prequant_scaling_factor"]], "prequant_scaling_factor (qkvconfig property)": [[39, "modelopt.torch.export.model_config.QKVConfig.prequant_scaling_factor"]], "proj (expertconfig attribute)": [[39, "modelopt.torch.export.model_config.ExpertConfig.proj"]], "proj (mlpconfig attribute)": [[39, "modelopt.torch.export.model_config.MLPConfig.proj"]], "q (qkvconfig attribute)": [[39, "modelopt.torch.export.model_config.QKVConfig.q"]], "qkv (attentionconfig attribute)": [[39, "modelopt.torch.export.model_config.AttentionConfig.qkv"]], "quantization (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.quantization"]], "quantization (modelconfig attribute)": [[39, "modelopt.torch.export.model_config.ModelConfig.quantization"]], "rank (modelconfig attribute)": [[39, "modelopt.torch.export.model_config.ModelConfig.rank"]], "residual_layernorm (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.residual_layernorm"]], "residual_mlp (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.residual_mlp"]], "rope_ratio (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.rope_ratio"]], "rotary_base (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.rotary_base"]], "rotary_dim (attentionconfig attribute)": [[39, "modelopt.torch.export.model_config.AttentionConfig.rotary_dim"]], "rotary_pct (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.rotary_pct"]], "router (moeconfig attribute)": [[39, "modelopt.torch.export.model_config.MOEConfig.router"]], "seq_length (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.seq_length"]], "share_embedding_table (modelconfig attribute)": [[39, "modelopt.torch.export.model_config.ModelConfig.share_embedding_table"]], "tensor_parallel (modelconfig attribute)": [[39, "modelopt.torch.export.model_config.ModelConfig.tensor_parallel"]], "use_alibi (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.use_alibi"]], "use_cache (decoderlayerconfig attribute)": [[39, "modelopt.torch.export.model_config.DecoderLayerConfig.use_cache"]], "v (qkvconfig attribute)": [[39, "modelopt.torch.export.model_config.QKVConfig.v"]], "version (modelconfig attribute)": [[39, "modelopt.torch.export.model_config.ModelConfig.version"]], "vocab_embedding (modelconfig attribute)": [[39, "modelopt.torch.export.model_config.ModelConfig.vocab_embedding"]], "vocab_size (modelconfig attribute)": [[39, "modelopt.torch.export.model_config.ModelConfig.vocab_size"]], "vocab_size_padded (modelconfig property)": [[39, 
"modelopt.torch.export.model_config.ModelConfig.vocab_size_padded"]], "weight (embeddingconfig attribute)": [[39, "modelopt.torch.export.model_config.EmbeddingConfig.weight"]], "weight (layernormconfig attribute)": [[39, "modelopt.torch.export.model_config.LayernormConfig.weight"]], "weight (linearconfig attribute)": [[39, "modelopt.torch.export.model_config.LinearConfig.weight"]], "weight (qkvconfig property)": [[39, "modelopt.torch.export.model_config.QKVConfig.weight"]], "weights_scaling_factor (linearconfig attribute)": [[39, "modelopt.torch.export.model_config.LinearConfig.weights_scaling_factor"]], "weights_scaling_factor (qkvconfig property)": [[39, "modelopt.torch.export.model_config.QKVConfig.weights_scaling_factor"]], "weights_scaling_factor_2 (linearconfig attribute)": [[39, "modelopt.torch.export.model_config.LinearConfig.weights_scaling_factor_2"]], "weights_scaling_factor_2 (qkvconfig property)": [[39, "modelopt.torch.export.model_config.QKVConfig.weights_scaling_factor_2"]], "export_tensorrt_llm_checkpoint() (in module modelopt.torch.export.model_config_export)": [[40, "modelopt.torch.export.model_config_export.export_tensorrt_llm_checkpoint"]], "modelopt.torch.export.model_config_export": [[40, "module-modelopt.torch.export.model_config_export"]], "torch_to_tensorrt_llm_checkpoint() (in module modelopt.torch.export.model_config_export)": [[40, "modelopt.torch.export.model_config_export.torch_to_tensorrt_llm_checkpoint"]], "from_quantized_weight() (in module modelopt.torch.export.model_config_utils)": [[41, "modelopt.torch.export.model_config_utils.from_quantized_weight"]], "merge_fc1_gate() (in module modelopt.torch.export.model_config_utils)": [[41, "modelopt.torch.export.model_config_utils.merge_fc1_gate"]], "merge_qkv() (in module modelopt.torch.export.model_config_utils)": [[41, "modelopt.torch.export.model_config_utils.merge_qkv"]], "model_config_from_dict() (in module modelopt.torch.export.model_config_utils)": [[41, "modelopt.torch.export.model_config_utils.model_config_from_dict"]], "model_config_to_dict() (in module modelopt.torch.export.model_config_utils)": [[41, "modelopt.torch.export.model_config_utils.model_config_to_dict"]], "modelopt.torch.export.model_config_utils": [[41, "module-modelopt.torch.export.model_config_utils"]], "naive_quantization() (in module modelopt.torch.export.model_config_utils)": [[41, "modelopt.torch.export.model_config_utils.naive_quantization"]], "pack_linear_weights() (in module modelopt.torch.export.model_config_utils)": [[41, "modelopt.torch.export.model_config_utils.pack_linear_weights"]], "pad_weights() (in module modelopt.torch.export.model_config_utils)": [[41, "modelopt.torch.export.model_config_utils.pad_weights"]], "restore_model_config() (in module modelopt.torch.export.model_config_utils)": [[41, "modelopt.torch.export.model_config_utils.restore_model_config"]], "split_config_and_weights() (in module modelopt.torch.export.model_config_utils)": [[41, "modelopt.torch.export.model_config_utils.split_config_and_weights"]], "to_quantized_weight() (in module modelopt.torch.export.model_config_utils)": [[41, "modelopt.torch.export.model_config_utils.to_quantized_weight"]], "check_weight_shape_valid() (in module modelopt.torch.export.postprocess)": [[42, "modelopt.torch.export.postprocess.check_weight_shape_valid"]], "modelopt.torch.export.postprocess": [[42, "module-modelopt.torch.export.postprocess"]], "pad_embedding_lm_head() (in module modelopt.torch.export.postprocess)": [[42, 
"modelopt.torch.export.postprocess.pad_embedding_lm_head"]], "postprocess_model_config() (in module modelopt.torch.export.postprocess)": [[42, "modelopt.torch.export.postprocess.postprocess_model_config"]], "postprocess_tensors() (in module modelopt.torch.export.postprocess)": [[42, "modelopt.torch.export.postprocess.postprocess_tensors"]], "get_weights_scaling_factor() (in module modelopt.torch.export.scaling_factor_utils)": [[43, "modelopt.torch.export.scaling_factor_utils.get_weights_scaling_factor"]], "modelopt.torch.export.scaling_factor_utils": [[43, "module-modelopt.torch.export.scaling_factor_utils"]], "resmooth_and_get_scale() (in module modelopt.torch.export.scaling_factor_utils)": [[43, "modelopt.torch.export.scaling_factor_utils.resmooth_and_get_scale"]], "convert_to_tensorrt_llm_config() (in module modelopt.torch.export.tensorrt_llm_utils)": [[44, "modelopt.torch.export.tensorrt_llm_utils.convert_to_tensorrt_llm_config"]], "is_tensorrt_llm_0_8_or_9() (in module modelopt.torch.export.tensorrt_llm_utils)": [[44, "modelopt.torch.export.tensorrt_llm_utils.is_tensorrt_llm_0_8_or_9"]], "modelopt.torch.export.tensorrt_llm_utils": [[44, "module-modelopt.torch.export.tensorrt_llm_utils"]], "weights_to_npz() (in module modelopt.torch.export.tensorrt_llm_utils)": [[44, "modelopt.torch.export.tensorrt_llm_utils.weights_to_npz"]], "convert_to_transformer_engine() (in module modelopt.torch.export.transformer_engine)": [[45, "modelopt.torch.export.transformer_engine.convert_to_transformer_engine"]], "modelopt.torch.export.transformer_engine": [[45, "module-modelopt.torch.export.transformer_engine"]], "modelopt.torch.opt": [[46, "module-modelopt.torch.opt"]], "modeloptfield() (in module modelopt.torch.opt.config)": [[47, "modelopt.torch.opt.config.ModeloptField"]], "customize_rule() (modeloptbaserule class method)": [[47, "modelopt.torch.opt.config.ModeloptBaseRule.customize_rule"]], "get() (modeloptbaseconfig method)": [[47, "modelopt.torch.opt.config.ModeloptBaseConfig.get"]], "get_field_name_from_key() (modeloptbaseconfig method)": [[47, "modelopt.torch.opt.config.ModeloptBaseConfig.get_field_name_from_key"]], "get_kwargs_for_create_model_with_rules() (in module modelopt.torch.opt.config)": [[47, "modelopt.torch.opt.config.get_kwargs_for_create_model_with_rules"]], "get_rule_type() (modeloptbaserule class method)": [[47, "modelopt.torch.opt.config.ModeloptBaseRule.get_rule_type"]], "items() (modeloptbaseconfig method)": [[47, "modelopt.torch.opt.config.ModeloptBaseConfig.items"]], "keys() (modeloptbaseconfig method)": [[47, "modelopt.torch.opt.config.ModeloptBaseConfig.keys"]], "model_dump() (modeloptbaseconfig method)": [[47, "modelopt.torch.opt.config.ModeloptBaseConfig.model_dump"]], "model_dump_json() (modeloptbaseconfig method)": [[47, "modelopt.torch.opt.config.ModeloptBaseConfig.model_dump_json"]], "modelopt.torch.opt.config": [[47, "module-modelopt.torch.opt.config"]], "register_default() (modeloptbaseruleconfig class method)": [[47, "modelopt.torch.opt.config.ModeloptBaseRuleConfig.register_default"]], "unregister_default() (modeloptbaseruleconfig class method)": [[47, "modelopt.torch.opt.config.ModeloptBaseRuleConfig.unregister_default"]], "update() (modeloptbaseconfig method)": [[47, "modelopt.torch.opt.config.ModeloptBaseConfig.update"]], "validate_rule() (modeloptbaserule class method)": [[47, "modelopt.torch.opt.config.ModeloptBaseRule.validate_rule"]], "values() (modeloptbaseconfig method)": [[47, "modelopt.torch.opt.config.ModeloptBaseConfig.values"]], "modeloptstatemanager 
(class in modelopt.torch.opt.conversion)": [[48, "modelopt.torch.opt.conversion.ModeloptStateManager"]], "__init__() (modeloptstatemanager method)": [[48, "modelopt.torch.opt.conversion.ModeloptStateManager.__init__"]], "add_mode() (modeloptstatemanager method)": [[48, "modelopt.torch.opt.conversion.ModeloptStateManager.add_mode"]], "apply_mode() (in module modelopt.torch.opt.conversion)": [[48, "modelopt.torch.opt.conversion.apply_mode"]], "check_mode() (modeloptstatemanager method)": [[48, "modelopt.torch.opt.conversion.ModeloptStateManager.check_mode"]], "get_config_class() (modeloptstatemanager static method)": [[48, "modelopt.torch.opt.conversion.ModeloptStateManager.get_config_class"]], "has_state (modeloptstatemanager property)": [[48, "modelopt.torch.opt.conversion.ModeloptStateManager.has_state"]], "is_converted() (modeloptstatemanager class method)": [[48, "modelopt.torch.opt.conversion.ModeloptStateManager.is_converted"]], "last_mode (modeloptstatemanager property)": [[48, "modelopt.torch.opt.conversion.ModeloptStateManager.last_mode"]], "load_state_dict() (modeloptstatemanager method)": [[48, "modelopt.torch.opt.conversion.ModeloptStateManager.load_state_dict"]], "modelopt.torch.opt.conversion": [[48, "module-modelopt.torch.opt.conversion"]], "modelopt_state() (in module modelopt.torch.opt.conversion)": [[48, "modelopt.torch.opt.conversion.modelopt_state"]], "modes_with_states() (modeloptstatemanager method)": [[48, "modelopt.torch.opt.conversion.ModeloptStateManager.modes_with_states"]], "restore() (in module modelopt.torch.opt.conversion)": [[48, "modelopt.torch.opt.conversion.restore"]], "restore_from_modelopt_state() (in module modelopt.torch.opt.conversion)": [[48, "modelopt.torch.opt.conversion.restore_from_modelopt_state"]], "save() (in module modelopt.torch.opt.conversion)": [[48, "modelopt.torch.opt.conversion.save"]], "state_dict() (modeloptstatemanager method)": [[48, "modelopt.torch.opt.conversion.ModeloptStateManager.state_dict"]], "transfer_state_dict() (modeloptstatemanager class method)": [[48, "modelopt.torch.opt.conversion.ModeloptStateManager.transfer_state_dict"]], "update_last_state_before_new_mode() (modeloptstatemanager method)": [[48, "modelopt.torch.opt.conversion.ModeloptStateManager.update_last_state_before_new_mode"]], "update_last_state_before_save() (modeloptstatemanager method)": [[48, "modelopt.torch.opt.conversion.ModeloptStateManager.update_last_state_before_save"]], "dynamicmodule (class in modelopt.torch.opt.dynamic)": [[49, "modelopt.torch.opt.dynamic.DynamicModule"]], "dynamicspace (class in modelopt.torch.opt.dynamic)": [[49, "modelopt.torch.opt.dynamic.DynamicSpace"]], "__init__() (dynamicmodule method)": [[49, "modelopt.torch.opt.dynamic.DynamicModule.__init__"]], "__init__() (dynamicspace method)": [[49, "modelopt.torch.opt.dynamic.DynamicSpace.__init__"]], "config() (dynamicspace method)": [[49, "modelopt.torch.opt.dynamic.DynamicSpace.config"]], "convert() (dynamicmodule class method)": [[49, "modelopt.torch.opt.dynamic.DynamicModule.convert"]], "convert_to_dynamic() (dynamicspace method)": [[49, "modelopt.torch.opt.dynamic.DynamicSpace.convert_to_dynamic"]], "export() (dynamicmodule method)": [[49, "modelopt.torch.opt.dynamic.DynamicModule.export"]], "export() (dynamicspace method)": [[49, "modelopt.torch.opt.dynamic.DynamicSpace.export"]], "extra_repr() (dynamicmodule method)": [[49, "modelopt.torch.opt.dynamic.DynamicModule.extra_repr"]], "force_assign() (dynamicmodule method)": [[49, 
"modelopt.torch.opt.dynamic.DynamicModule.force_assign"]], "freeze() (dynamicmodule method)": [[49, "modelopt.torch.opt.dynamic.DynamicModule.freeze"]], "get_hparam() (dynamicmodule method)": [[49, "modelopt.torch.opt.dynamic.DynamicModule.get_hparam"]], "get_hparam() (dynamicspace method)": [[49, "modelopt.torch.opt.dynamic.DynamicSpace.get_hparam"]], "is_configurable() (dynamicspace method)": [[49, "modelopt.torch.opt.dynamic.DynamicSpace.is_configurable"]], "is_dynamic() (dynamicspace method)": [[49, "modelopt.torch.opt.dynamic.DynamicSpace.is_dynamic"]], "modelopt.torch.opt.dynamic": [[49, "module-modelopt.torch.opt.dynamic"]], "modify() (dynamicmodule method)": [[49, "modelopt.torch.opt.dynamic.DynamicModule.modify"]], "named_dynamic_modules() (dynamicspace method)": [[49, "modelopt.torch.opt.dynamic.DynamicSpace.named_dynamic_modules"]], "named_hparams() (dynamicmodule method)": [[49, "modelopt.torch.opt.dynamic.DynamicModule.named_hparams"]], "named_hparams() (dynamicspace method)": [[49, "modelopt.torch.opt.dynamic.DynamicSpace.named_hparams"]], "original_cls (dynamicmodule property)": [[49, "modelopt.torch.opt.dynamic.DynamicModule.original_cls"]], "reset_dynamic_attributes() (dynamicmodule method)": [[49, "modelopt.torch.opt.dynamic.DynamicModule.reset_dynamic_attributes"]], "select() (dynamicspace method)": [[49, "modelopt.torch.opt.dynamic.DynamicSpace.select"]], "size() (dynamicspace method)": [[49, "modelopt.torch.opt.dynamic.DynamicSpace.size"]], "activeslice (hparam attribute)": [[50, "modelopt.torch.opt.hparam.Hparam.ActiveSlice"]], "hparam (class in modelopt.torch.opt.hparam)": [[50, "modelopt.torch.opt.hparam.Hparam"]], "importance (hparam attribute)": [[50, "modelopt.torch.opt.hparam.Hparam.Importance"]], "importanceestimator (hparam attribute)": [[50, "modelopt.torch.opt.hparam.Hparam.ImportanceEstimator"]], "__init__() (hparam method)": [[50, "modelopt.torch.opt.hparam.Hparam.__init__"]], "active (hparam property)": [[50, "modelopt.torch.opt.hparam.Hparam.active"]], "active_slice (hparam property)": [[50, "modelopt.torch.opt.hparam.Hparam.active_slice"]], "choices (hparam property)": [[50, "modelopt.torch.opt.hparam.Hparam.choices"]], "enforce_order() (hparam method)": [[50, "modelopt.torch.opt.hparam.Hparam.enforce_order"]], "importance (hparam property)": [[50, "modelopt.torch.opt.hparam.Hparam.importance"]], "is_configurable (hparam property)": [[50, "modelopt.torch.opt.hparam.Hparam.is_configurable"]], "is_sortable (hparam property)": [[50, "modelopt.torch.opt.hparam.Hparam.is_sortable"]], "max (hparam property)": [[50, "modelopt.torch.opt.hparam.Hparam.max"]], "min (hparam property)": [[50, "modelopt.torch.opt.hparam.Hparam.min"]], "modelopt.torch.opt.hparam": [[50, "module-modelopt.torch.opt.hparam"]], "original (hparam property)": [[50, "modelopt.torch.opt.hparam.Hparam.original"]], "register_importance() (hparam method)": [[50, "modelopt.torch.opt.hparam.Hparam.register_importance"]], "modelopt.torch.opt.mode": [[51, "module-modelopt.torch.opt.mode"]], "modelopt.torch.opt.plugins": [[52, "module-modelopt.torch.opt.plugins"]], "basesearcher (class in modelopt.torch.opt.searcher)": [[53, "modelopt.torch.opt.searcher.BaseSearcher"]], "__init__() (basesearcher method)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.__init__"]], "after_search() (basesearcher method)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.after_search"]], "before_search() (basesearcher method)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.before_search"]], "config (basesearcher 
attribute)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.config"]], "constraints (basesearcher attribute)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.constraints"]], "construct_forward_loop() (basesearcher method)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.construct_forward_loop"]], "default_search_config (basesearcher property)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.default_search_config"]], "default_state_dict (basesearcher property)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.default_state_dict"]], "dummy_input (basesearcher attribute)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.dummy_input"]], "eval_score() (basesearcher method)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.eval_score"]], "forward_loop (basesearcher attribute)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.forward_loop"]], "has_score (basesearcher property)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.has_score"]], "load_search_checkpoint() (basesearcher method)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.load_search_checkpoint"]], "model (basesearcher attribute)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.model"]], "modelopt.torch.opt.searcher": [[53, "module-modelopt.torch.opt.searcher"]], "reset_search() (basesearcher method)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.reset_search"]], "run_search() (basesearcher method)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.run_search"]], "sanitize_search_config() (basesearcher method)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.sanitize_search_config"]], "save_search_checkpoint() (basesearcher method)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.save_search_checkpoint"]], "search() (basesearcher method)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.search"]], "state_dict() (basesearcher method)": [[53, "modelopt.torch.opt.searcher.BaseSearcher.state_dict"]], "is_configurable() (in module modelopt.torch.opt.utils)": [[54, "modelopt.torch.opt.utils.is_configurable"]], "is_dynamic() (in module modelopt.torch.opt.utils)": [[54, "modelopt.torch.opt.utils.is_dynamic"]], "modelopt.torch.opt.utils": [[54, "module-modelopt.torch.opt.utils"]], "named_hparams() (in module modelopt.torch.opt.utils)": [[54, "modelopt.torch.opt.utils.named_hparams"]], "search_space_size() (in module modelopt.torch.opt.utils)": [[54, "modelopt.torch.opt.utils.search_space_size"]], "modelopt.torch.quantization": [[55, "module-modelopt.torch.quantization"]], "modelopt.torch.quantization.calib": [[56, "module-modelopt.torch.quantization.calib"]], "modelopt.torch.quantization.calib.calibrator": [[57, "module-modelopt.torch.quantization.calib.calibrator"]], "histogramcalibrator (class in modelopt.torch.quantization.calib.histogram)": [[58, "modelopt.torch.quantization.calib.histogram.HistogramCalibrator"]], "__init__() (histogramcalibrator method)": [[58, "modelopt.torch.quantization.calib.histogram.HistogramCalibrator.__init__"]], "calibrate_weights() (in module modelopt.torch.quantization.calib.histogram)": [[58, "modelopt.torch.quantization.calib.histogram.calibrate_weights"]], "collect() (histogramcalibrator method)": [[58, "modelopt.torch.quantization.calib.histogram.HistogramCalibrator.collect"]], "compute_amax() (histogramcalibrator method)": [[58, "modelopt.torch.quantization.calib.histogram.HistogramCalibrator.compute_amax"]], "modelopt.torch.quantization.calib.histogram": [[58, "module-modelopt.torch.quantization.calib.histogram"]], "reset() (histogramcalibrator method)": [[58, 
"modelopt.torch.quantization.calib.histogram.HistogramCalibrator.reset"]], "maxcalibrator (class in modelopt.torch.quantization.calib.max)": [[59, "modelopt.torch.quantization.calib.max.MaxCalibrator"]], "__init__() (maxcalibrator method)": [[59, "modelopt.torch.quantization.calib.max.MaxCalibrator.__init__"]], "amaxs (maxcalibrator property)": [[59, "modelopt.torch.quantization.calib.max.MaxCalibrator.amaxs"]], "collect() (maxcalibrator method)": [[59, "modelopt.torch.quantization.calib.max.MaxCalibrator.collect"]], "compute_amax() (maxcalibrator method)": [[59, "modelopt.torch.quantization.calib.max.MaxCalibrator.compute_amax"]], "modelopt.torch.quantization.calib.max": [[59, "module-modelopt.torch.quantization.calib.max"]], "reset() (maxcalibrator method)": [[59, "modelopt.torch.quantization.calib.max.MaxCalibrator.reset"]], "algorithm (quantizeconfig attribute)": [[60, "modelopt.torch.quantization.config.QuantizeConfig.algorithm"]], "modelopt.torch.quantization.config": [[60, "module-modelopt.torch.quantization.config"]], "quant_cfg (quantizeconfig attribute)": [[60, "modelopt.torch.quantization.config.QuantizeConfig.quant_cfg"]], "modelopt.torch.quantization.conversion": [[61, "module-modelopt.torch.quantization.conversion"]], "register() (in module modelopt.torch.quantization.conversion)": [[61, "modelopt.torch.quantization.conversion.register"]], "replace_quant_module() (in module modelopt.torch.quantization.conversion)": [[61, "modelopt.torch.quantization.conversion.replace_quant_module"]], "set_quantizer_attribute() (in module modelopt.torch.quantization.conversion)": [[61, "modelopt.torch.quantization.conversion.set_quantizer_attribute"]], "set_quantizer_by_cfg() (in module modelopt.torch.quantization.conversion)": [[61, "modelopt.torch.quantization.conversion.set_quantizer_by_cfg"]], "unregister() (in module modelopt.torch.quantization.conversion)": [[61, "modelopt.torch.quantization.conversion.unregister"]], "modelopt.torch.quantization.extensions": [[62, "module-modelopt.torch.quantization.extensions"]], "quantizeexportmodedescriptor (class in modelopt.torch.quantization.mode)": [[63, "modelopt.torch.quantization.mode.QuantizeExportModeDescriptor"]], "quantizemodedescriptor (class in modelopt.torch.quantization.mode)": [[63, "modelopt.torch.quantization.mode.QuantizeModeDescriptor"]], "config_class (quantizeexportmodedescriptor property)": [[63, "modelopt.torch.quantization.mode.QuantizeExportModeDescriptor.config_class"]], "config_class (quantizemodedescriptor property)": [[63, "modelopt.torch.quantization.mode.QuantizeModeDescriptor.config_class"]], "convert (quantizeexportmodedescriptor property)": [[63, "modelopt.torch.quantization.mode.QuantizeExportModeDescriptor.convert"]], "convert (quantizemodedescriptor property)": [[63, "modelopt.torch.quantization.mode.QuantizeModeDescriptor.convert"]], "export_mode (quantizemodedescriptor property)": [[63, "modelopt.torch.quantization.mode.QuantizeModeDescriptor.export_mode"]], "is_export_mode (quantizeexportmodedescriptor property)": [[63, "modelopt.torch.quantization.mode.QuantizeExportModeDescriptor.is_export_mode"]], "modelopt.torch.quantization.mode": [[63, "module-modelopt.torch.quantization.mode"]], "name (quantizeexportmodedescriptor property)": [[63, "modelopt.torch.quantization.mode.QuantizeExportModeDescriptor.name"]], "name (quantizemodedescriptor property)": [[63, "modelopt.torch.quantization.mode.QuantizeModeDescriptor.name"]], "next_modes (quantizemodedescriptor property)": [[63, 
"modelopt.torch.quantization.mode.QuantizeModeDescriptor.next_modes"]], "restore (quantizeexportmodedescriptor property)": [[63, "modelopt.torch.quantization.mode.QuantizeExportModeDescriptor.restore"]], "restore (quantizemodedescriptor property)": [[63, "modelopt.torch.quantization.mode.QuantizeModeDescriptor.restore"]], "update_for_new_mode (quantizemodedescriptor property)": [[63, "modelopt.torch.quantization.mode.QuantizeModeDescriptor.update_for_new_mode"]], "update_for_save (quantizemodedescriptor property)": [[63, "modelopt.torch.quantization.mode.QuantizeModeDescriptor.update_for_save"]], "calibrate() (in module modelopt.torch.quantization.model_calib)": [[64, "modelopt.torch.quantization.model_calib.calibrate"]], "modelopt.torch.quantization.model_calib": [[64, "module-modelopt.torch.quantization.model_calib"]], "postprocess_amax() (in module modelopt.torch.quantization.model_calib)": [[64, "modelopt.torch.quantization.model_calib.postprocess_amax"]], "disable_quantizer() (in module modelopt.torch.quantization.model_quant)": [[65, "modelopt.torch.quantization.model_quant.disable_quantizer"]], "enable_quantizer() (in module modelopt.torch.quantization.model_quant)": [[65, "modelopt.torch.quantization.model_quant.enable_quantizer"]], "fold_weight() (in module modelopt.torch.quantization.model_quant)": [[65, "modelopt.torch.quantization.model_quant.fold_weight"]], "modelopt.torch.quantization.model_quant": [[65, "module-modelopt.torch.quantization.model_quant"]], "print_quant_summary() (in module modelopt.torch.quantization.model_quant)": [[65, "modelopt.torch.quantization.model_quant.print_quant_summary"]], "quantize() (in module modelopt.torch.quantization.model_quant)": [[65, "modelopt.torch.quantization.model_quant.quantize"]], "modelopt.torch.quantization.nn": [[66, "module-modelopt.torch.quantization.nn"]], "clipfunction (class in modelopt.torch.quantization.nn.functional)": [[67, "modelopt.torch.quantization.nn.functional.ClipFunction"]], "backward() (clipfunction static method)": [[67, "modelopt.torch.quantization.nn.functional.ClipFunction.backward"]], "forward() (clipfunction static method)": [[67, "modelopt.torch.quantization.nn.functional.ClipFunction.forward"]], "modelopt.torch.quantization.nn.functional": [[67, "module-modelopt.torch.quantization.nn.functional"]], "modelopt.torch.quantization.nn.modules": [[68, "module-modelopt.torch.quantization.nn.modules"]], "clip (class in modelopt.torch.quantization.nn.modules.clip)": [[69, "modelopt.torch.quantization.nn.modules.clip.Clip"]], "__init__() (clip method)": [[69, "modelopt.torch.quantization.nn.modules.clip.Clip.__init__"]], "forward() (clip method)": [[69, "modelopt.torch.quantization.nn.modules.clip.Clip.forward"]], "modelopt.torch.quantization.nn.modules.clip": [[69, "module-modelopt.torch.quantization.nn.modules.clip"]], "modelopt.torch.quantization.nn.modules.quant_activations": [[70, "module-modelopt.torch.quantization.nn.modules.quant_activations"]], "modelopt.torch.quantization.nn.modules.quant_batchnorm": [[71, "module-modelopt.torch.quantization.nn.modules.quant_batchnorm"]], "conv1d (in module modelopt.torch.quantization.nn.modules.quant_conv)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.Conv1d"]], "conv2d (in module modelopt.torch.quantization.nn.modules.quant_conv)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.Conv2d"]], "conv3d (in module modelopt.torch.quantization.nn.modules.quant_conv)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.Conv3d"]], "convtranspose1d (in 
module modelopt.torch.quantization.nn.modules.quant_conv)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.ConvTranspose1d"]], "convtranspose2d (in module modelopt.torch.quantization.nn.modules.quant_conv)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.ConvTranspose2d"]], "convtranspose3d (in module modelopt.torch.quantization.nn.modules.quant_conv)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.ConvTranspose3d"]], "quantconv1d (class in modelopt.torch.quantization.nn.modules.quant_conv)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.QuantConv1d"]], "quantconv2d (class in modelopt.torch.quantization.nn.modules.quant_conv)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.QuantConv2d"]], "quantconv3d (class in modelopt.torch.quantization.nn.modules.quant_conv)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.QuantConv3d"]], "quantconvtranspose1d (class in modelopt.torch.quantization.nn.modules.quant_conv)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.QuantConvTranspose1d"]], "quantconvtranspose2d (class in modelopt.torch.quantization.nn.modules.quant_conv)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.QuantConvTranspose2d"]], "quantconvtranspose3d (class in modelopt.torch.quantization.nn.modules.quant_conv)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.QuantConvTranspose3d"]], "default_quant_desc_weight (quantconv1d attribute)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.QuantConv1d.default_quant_desc_weight"]], "default_quant_desc_weight (quantconv2d attribute)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.QuantConv2d.default_quant_desc_weight"]], "default_quant_desc_weight (quantconv3d attribute)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.QuantConv3d.default_quant_desc_weight"]], "default_quant_desc_weight (quantconvtranspose1d attribute)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.QuantConvTranspose1d.default_quant_desc_weight"]], "default_quant_desc_weight (quantconvtranspose2d attribute)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.QuantConvTranspose2d.default_quant_desc_weight"]], "default_quant_desc_weight (quantconvtranspose3d attribute)": [[72, "modelopt.torch.quantization.nn.modules.quant_conv.QuantConvTranspose3d.default_quant_desc_weight"]], "modelopt.torch.quantization.nn.modules.quant_conv": [[72, "module-modelopt.torch.quantization.nn.modules.quant_conv"]], "quantinstancenorm1d (class in modelopt.torch.quantization.nn.modules.quant_instancenorm)": [[73, "modelopt.torch.quantization.nn.modules.quant_instancenorm.QuantInstanceNorm1d"]], "quantinstancenorm2d (class in modelopt.torch.quantization.nn.modules.quant_instancenorm)": [[73, "modelopt.torch.quantization.nn.modules.quant_instancenorm.QuantInstanceNorm2d"]], "quantinstancenorm3d (class in modelopt.torch.quantization.nn.modules.quant_instancenorm)": [[73, "modelopt.torch.quantization.nn.modules.quant_instancenorm.QuantInstanceNorm3d"]], "modelopt.torch.quantization.nn.modules.quant_instancenorm": [[73, "module-modelopt.torch.quantization.nn.modules.quant_instancenorm"]], "linear (in module modelopt.torch.quantization.nn.modules.quant_linear)": [[74, "modelopt.torch.quantization.nn.modules.quant_linear.Linear"]], "quantlinear (class in modelopt.torch.quantization.nn.modules.quant_linear)": [[74, "modelopt.torch.quantization.nn.modules.quant_linear.QuantLinear"]], "default_quant_desc_weight (quantlinear attribute)": [[74, 
"modelopt.torch.quantization.nn.modules.quant_linear.QuantLinear.default_quant_desc_weight"]], "modelopt.torch.quantization.nn.modules.quant_linear": [[74, "module-modelopt.torch.quantization.nn.modules.quant_linear"]], "quantinputbase (class in modelopt.torch.quantization.nn.modules.quant_module)": [[75, "modelopt.torch.quantization.nn.modules.quant_module.QuantInputBase"]], "quantlinearconvbase (class in modelopt.torch.quantization.nn.modules.quant_module)": [[75, "modelopt.torch.quantization.nn.modules.quant_module.QuantLinearConvBase"]], "default_quant_desc_input (quantinputbase attribute)": [[75, "modelopt.torch.quantization.nn.modules.quant_module.QuantInputBase.default_quant_desc_input"]], "default_quant_desc_output (quantinputbase attribute)": [[75, "modelopt.torch.quantization.nn.modules.quant_module.QuantInputBase.default_quant_desc_output"]], "default_quant_desc_weight (quantlinearconvbase attribute)": [[75, "modelopt.torch.quantization.nn.modules.quant_module.QuantLinearConvBase.default_quant_desc_weight"]], "forward() (quantinputbase method)": [[75, "modelopt.torch.quantization.nn.modules.quant_module.QuantInputBase.forward"]], "forward() (quantlinearconvbase method)": [[75, "modelopt.torch.quantization.nn.modules.quant_module.QuantLinearConvBase.forward"]], "initialize_quantizer_with_dummy_states() (quantlinearconvbase static method)": [[75, "modelopt.torch.quantization.nn.modules.quant_module.QuantLinearConvBase.initialize_quantizer_with_dummy_states"]], "input_quantizer (quantinputbase attribute)": [[75, "modelopt.torch.quantization.nn.modules.quant_module.QuantInputBase.input_quantizer"]], "modelopt.torch.quantization.nn.modules.quant_module": [[75, "module-modelopt.torch.quantization.nn.modules.quant_module"]], "output_quantizer (quantinputbase attribute)": [[75, "modelopt.torch.quantization.nn.modules.quant_module.QuantInputBase.output_quantizer"]], "quantize_weight() (quantlinearconvbase method)": [[75, "modelopt.torch.quantization.nn.modules.quant_module.QuantLinearConvBase.quantize_weight"]], "weight_quantizer (quantlinearconvbase attribute)": [[75, "modelopt.torch.quantization.nn.modules.quant_module.QuantLinearConvBase.weight_quantizer"]], "adaptiveavgpool1d (in module modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.AdaptiveAvgPool1d"]], "adaptiveavgpool2d (in module modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.AdaptiveAvgPool2d"]], "adaptiveavgpool3d (in module modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.AdaptiveAvgPool3d"]], "avgpool1d (in module modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.AvgPool1d"]], "avgpool2d (in module modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.AvgPool2d"]], "avgpool3d (in module modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.AvgPool3d"]], "maxpool1d (in module modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.MaxPool1d"]], "maxpool2d (in module modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.MaxPool2d"]], "maxpool3d (in module modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, 
"modelopt.torch.quantization.nn.modules.quant_pooling.MaxPool3d"]], "quantadaptiveavgpool1d (class in modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.QuantAdaptiveAvgPool1d"]], "quantadaptiveavgpool2d (class in modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.QuantAdaptiveAvgPool2d"]], "quantadaptiveavgpool3d (class in modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.QuantAdaptiveAvgPool3d"]], "quantavgpool1d (class in modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.QuantAvgPool1d"]], "quantavgpool2d (class in modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.QuantAvgPool2d"]], "quantavgpool3d (class in modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.QuantAvgPool3d"]], "quantmaxpool1d (class in modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.QuantMaxPool1d"]], "quantmaxpool2d (class in modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.QuantMaxPool2d"]], "quantmaxpool3d (class in modelopt.torch.quantization.nn.modules.quant_pooling)": [[76, "modelopt.torch.quantization.nn.modules.quant_pooling.QuantMaxPool3d"]], "modelopt.torch.quantization.nn.modules.quant_pooling": [[76, "module-modelopt.torch.quantization.nn.modules.quant_pooling"]], "sequentialquantizer (class in modelopt.torch.quantization.nn.modules.tensor_quantizer)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.SequentialQuantizer"]], "tensorquantizer (class in modelopt.torch.quantization.nn.modules.tensor_quantizer)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer"]], "__init__() (sequentialquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.SequentialQuantizer.__init__"]], "__init__() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.__init__"]], "amax (tensorquantizer property)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.amax"]], "axis (tensorquantizer property)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.axis"]], "block_sizes (tensorquantizer property)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.block_sizes"]], "clean_up_after_set_from_modelopt_state() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.clean_up_after_set_from_modelopt_state"]], "disable() (sequentialquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.SequentialQuantizer.disable"]], "disable() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.disable"]], "disable_calib() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.disable_calib"]], "disable_clip() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.disable_clip"]], "disable_quant() (tensorquantizer method)": [[77, 
"modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.disable_quant"]], "enable() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.enable"]], "enable_calib() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.enable_calib"]], "enable_clip() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.enable_clip"]], "enable_quant() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.enable_quant"]], "export_amax() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.export_amax"]], "extra_repr() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.extra_repr"]], "fake_quant (tensorquantizer property)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.fake_quant"]], "forward() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.forward"]], "get_modelopt_state() (sequentialquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.SequentialQuantizer.get_modelopt_state"]], "get_modelopt_state() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.get_modelopt_state"]], "init_learn_amax() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.init_learn_amax"]], "is_enabled (tensorquantizer property)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.is_enabled"]], "load_calib_amax() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.load_calib_amax"]], "maxbound (tensorquantizer property)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.maxbound"]], "modelopt.torch.quantization.nn.modules.tensor_quantizer": [[77, "module-modelopt.torch.quantization.nn.modules.tensor_quantizer"]], "narrow_range (tensorquantizer property)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.narrow_range"]], "num_bits (tensorquantizer property)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.num_bits"]], "pre_quant_scale (tensorquantizer property)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.pre_quant_scale"]], "replace_sequential_quantizer_with_single_quantizer() (sequentialquantizer static method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.SequentialQuantizer.replace_sequential_quantizer_with_single_quantizer"]], "reset_amax() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.reset_amax"]], "scale (tensorquantizer property)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.scale"]], "set_from_attribute_dict() (sequentialquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.SequentialQuantizer.set_from_attribute_dict"]], "set_from_attribute_dict() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.set_from_attribute_dict"]], "set_from_modelopt_state() (tensorquantizer method)": [[77, 
"modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.set_from_modelopt_state"]], "step_size (tensorquantizer property)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.step_size"]], "sync_amax_across_distributed_group() (tensorquantizer method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.sync_amax_across_distributed_group"]], "tensor_quantizer_iterator() (sequentialquantizer static method)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.SequentialQuantizer.tensor_quantizer_iterator"]], "unsigned (tensorquantizer property)": [[77, "modelopt.torch.quantization.nn.modules.tensor_quantizer.TensorQuantizer.unsigned"]], "freeze_parameters() (in module modelopt.torch.quantization.optim)": [[78, "modelopt.torch.quantization.optim.freeze_parameters"]], "group_parameters() (in module modelopt.torch.quantization.optim)": [[78, "modelopt.torch.quantization.optim.group_parameters"]], "match_parameters() (in module modelopt.torch.quantization.optim)": [[78, "modelopt.torch.quantization.optim.match_parameters"]], "modelopt.torch.quantization.optim": [[78, "module-modelopt.torch.quantization.optim"]], "quant_weight_inplace() (in module modelopt.torch.quantization.optim)": [[78, "modelopt.torch.quantization.optim.quant_weight_inplace"]], "modelopt.torch.quantization.plugins": [[79, "module-modelopt.torch.quantization.plugins"]], "deactivate() (in module modelopt.torch.quantization.quant_modules)": [[80, "modelopt.torch.quantization.quant_modules.deactivate"]], "enable_onnx_export() (in module modelopt.torch.quantization.quant_modules)": [[80, "modelopt.torch.quantization.quant_modules.enable_onnx_export"]], "initialize() (in module modelopt.torch.quantization.quant_modules)": [[80, "modelopt.torch.quantization.quant_modules.initialize"]], "modelopt.torch.quantization.quant_modules": [[80, "module-modelopt.torch.quantization.quant_modules"]], "fakeaffinetensorquantfunction (class in modelopt.torch.quantization.tensor_quant)": [[81, "modelopt.torch.quantization.tensor_quant.FakeAffineTensorQuantFunction"]], "faketensorquantfunction (class in modelopt.torch.quantization.tensor_quant)": [[81, "modelopt.torch.quantization.tensor_quant.FakeTensorQuantFunction"]], "legacyfaketensorquantfunction (class in modelopt.torch.quantization.tensor_quant)": [[81, "modelopt.torch.quantization.tensor_quant.LegacyFakeTensorQuantFunction"]], "quantdescriptor (in module modelopt.torch.quantization.tensor_quant)": [[81, "modelopt.torch.quantization.tensor_quant.QuantDescriptor"]], "scalede4m3function (class in modelopt.torch.quantization.tensor_quant)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledE4M3Function"]], "scaledquantdescriptor (class in modelopt.torch.quantization.tensor_quant)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor"]], "tensorquantfunction (class in modelopt.torch.quantization.tensor_quant)": [[81, "modelopt.torch.quantization.tensor_quant.TensorQuantFunction"]], "__init__() (scaledquantdescriptor method)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor.__init__"]], "amax (scaledquantdescriptor property)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor.amax"]], "axis (scaledquantdescriptor property)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor.axis"]], "backward() (fakeaffinetensorquantfunction static method)": [[81, "modelopt.torch.quantization.tensor_quant.FakeAffineTensorQuantFunction.backward"]], 
"backward() (faketensorquantfunction static method)": [[81, "modelopt.torch.quantization.tensor_quant.FakeTensorQuantFunction.backward"]], "backward() (legacyfaketensorquantfunction static method)": [[81, "modelopt.torch.quantization.tensor_quant.LegacyFakeTensorQuantFunction.backward"]], "backward() (scalede4m3function static method)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledE4M3Function.backward"]], "backward() (tensorquantfunction static method)": [[81, "modelopt.torch.quantization.tensor_quant.TensorQuantFunction.backward"]], "block_sizes (scaledquantdescriptor property)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor.block_sizes"]], "calib_method (scaledquantdescriptor property)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor.calib_method"]], "dict() (scaledquantdescriptor method)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor.dict"]], "fake_quant (scaledquantdescriptor property)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor.fake_quant"]], "forward() (fakeaffinetensorquantfunction static method)": [[81, "modelopt.torch.quantization.tensor_quant.FakeAffineTensorQuantFunction.forward"]], "forward() (faketensorquantfunction static method)": [[81, "modelopt.torch.quantization.tensor_quant.FakeTensorQuantFunction.forward"]], "forward() (legacyfaketensorquantfunction static method)": [[81, "modelopt.torch.quantization.tensor_quant.LegacyFakeTensorQuantFunction.forward"]], "forward() (scalede4m3function static method)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledE4M3Function.forward"]], "forward() (tensorquantfunction static method)": [[81, "modelopt.torch.quantization.tensor_quant.TensorQuantFunction.forward"]], "get_block_quant_axes_and_sizes() (scaledquantdescriptor static method)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor.get_block_quant_axes_and_sizes"]], "learn_amax (scaledquantdescriptor property)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor.learn_amax"]], "modelopt.torch.quantization.tensor_quant": [[81, "module-modelopt.torch.quantization.tensor_quant"]], "name (scaledquantdescriptor property)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor.name"]], "narrow_range (scaledquantdescriptor property)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor.narrow_range"]], "num_bits (scaledquantdescriptor property)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor.num_bits"]], "scale_amax (scaledquantdescriptor property)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor.scale_amax"]], "scaled_e4m3_abstract() (in module modelopt.torch.quantization.tensor_quant)": [[81, "modelopt.torch.quantization.tensor_quant.scaled_e4m3_abstract"]], "symbolic() (faketensorquantfunction static method)": [[81, "modelopt.torch.quantization.tensor_quant.FakeTensorQuantFunction.symbolic"]], "symbolic() (scalede4m3function static method)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledE4M3Function.symbolic"]], "symbolic() (tensorquantfunction static method)": [[81, "modelopt.torch.quantization.tensor_quant.TensorQuantFunction.symbolic"]], "unsigned (scaledquantdescriptor property)": [[81, "modelopt.torch.quantization.tensor_quant.ScaledQuantDescriptor.unsigned"]], "export_torch_mode() (in module modelopt.torch.quantization.utils)": [[82, "modelopt.torch.quantization.utils.export_torch_mode"]], "is_quantized() (in module 
modelopt.torch.quantization.utils)": [[82, "modelopt.torch.quantization.utils.is_quantized"]], "is_quantized_column_parallel_linear() (in module modelopt.torch.quantization.utils)": [[82, "modelopt.torch.quantization.utils.is_quantized_column_parallel_linear"]], "is_quantized_layer_with_weight() (in module modelopt.torch.quantization.utils)": [[82, "modelopt.torch.quantization.utils.is_quantized_layer_with_weight"]], "is_quantized_row_parallel_linear() (in module modelopt.torch.quantization.utils)": [[82, "modelopt.torch.quantization.utils.is_quantized_row_parallel_linear"]], "is_torch_library_supported() (in module modelopt.torch.quantization.utils)": [[82, "modelopt.torch.quantization.utils.is_torch_library_supported"]], "modelopt.torch.quantization.utils": [[82, "module-modelopt.torch.quantization.utils"]], "reduce_amax() (in module modelopt.torch.quantization.utils)": [[82, "modelopt.torch.quantization.utils.reduce_amax"]], "replace_function() (in module modelopt.torch.quantization.utils)": [[82, "modelopt.torch.quantization.utils.replace_function"]], "modelopt.torch.sparsity": [[83, "module-modelopt.torch.sparsity"]], "modelopt.torch.sparsity.config": [[84, "module-modelopt.torch.sparsity.config"]], "nn_conv2d (sparsegptconfig attribute)": [[84, "modelopt.torch.sparsity.config.SparseGPTConfig.nn_conv2d"]], "nn_conv2d (sparsemagnitudeconfig attribute)": [[84, "modelopt.torch.sparsity.config.SparseMagnitudeConfig.nn_conv2d"]], "nn_linear (sparsegptconfig attribute)": [[84, "modelopt.torch.sparsity.config.SparseGPTConfig.nn_linear"]], "nn_linear (sparsemagnitudeconfig attribute)": [[84, "modelopt.torch.sparsity.config.SparseMagnitudeConfig.nn_linear"]], "magnitudesearcher (class in modelopt.torch.sparsity.magnitude)": [[85, "modelopt.torch.sparsity.magnitude.MagnitudeSearcher"]], "compute_valid_1d_patterns() (in module modelopt.torch.sparsity.magnitude)": [[85, "modelopt.torch.sparsity.magnitude.compute_valid_1d_patterns"]], "create_asp_mask() (in module modelopt.torch.sparsity.magnitude)": [[85, "modelopt.torch.sparsity.magnitude.create_asp_mask"]], "fill() (in module modelopt.torch.sparsity.magnitude)": [[85, "modelopt.torch.sparsity.magnitude.fill"]], "get_nmprune_info() (in module modelopt.torch.sparsity.magnitude)": [[85, "modelopt.torch.sparsity.magnitude.get_nmprune_info"]], "m4n2_1d() (in module modelopt.torch.sparsity.magnitude)": [[85, "modelopt.torch.sparsity.magnitude.m4n2_1d"]], "mn_1d_best() (in module modelopt.torch.sparsity.magnitude)": [[85, "modelopt.torch.sparsity.magnitude.mn_1d_best"]], "modelopt.torch.sparsity.magnitude": [[85, "module-modelopt.torch.sparsity.magnitude"]], "reshape_1d() (in module modelopt.torch.sparsity.magnitude)": [[85, "modelopt.torch.sparsity.magnitude.reshape_1d"]], "exportsparsemodedescriptor (class in modelopt.torch.sparsity.mode)": [[86, "modelopt.torch.sparsity.mode.ExportSparseModeDescriptor"]], "sparsegptmodedescriptor (class in modelopt.torch.sparsity.mode)": [[86, "modelopt.torch.sparsity.mode.SparseGPTModeDescriptor"]], "sparsemagnitudemodedescriptor (class in modelopt.torch.sparsity.mode)": [[86, "modelopt.torch.sparsity.mode.SparseMagnitudeModeDescriptor"]], "config_class (exportsparsemodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.ExportSparseModeDescriptor.config_class"]], "config_class (sparsegptmodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.SparseGPTModeDescriptor.config_class"]], "config_class (sparsemagnitudemodedescriptor property)": [[86, 
"modelopt.torch.sparsity.mode.SparseMagnitudeModeDescriptor.config_class"]], "convert (exportsparsemodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.ExportSparseModeDescriptor.convert"]], "convert (sparsemagnitudemodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.SparseMagnitudeModeDescriptor.convert"]], "convert_sparse_model() (in module modelopt.torch.sparsity.mode)": [[86, "modelopt.torch.sparsity.mode.convert_sparse_model"]], "export_mode (sparsemagnitudemodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.SparseMagnitudeModeDescriptor.export_mode"]], "export_sparse() (in module modelopt.torch.sparsity.mode)": [[86, "modelopt.torch.sparsity.mode.export_sparse"]], "is_export_mode (exportsparsemodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.ExportSparseModeDescriptor.is_export_mode"]], "modelopt.torch.sparsity.mode": [[86, "module-modelopt.torch.sparsity.mode"]], "name (exportsparsemodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.ExportSparseModeDescriptor.name"]], "name (sparsegptmodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.SparseGPTModeDescriptor.name"]], "name (sparsemagnitudemodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.SparseMagnitudeModeDescriptor.name"]], "next_modes (sparsemagnitudemodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.SparseMagnitudeModeDescriptor.next_modes"]], "restore (exportsparsemodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.ExportSparseModeDescriptor.restore"]], "restore (sparsemagnitudemodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.SparseMagnitudeModeDescriptor.restore"]], "restore_export_sparse() (in module modelopt.torch.sparsity.mode)": [[86, "modelopt.torch.sparsity.mode.restore_export_sparse"]], "restore_sparse_model() (in module modelopt.torch.sparsity.mode)": [[86, "modelopt.torch.sparsity.mode.restore_sparse_model"]], "search_algorithm (sparsegptmodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.SparseGPTModeDescriptor.search_algorithm"]], "search_algorithm (sparsemagnitudemodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.SparseMagnitudeModeDescriptor.search_algorithm"]], "update_for_new_mode (sparsemagnitudemodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.SparseMagnitudeModeDescriptor.update_for_new_mode"]], "update_for_save (sparsemagnitudemodedescriptor property)": [[86, "modelopt.torch.sparsity.mode.SparseMagnitudeModeDescriptor.update_for_save"]], "update_sparse_metadata() (in module modelopt.torch.sparsity.mode)": [[86, "modelopt.torch.sparsity.mode.update_sparse_metadata"]], "sparsemodule (class in modelopt.torch.sparsity.module)": [[87, "modelopt.torch.sparsity.module.SparseModule"]], "modelopt.torch.sparsity.module": [[87, "module-modelopt.torch.sparsity.module"]], "modify() (sparsemodule method)": [[87, "modelopt.torch.sparsity.module.SparseModule.modify"]], "set_mask() (sparsemodule method)": [[87, "modelopt.torch.sparsity.module.SparseModule.set_mask"]], "modelopt.torch.sparsity.plugins": [[88, "module-modelopt.torch.sparsity.plugins"]], "basesparsesearcher (class in modelopt.torch.sparsity.searcher)": [[89, "modelopt.torch.sparsity.searcher.BaseSparseSearcher"]], "default_search_config (basesparsesearcher property)": [[89, "modelopt.torch.sparsity.searcher.BaseSparseSearcher.default_search_config"]], "default_state_dict (basesparsesearcher property)": [[89, "modelopt.torch.sparsity.searcher.BaseSparseSearcher.default_state_dict"]], 
"modelopt.torch.sparsity.searcher": [[89, "module-modelopt.torch.sparsity.searcher"]], "run_search() (basesparsesearcher method)": [[89, "modelopt.torch.sparsity.searcher.BaseSparseSearcher.run_search"]], "sanitize_search_config() (basesparsesearcher method)": [[89, "modelopt.torch.sparsity.searcher.BaseSparseSearcher.sanitize_search_config"]], "sparsegptsearcher (class in modelopt.torch.sparsity.sparsegpt)": [[90, "modelopt.torch.sparsity.sparsegpt.SparseGPTSearcher"]], "after_search() (sparsegptsearcher method)": [[90, "modelopt.torch.sparsity.sparsegpt.SparseGPTSearcher.after_search"]], "before_search() (sparsegptsearcher method)": [[90, "modelopt.torch.sparsity.sparsegpt.SparseGPTSearcher.before_search"]], "create_sgpt_mask() (in module modelopt.torch.sparsity.sparsegpt)": [[90, "modelopt.torch.sparsity.sparsegpt.create_sgpt_mask"]], "default_search_config (sparsegptsearcher property)": [[90, "modelopt.torch.sparsity.sparsegpt.SparseGPTSearcher.default_search_config"]], "invert() (in module modelopt.torch.sparsity.sparsegpt)": [[90, "modelopt.torch.sparsity.sparsegpt.invert"]], "modelopt.torch.sparsity.sparsegpt": [[90, "module-modelopt.torch.sparsity.sparsegpt"]], "prepare() (in module modelopt.torch.sparsity.sparsegpt)": [[90, "modelopt.torch.sparsity.sparsegpt.prepare"]], "export() (in module modelopt.torch.sparsity.sparsification)": [[91, "modelopt.torch.sparsity.sparsification.export"]], "modelopt.torch.sparsity.sparsification": [[91, "module-modelopt.torch.sparsity.sparsification"]], "sparsify() (in module modelopt.torch.sparsity.sparsification)": [[91, "modelopt.torch.sparsity.sparsification.sparsify"]], "modelopt.torch.utils": [[92, "module-modelopt.torch.utils"]], "load_cpp_extension() (in module modelopt.torch.utils.cpp_extension)": [[93, "modelopt.torch.utils.cpp_extension.load_cpp_extension"]], "modelopt.torch.utils.cpp_extension": [[93, "module-modelopt.torch.utils.cpp_extension"]], "create_forward_loop() (in module modelopt.torch.utils.dataset_utils)": [[94, "modelopt.torch.utils.dataset_utils.create_forward_loop"]], "get_dataset_dataloader() (in module modelopt.torch.utils.dataset_utils)": [[94, "modelopt.torch.utils.dataset_utils.get_dataset_dataloader"]], "modelopt.torch.utils.dataset_utils": [[94, "module-modelopt.torch.utils.dataset_utils"]], "backend() (in module modelopt.torch.utils.distributed)": [[95, "modelopt.torch.utils.distributed.backend"]], "barrier() (in module modelopt.torch.utils.distributed)": [[95, "modelopt.torch.utils.distributed.barrier"]], "get_data_parallel_group() (in module modelopt.torch.utils.distributed)": [[95, "modelopt.torch.utils.distributed.get_data_parallel_group"]], "get_tensor_parallel_group() (in module modelopt.torch.utils.distributed)": [[95, "modelopt.torch.utils.distributed.get_tensor_parallel_group"]], "is_master() (in module modelopt.torch.utils.distributed)": [[95, "modelopt.torch.utils.distributed.is_master"]], "modelopt.torch.utils.distributed": [[95, "module-modelopt.torch.utils.distributed"]], "rank() (in module modelopt.torch.utils.distributed)": [[95, "modelopt.torch.utils.distributed.rank"]], "set_data_parallel_group() (in module modelopt.torch.utils.distributed)": [[95, "modelopt.torch.utils.distributed.set_data_parallel_group"]], "set_tensor_parallel_group() (in module modelopt.torch.utils.distributed)": [[95, "modelopt.torch.utils.distributed.set_tensor_parallel_group"]], "size() (in module modelopt.torch.utils.distributed)": [[95, "modelopt.torch.utils.distributed.size"]], "match() (in module 
modelopt.torch.utils.graph)": [[96, "modelopt.torch.utils.graph.match"]], "modelopt.torch.utils.graph": [[96, "module-modelopt.torch.utils.graph"]], "list_closest_to_median() (in module modelopt.torch.utils.list)": [[97, "modelopt.torch.utils.list.list_closest_to_median"]], "modelopt.torch.utils.list": [[97, "module-modelopt.torch.utils.list"]], "stats() (in module modelopt.torch.utils.list)": [[97, "modelopt.torch.utils.list.stats"]], "val2list() (in module modelopt.torch.utils.list)": [[97, "modelopt.torch.utils.list.val2list"]], "val2tuple() (in module modelopt.torch.utils.list)": [[97, "modelopt.torch.utils.list.val2tuple"]], "deprecatederror": [[98, "modelopt.torch.utils.logging.DeprecatedError"]], "modelopt.torch.utils.logging": [[98, "module-modelopt.torch.utils.logging"]], "no_stdout() (in module modelopt.torch.utils.logging)": [[98, "modelopt.torch.utils.logging.no_stdout"]], "num2hrb() (in module modelopt.torch.utils.logging)": [[98, "modelopt.torch.utils.logging.num2hrb"]], "print_rank_0() (in module modelopt.torch.utils.logging)": [[98, "modelopt.torch.utils.logging.print_rank_0"]], "compare_dict() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.compare_dict"]], "get_model_attributes() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.get_model_attributes"]], "get_module_device() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.get_module_device"]], "get_same_padding() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.get_same_padding"]], "init_model_from_model_like() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.init_model_from_model_like"]], "is_channels_last() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.is_channels_last"]], "is_parallel() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.is_parallel"]], "make_divisible() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.make_divisible"]], "model_to() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.model_to"]], "modelopt.torch.utils.network": [[99, "module-modelopt.torch.utils.network"]], "param_num() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.param_num"]], "param_num_from_forward() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.param_num_from_forward"]], "remove_bn() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.remove_bn"]], "run_forward_loop() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.run_forward_loop"]], "set_submodule() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.set_submodule"]], "standardize_constructor_args() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.standardize_constructor_args"]], "standardize_model_args() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.standardize_model_args"]], "standardize_model_like_tuple() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.standardize_model_like_tuple"]], "standardize_named_model_args() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.standardize_named_model_args"]], "unwrap_model() (in module modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.unwrap_model"]], "zero_grad() (in module 
modelopt.torch.utils.network)": [[99, "modelopt.torch.utils.network.zero_grad"]], "timer (class in modelopt.torch.utils.perf)": [[100, "modelopt.torch.utils.perf.Timer"]], "__init__() (timer method)": [[100, "modelopt.torch.utils.perf.Timer.__init__"]], "clear_cuda_cache() (in module modelopt.torch.utils.perf)": [[100, "modelopt.torch.utils.perf.clear_cuda_cache"]], "get_cuda_memory_stats() (in module modelopt.torch.utils.perf)": [[100, "modelopt.torch.utils.perf.get_cuda_memory_stats"]], "modelopt.torch.utils.perf": [[100, "module-modelopt.torch.utils.perf"]], "report_memory() (in module modelopt.torch.utils.perf)": [[100, "modelopt.torch.utils.perf.report_memory"]], "start() (timer method)": [[100, "modelopt.torch.utils.perf.Timer.start"]], "stop() (timer method)": [[100, "modelopt.torch.utils.perf.Timer.stop"]], "centroid() (in module modelopt.torch.utils.random)": [[101, "modelopt.torch.utils.random.centroid"]], "choice() (in module modelopt.torch.utils.random)": [[101, "modelopt.torch.utils.random.choice"]], "modelopt.torch.utils.random": [[101, "module-modelopt.torch.utils.random"]], "original() (in module modelopt.torch.utils.random)": [[101, "modelopt.torch.utils.random.original"]], "random() (in module modelopt.torch.utils.random)": [[101, "modelopt.torch.utils.random.random"]], "sample() (in module modelopt.torch.utils.random)": [[101, "modelopt.torch.utils.random.sample"]], "shuffle() (in module modelopt.torch.utils.random)": [[101, "modelopt.torch.utils.random.shuffle"]], "modelopt.torch.utils.tensor": [[102, "module-modelopt.torch.utils.tensor"]], "numpy_to_torch() (in module modelopt.torch.utils.tensor)": [[102, "modelopt.torch.utils.tensor.numpy_to_torch"]], "torch_detach() (in module modelopt.torch.utils.tensor)": [[102, "modelopt.torch.utils.tensor.torch_detach"]], "torch_to() (in module modelopt.torch.utils.tensor)": [[102, "modelopt.torch.utils.tensor.torch_to"]], "torch_to_numpy() (in module modelopt.torch.utils.tensor)": [[102, "modelopt.torch.utils.tensor.torch_to_numpy"]]}})
\ No newline at end of file
diff --git a/support/1_contact.html b/support/1_contact.html
new file mode 100644
index 0000000..8439256
--- /dev/null
+++ b/support/1_contact.html
@@ -0,0 +1,157 @@
+Contact us — Model Optimizer 0.11.2
+[page scaffolding and navigation markup omitted]
+Contact us
+You may raise an issue on GitHub for any questions or issues you may have.
+[page footer markup omitted]
diff --git a/support/2_faqs.html b/support/2_faqs.html
new file mode 100644
index 0000000..49da1e6
--- /dev/null
+++ b/support/2_faqs.html
@@ -0,0 +1,162 @@
+FAQs — Model Optimizer 0.11.2
+[page scaffolding and navigation markup omitted]
FAQs


1. Potential memory leak for FSDP with use_orig_params=True


When using FSDP with use_orig_params=True in conjunction with modelopt-converted models, there is a potential memory leak during training. Please use use_orig_params=False to avoid this issue.
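
For illustration, a minimal sketch of the recommended wrapping is shown below. It assumes a distributed run launched via torchrun with CUDA available; the Linear layer is a hypothetical stand-in for a real modelopt-converted model (e.g. the output of modelopt.torch.quantization.quantize).

    import torch
    import torch.distributed as dist
    from torch.distributed.fsdp import FullyShardedDataParallel as FSDP

    # Assumes torchrun has already set the process-group environment variables.
    dist.init_process_group("nccl")
    torch.cuda.set_device(dist.get_rank() % torch.cuda.device_count())

    # Hypothetical stand-in for a modelopt-converted model; in practice this
    # would come from, e.g., modelopt.torch.quantization.quantize(...).
    model = torch.nn.Linear(1024, 1024).cuda()

    # use_orig_params=False (the FSDP default) avoids the potential memory
    # leak described in this FAQ; it is passed explicitly here for emphasis.
    fsdp_model = FSDP(model, use_orig_params=False)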
