Fix tpc in pruning and qat keras notebooks #1325

Merged: 6 commits, Jan 15, 2025
@@ -217,41 +217,42 @@ (pruning notebook)
 {
 "cell_type": "code",
 "source": [
-"from model_compression_toolkit.target_platform_capabilities.target_platform import Signedness\n",
-"tp = mct.target_platform\n",
+"from mct_quantizers import QuantizationMethod\n",
+"from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import schema\n",
 "\n",
 "simd_size = 1\n",
 "\n",
 "def get_tpc():\n",
 "    # Define the default weight attribute configuration\n",
-"    default_weight_attr_config = tp.AttributeQuantizationConfig(\n",
-"        weights_quantization_method=tp.QuantizationMethod.UNIFORM,\n",
+"    default_weight_attr_config = schema.AttributeQuantizationConfig(\n",
+"        weights_quantization_method=QuantizationMethod.UNIFORM,\n",
 "    )\n",
 "\n",
 "    # Define the OpQuantizationConfig\n",
-"    default_config = tp.OpQuantizationConfig(\n",
+"    default_config = schema.OpQuantizationConfig(\n",
 "        default_weight_attr_config=default_weight_attr_config,\n",
 "        attr_weights_configs_mapping={},\n",
-"        activation_quantization_method=tp.QuantizationMethod.UNIFORM,\n",
+"        activation_quantization_method=QuantizationMethod.UNIFORM,\n",
 "        activation_n_bits=8,\n",
 "        supported_input_activation_n_bits=8,\n",
-"        enable_activation_quantization=None,\n",
-"        quantization_preserving=None,\n",
+"        enable_activation_quantization=False,\n",
+"        quantization_preserving=False,\n",
 "        fixed_scale=None,\n",
 "        fixed_zero_point=None,\n",
 "        simd_size=simd_size,\n",
-"        signedness=Signedness.AUTO\n",
+"        signedness=schema.Signedness.AUTO\n",
 "    )\n",
+"\n",
+"    # In this tutorial, we will use the default OpQuantizationConfig for all operator sets.\n",
+"    operator_set = []\n",
 "\n",
 "    # Create the quantization configuration options and model\n",
-"    default_configuration_options = tp.QuantizationConfigOptions([default_config])\n",
-"    tp_model = tp.TargetPlatformCapabilities(default_configuration_options,\n",
-"                                             tpc_minor_version=1,\n",
-"                                             tpc_patch_version=0,\n",
-"                                             tpc_platform_type=\"custom_pruning_notebook_tpc\")\n",
-"\n",
-"    # Return the target platform capabilities\n",
-"    tpc = tp.FrameworkQuantizationCapabilities(tp_model)\n",
+"    default_configuration_options = schema.QuantizationConfigOptions(quantization_configurations=tuple([default_config]))\n",
+"    tpc = schema.TargetPlatformCapabilities(default_qco=default_configuration_options,\n",
+"                                            tpc_minor_version=1,\n",
+"                                            tpc_patch_version=0,\n",
+"                                            tpc_platform_type=\"custom_pruning_notebook_tpc\",\n",
+"                                            operator_set=tuple(operator_set))\n",
 "    return tpc\n"
 ],
 "metadata": {
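For context on how the rewritten cell is used: below is a minimal sketch, not part of this PR, of passing a TPC built by this get_tpc() to MCT's experimental Keras pruning API, as the surrounding notebook does. The names float_model and representative_data_gen stand in for objects defined in earlier notebook cells, and the 50% weights-memory target is an illustrative value.

import model_compression_toolkit as mct

# Placeholders assumed from earlier notebook cells (not part of this PR):
# float_model             - a trained tf.keras model
# representative_data_gen - a generator yielding batches of input images

# Illustrative target: halve the weights memory of the float model
# (count_params() * 4 bytes per float32 weight).
target_ru = mct.core.ResourceUtilization(
    weights_memory=float_model.count_params() * 4 * 0.5)

# The TPC from get_tpc() also sets the SIMD group size for the pruner;
# simd_size=1 above means channels can be pruned individually rather than
# in groups.
pruned_model, pruning_info = mct.pruning.keras_pruning_experimental(
    model=float_model,
    target_resource_utilization=target_ru,
    representative_data_gen=representative_data_gen,
    target_platform_capabilities=get_tpc())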
@@ -189,13 +189,10 @@ (QAT notebook)
 },
 "outputs": [],
 "source": [
-"from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import Signedness, AttributeQuantizationConfig\n",
-"from model_compression_toolkit import DefaultDict\n",
+"from mct_quantizers import QuantizationMethod\n",
 "from model_compression_toolkit.constants import FLOAT_BITWIDTH\n",
-"from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, BIAS\n",
+"from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR\n",
+"from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import schema\n",
 "\n",
-"tp = mct.target_platform\n",
-"\n",
 "def get_tpc():\n",
 "    \"\"\"\n",
@@ -210,24 +207,24 @@
" \"\"\"\n",
"\n",
" # define a default quantization config for all non-specified weights attributes.\n",
" default_weight_attr_config = AttributeQuantizationConfig(\n",
" weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,\n",
" default_weight_attr_config = schema.AttributeQuantizationConfig(\n",
" weights_quantization_method=QuantizationMethod.POWER_OF_TWO,\n",
" weights_n_bits=8,\n",
" weights_per_channel_threshold=False,\n",
" enable_weights_quantization=False,\n",
" lut_values_bitwidth=None)\n",
"\n",
" # define a quantization config to quantize the kernel (for layers where there is a kernel attribute).\n",
" kernel_base_config = AttributeQuantizationConfig(\n",
" weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,\n",
" kernel_base_config = schema.AttributeQuantizationConfig(\n",
" weights_quantization_method=QuantizationMethod.SYMMETRIC,\n",
" weights_n_bits=2,\n",
" weights_per_channel_threshold=True,\n",
" enable_weights_quantization=True,\n",
" lut_values_bitwidth=None)\n",
"\n",
" # define a quantization config to quantize the bias (for layers where there is a bias attribute).\n",
" bias_config = AttributeQuantizationConfig(\n",
" weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,\n",
" bias_config = schema.AttributeQuantizationConfig(\n",
" weights_quantization_method=QuantizationMethod.POWER_OF_TWO,\n",
" weights_n_bits=FLOAT_BITWIDTH,\n",
" weights_per_channel_threshold=False,\n",
" enable_weights_quantization=False,\n",
@@ -237,45 +234,37 @@
" # AttributeQuantizationConfig for weights with no specific AttributeQuantizationConfig.\n",
" # MCT will compress a layer's kernel and bias according to the configurations that are\n",
" # set in KERNEL_ATTR and BIAS_ATTR that are passed in attr_weights_configs_mapping.\n",
" default_config = tp.OpQuantizationConfig(\n",
" default_config = schema.OpQuantizationConfig(\n",
" default_weight_attr_config=default_weight_attr_config,\n",
" attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config,\n",
" BIAS_ATTR: bias_config},\n",
" activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,\n",
" activation_quantization_method=QuantizationMethod.POWER_OF_TWO,\n",
" activation_n_bits=3,\n",
" supported_input_activation_n_bits=8,\n",
" enable_activation_quantization=True,\n",
" quantization_preserving=False,\n",
" fixed_scale=None,\n",
" fixed_zero_point=None,\n",
" simd_size=None,\n",
" signedness=Signedness.AUTO)\n",
" signedness=schema.Signedness.AUTO)\n",
"\n",
" # Set default QuantizationConfigOptions in new TargetPlatformCapabilities to be used when no other\n",
" # QuantizationConfigOptions is set for an OperatorsSet.\n",
" default_configuration_options = tp.QuantizationConfigOptions([default_config])\n",
" tp_model = tp.TargetPlatformCapabilities(default_configuration_options,\n",
" tpc_minor_version=1,\n",
" tpc_patch_version=0,\n",
" tpc_platform_type=\"custom_qat_notebook_tpc\")\n",
" with tp_model:\n",
" default_qco = tp.get_default_quantization_config_options()\n",
" # Group of OperatorsSets that should not be quantized.\n",
" tp.OperatorsSet(\"NoQuantization\",\n",
" default_qco.clone_and_edit(enable_activation_quantization=False)\n",
" .clone_and_edit_weight_attribute(enable_weights_quantization=False))\n",
" # Group of linear OperatorsSets such as convolution and matmul.\n",
" tp.OperatorsSet(\"LinearOp\")\n",
" default_configuration_options = schema.QuantizationConfigOptions(quantization_configurations=[default_config])\n",
" no_quantization_config = (default_configuration_options.clone_and_edit(enable_activation_quantization=False)\n",
" .clone_and_edit_weight_attribute(enable_weights_quantization=False))\n",
"\n",
" operator_set = []\n",
"\n",
" operator_set.append(schema.OperatorsSet(name=schema.OperatorSetNames.DROPOUT, qc_options=no_quantization_config))\n",
" operator_set.append(schema.OperatorsSet(name=schema.OperatorSetNames.FLATTEN, qc_options=no_quantization_config))\n",
"\n",
"\n",
" tpc = tp.FrameworkQuantizationCapabilities(tp_model)\n",
" with tpc:\n",
" # No need to quantize Flatten and Dropout layers\n",
" tp.OperationsSetToLayers(\"NoQuantization\", [layers.Flatten, layers.Dropout])\n",
" # Assign the framework layers' attributes to KERNEL_ATTR and BIAS_ATTR that were used during creation\n",
" # of the default OpQuantizationConfig.\n",
" tp.OperationsSetToLayers(\"LinearOp\", [layers.Dense, layers.Conv2D],\n",
" attr_mapping={KERNEL_ATTR: DefaultDict(default_value=KERAS_KERNEL),\n",
" BIAS_ATTR: DefaultDict(default_value=BIAS)})\n",
" tpc = schema.TargetPlatformCapabilities(default_qco=default_configuration_options,\n",
" tpc_minor_version=1,\n",
" tpc_patch_version=0,\n",
" tpc_platform_type=\"custom_qat_notebook_tpc\",\n",
" operator_set=tuple(operator_set))\n",
" return tpc\n"
]
},
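As with the pruning notebook, here is a minimal usage sketch, not part of this PR, of feeding this get_tpc() into MCT's experimental Keras QAT flow. float_model, representative_data_gen, train_images and train_labels are assumed placeholders, and since the contents of the init call's return tuple have varied across MCT releases, the sketch indexes it instead of unpacking.

import model_compression_toolkit as mct

# Placeholders assumed from earlier notebook cells (not part of this PR):
# float_model, representative_data_gen, train_images, train_labels

# Build a QAT-ready model whose weights/activations are quantized per the
# custom TPC (2-bit symmetric kernels, 3-bit activations, as defined above).
init_outputs = mct.qat.keras_quantization_aware_training_init_experimental(
    float_model,
    representative_data_gen,
    core_config=mct.core.CoreConfig(),
    target_platform_capabilities=get_tpc())
qat_model = init_outputs[0]  # later tuple entries (quantization info, and in
                             # some releases custom objects) differ by version

# Fine-tune with the usual Keras loop, then fold the learned quantizers
# into the final quantized model.
qat_model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
qat_model.fit(train_images, train_labels, epochs=1)
quantized_model = mct.qat.keras_quantization_aware_training_finalize_experimental(qat_model)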