diff --git a/README.md b/README.md
index 0e9ff13f..a89baa86 100644
--- a/README.md
+++ b/README.md
@@ -101,6 +101,7 @@ Inference cost for CNV_2W2A.onnx
}
```
+You can use the `--cost-breakdown` option to generate a more detailed report that covers per-node (by name) and per-op-type information.
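+For example (assuming the `qonnx-inference-cost` command-line entry point is used to produce the report above):
+
+```
+qonnx-inference-cost CNV_2W2A.onnx --cost-breakdown
+```
+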
You can read more about the BOPS metric in [this paper](https://www.frontiersin.org/articles/10.3389/frai.2021.676564/full), Section 4.2 Bit Operations.
### Convert between different quantization representations
@@ -114,16 +115,19 @@ Please see the documentation of the `QuantToQCDQ` transformation to learn more a
## Development
-Install in editable mode in a venv:
+Install in editable mode in a Python virtual environment:
```
git clone https://github.com/fastmachinelearning/qonnx
cd qonnx
virtualenv -p python3.8 venv
source venv/bin/activate
+pip install --upgrade pip
pip install -e .[qkeras,testing]
```
+### Running tests
+
Run entire test suite, parallelized across CPU cores:
```
pytest -n auto --verbose
@@ -134,6 +138,22 @@ Run a particular test and fall into pdb if it fails:
pytest --pdb -k "test_extend_partition.py::test_extend_partition[extend_id1-2]"
```
+### Linting
+
+If you plan to make pull requests to the qonnx repo, linting will be required.
+We use a pre-commit hook to auto-format Python code and check for issues. See https://pre-commit.com/ for installation. Once you have `pre-commit`,
+you can install the hooks into your local clone of the qonnx repo:
+
+```
+cd qonnx
+source venv/bin/activate
+pip install pre-commit
+pre-commit install
+```
+
+Every time you commit some code, the pre-commit hooks will run first, performing various checks and fixes. In some cases pre-commit won't be able to
+fix the issues itself, in which case you will have to fix them manually and then run `git commit` again. The checks are configured in `.pre-commit-config.yaml` under the repo root.
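+
+You can also run all of the configured checks manually across the whole repository at any time:
+
+```
+pre-commit run --all-files
+```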
+
## Why QONNX?
The QONNX representation has several advantages compared to other alternatives, as summarized in the table below.
diff --git a/docs/index.rst b/docs/index.rst
index f07ba086..53b9c159 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -63,6 +63,9 @@ Install in editable mode in a venv:
pip install -e .[testing, docs, notebooks]
+Test suite
+++++++++++
+
Run entire test suite, parallelized across CPU cores:
::
diff --git a/docs/license.rst b/docs/license.rst
index e647e180..a5103f77 100644
--- a/docs/license.rst
+++ b/docs/license.rst
@@ -1,7 +1,7 @@
.. _license:
-=======
+========
License
-=======
+========
.. include:: ../LICENSE
diff --git a/notebooks/0_how_to_work_with_onnx.ipynb b/notebooks/0_how_to_work_with_onnx.ipynb
index 69e27546..052f6a77 100644
--- a/notebooks/0_how_to_work_with_onnx.ipynb
+++ b/notebooks/0_how_to_work_with_onnx.ipynb
@@ -36,7 +36,7 @@
"outputs": [],
"source": [
"import onnx\n",
- "from qonnx.basic.util import qonnx_make_model\n",
+ "from qonnx.util.basic import qonnx_make_model\n",
"\n",
"Add1_node = onnx.helper.make_node(\n",
" 'Add',\n",
@@ -68,13 +68,6 @@
" name='Add2',\n",
")\n",
"\n",
- "Add3_node = onnx.helper.make_node(\n",
- " 'Add',\n",
- " inputs=['abs1', 'abs1'],\n",
- " outputs=['sum3'],\n",
- " name='Add3',\n",
- ")\n",
- "\n",
"Abs_node = onnx.helper.make_node(\n",
" 'Abs',\n",
" inputs=['sum2'],\n",
@@ -82,12 +75,19 @@
" name='Abs'\n",
")\n",
"\n",
+ "Add3_node = onnx.helper.make_node(\n",
+ " 'Add',\n",
+ " inputs=['abs1', 'abs1'],\n",
+ " outputs=['sum3'],\n",
+ " name='Add3',\n",
+ ")\n",
+ "\n",
"Round_node = onnx.helper.make_node(\n",
" 'Round',\n",
" inputs=['sum3'],\n",
" outputs=['out1'],\n",
" name='Round',\n",
- ")\n"
+ ")"
]
},
{
@@ -253,7 +253,7 @@
"metadata": {},
"outputs": [],
"source": [
- "in1_values =np.asarray(np.random.uniform(low=-5, high=5, size=(4,4)), dtype=np.float32)\n",
+ "in1_values = np.asarray(np.random.uniform(low=-5, high=5, size=(4,4)), dtype=np.float32)\n",
"in2_values = np.asarray(np.random.uniform(low=-5, high=5, size=(4,4)), dtype=np.float32)\n",
"in3_values = np.asarray(np.random.uniform(low=-5, high=5, size=(4,4)), dtype=np.float32)"
]
@@ -350,6 +350,7 @@
"metadata": {},
"outputs": [],
"source": [
+ "import qonnx\n",
"from qonnx.core.modelwrapper import ModelWrapper\n",
"finn_model = ModelWrapper(onnx_model)"
]
@@ -608,7 +609,7 @@
"metadata": {},
"outputs": [],
"source": [
- "onnx_model1 = onnx.qonnx_make_model(graph, producer_name=\"simple-model1\")\n",
+ "onnx_model1 = qonnx.util.basic.qonnx_make_model(graph, producer_name=\"simple-model1\")\n",
"onnx.save(onnx_model1, '/tmp/simple_model1.onnx')"
]
},
@@ -656,7 +657,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "venv",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -670,7 +671,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.8.16"
+ "version": "3.8.18"
},
"vscode": {
"interpreter": {
@@ -679,5 +680,5 @@
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
diff --git a/notebooks/4_quant_lstm.ipynb b/notebooks/4_quant_lstm.ipynb
new file mode 100644
index 00000000..bc2b5e2e
--- /dev/null
+++ b/notebooks/4_quant_lstm.ipynb
@@ -0,0 +1,933 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# QuantLSTM - ONNX (QCDQ) representation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This notebook is divided into `six` parts:\n",
+ "\n",
+ "
Part 0 : Package Installations.\n",
+ "
\n",
+ "
Part 1 : Introduction to LSTMs.\n",
+ "
\n",
+ "
Part 2 : Model creation with brevitas QuantLSTM layer. \n",
+ "
\n",
+ "
Part 3 : Build ONNX model representing the LSTM computation used to process a single input with `QCDQ quantization` (weights/inputs/activations) \n",
+ "
\n",
+ "
Part 4 : Integration of the QCDQ-LSTM graph with the `SCAN` operator. This operator allows cyclic computations (required for state updates in recurrent neural networks) that are currently not supported in ONNX.\n",
+ "
\n",
+ "
Part 5 : Functional verification of the `QCDQ-LSTM` model with brevitas `QuantLSTM` model output."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Package Installations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Required package installations, This cell only needs to be executed once at the start\n",
+ "!pip install torch==1.13.1\n",
+ "!pip install brevitas==0.9.1\n",
+ "!pip install onnx==1.13.0\n",
+ "!pip install onnxoptimizer==0.3.13\n",
+ "!pip install onnxruntime==1.11.1\n",
+ "!pip install netron==7.2.5\n",
+ "!pip install qonnx==0.2.0\n",
+ "!pip install IPython\n",
+ "!pip install ipykernel\n",
+ "!ipython kernel install --user --name=venv\n",
+ "\n",
+ "#The below location can change depending on your installation of the 'venv' virtual environment\n",
+ "!cp ./4_quant_lstm_helper/function.py ../venv/lib/python3.8/site-packages/brevitas/export/onnx/standard/\n",
+ "!cp ./4_quant_lstm_helper/handler.py ../venv/lib/python3.8/site-packages/brevitas/export/onnx/standard/qcdq/\n",
+ "\n",
+ "#NOTE : Make sure to chnage the kernel to from \"Python 3\" to \"venv\" before running the below commands"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Introduction to LSTM's "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`LSTM’s (Long Short-Term Memory)` are sequential neural networks that are capable of learning long term dependencies especially in sequence prediction problems. They are deployed in machine translation, speech recognition, image captioning and especially used for time-series analysis applications.\n",
+ "\n",
+ "LSTM's have `feedback connections`, unlike conventional feed-forward neural networks (where the compute path goes only in the forward direction). This makes them capable of processing time-series data like vide streams or analyzing network traffic patterns.\n",
+ "Such feedback connections though also make their hardware implementations compiliacted as they require state updates unlike feed-forward neural networks.\n",
+ "
\n",
+ "
\n",
+ "The LSTM compute requires the following six compute equations:\n",
+ "$$\n",
+ " f_t = \\sigma (W_f * x_t + U_f * H_{t-1} + b_f) \n",
+ "$$\n",
+ "$$\n",
+ " i_t = \\sigma (W_i * x_t + U_i * H_{t-1} + b_i)\n",
+ "$$\n",
+ "$$\n",
+ " \\tilde{C_t} = tanh(W_c * x_t + U_c * H_{t-1} + b_c)\n",
+ "$$\n",
+ "$$\n",
+ " o_t = \\sigma (W_o * x_t + U_o * H_{t-1} + b_o)\n",
+ "$$\n",
+ "$$\n",
+ " C_t = f_t \\odot C_{t-1} + i_t \\odot \\tilde{C_t}\n",
+ "$$\n",
+ "$$\n",
+ " H_t = tanh(C_t) \\odot o_t \n",
+ "$$\n",
+ "\n",
+ "The first four equations represent the `gate computations`.\n",
+ "We compute the `cell state` and the `hidden state` in the last two equations respectively. \n",
+ "These two states are then fed back into the LSTM cell for the computation of the next input."
+ ]
+ },
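+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Before building the quantized ONNX graph, the next cell gives a minimal plain-NumPy sketch of the six equations above (without any quantization), purely as a reference for the computation performed in each time step. The shapes and parameter values below are illustrative and are not taken from the brevitas export."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Minimal floating-point reference of a single LSTM time step (illustrative shapes, random parameters)\n",
+ "import numpy as np\n",
+ "\n",
+ "def sigmoid(x):\n",
+ "    return 1.0 / (1.0 + np.exp(-x))\n",
+ "\n",
+ "def lstm_cell_step(x_t, h_prev, c_prev, W, U, b):\n",
+ "    # W, U, b are dicts keyed by gate: 'f' (forget), 'i' (input), 'c' (cell), 'o' (output)\n",
+ "    f_t = sigmoid(W['f'] @ x_t + U['f'] @ h_prev + b['f'])\n",
+ "    i_t = sigmoid(W['i'] @ x_t + U['i'] @ h_prev + b['i'])\n",
+ "    c_tilde = np.tanh(W['c'] @ x_t + U['c'] @ h_prev + b['c'])\n",
+ "    o_t = sigmoid(W['o'] @ x_t + U['o'] @ h_prev + b['o'])\n",
+ "    c_t = f_t * c_prev + i_t * c_tilde\n",
+ "    h_t = np.tanh(c_t) * o_t\n",
+ "    return h_t, c_t\n",
+ "\n",
+ "# Tiny example: 3 features, 4 hidden cells, zero-initialised states\n",
+ "rng = np.random.default_rng(0)\n",
+ "W = {k: rng.standard_normal((4, 3)) for k in 'fico'}\n",
+ "U = {k: rng.standard_normal((4, 4)) for k in 'fico'}\n",
+ "b = {k: np.zeros((4, 1)) for k in 'fico'}\n",
+ "h, c = np.zeros((4, 1)), np.zeros((4, 1))\n",
+ "h, c = lstm_cell_step(rng.standard_normal((3, 1)), h, c, W, U, b)\n",
+ "print(h.ravel())"
+ ]
+ },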
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# QuantLSTM model creation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In the 2nd part of the notebook, we will create a single layer `QuantLSTM` model in brevitas. We will evaluate with a given set of inputs. We then export this model to `QONNX` so that the same parameters (weights/biases/scales) can be extracted and used in the `QCDQ-LSTM` implementation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# We import the required libraries to execute different functions in the notebook.\n",
+ "# The first four imports are required to build the QuantLSTM model in brevitas. \n",
+ "# The model created will then be exported to QONNX and it's parameters used in the QCDQ implementation.\n",
+ "\n",
+ "import torch\n",
+ "from torch import nn\n",
+ "from brevitas.nn import QuantLSTM\n",
+ "from brevitas.export import export_onnx_qcdq\n",
+ "\n",
+ "#We need the onnx and onnx helper nodes to build the onnx graph for the LSTM compute.\n",
+ "import onnx\n",
+ "from onnx import numpy_helper\n",
+ "from onnx.helper import make_tensor_value_info, make_node, make_graph, make_model, make_tensor\n",
+ "#onnxruntime will be used to execute our onnx model.\n",
+ "import onnxruntime as rt \n",
+ "from qonnx.util.basic import qonnx_make_model\n",
+ "#numpy allows us to manipulate outputs from the brevitas and the ONNX model\n",
+ "import numpy as np \n",
+ "# Netron visualization tool will help us view interactable graphs\n",
+ "import netron"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# In this block of code we will create the QuantLSTM model using the brevitas layer\n",
+ "torch.manual_seed(0) #Setting the manual seeds to 0 for consistency in outputs.\n",
+ "\n",
+ "# Initializing attributes that can be changed accordingly depending on users requirements.\n",
+ "\n",
+ "num_inputs = 25 #Defining the number of inputs \n",
+ "num_features_brevitas = 10 #This attribute defines number of features each input comprises of\n",
+ "num_hidden_cells_brevitas = 20 #This attribute defines the number of hidden cells in the QuantLSTM layer\n",
+ "\n",
+ "# Creating a sequential model\n",
+ "\n",
+ "model_lstm = nn.Sequential( \n",
+ " QuantLSTM(input_size = num_features_brevitas, hidden_size = num_hidden_cells_brevitas, bias_quant=None) \n",
+ " ) #No other feature described here implies quantization of inputs/parametersers/activations to 8-bits.\n",
+ "model_lstm.eval() #Setting the model to eval mode to make sure all the parameters and scales are frozen and not updated on runtime.\n",
+ "export_path = './quant_lstm_quantization_qcdq.onnx' #Setting export path for the model\n",
+ "export_onnx_qcdq(model_lstm,(torch.randn(num_inputs, 1, num_features_brevitas)), opset_version=14, export_path=export_path) #Exporting the model to QCDQ representation. \n",
+ "\n",
+ "# Creating a test input to execute the above created model\n",
+ "\n",
+ "in_qcdq_node = np.empty([num_inputs,1,num_features_brevitas],dtype=np.float32).reshape([num_inputs,1,num_features_brevitas])\n",
+ "in_qcdq_node.fill(0.8) #Using the fill function to fill the numpy array with a value of 0.8\n",
+ "test_input = torch.from_numpy(in_qcdq_node) #Converting the array to a torch tensor\n",
+ "brevitas_output = model_lstm(test_input) #Executing the model with the set input\n",
+ "brevitas_output = brevitas_output[0].detach().numpy()\n",
+ "print(brevitas_output)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`Abbreviations` : Short-forms defined in the next code block can be referenced here for definitions.\n",
+ "\n",
+ "* Wi = \"Weight matrix for the input gate\" (Similarily for the other three gates)\n",
+ "* Ui = \"Recurrence matrix for the input gate\" (Similarily for the other three gates)\n",
+ "* bi = \"Bias for the input gate\" (Similarily for the other three gates)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# In this block of code we store all the parameters (weight matrices, recurrence matrices, biases, scales and zero-points) that we will need to import in the QCDQ implementation.\n",
+ "# Importing the exported quantized model from brevitas\n",
+ "brevitas_lstm_export = onnx.load(\"./quant_lstm_quantization_qcdq.onnx\")\n",
+ "parameters = brevitas_lstm_export.graph.initializer #Extracting all the parameters from the loaded graph\n",
+ "\n",
+ "# In this loop we will be printing all the parameters to correctly import the parameters values to the right variables\n",
+ "for i in range(len(parameters)):\n",
+ " w = numpy_helper.to_array(parameters[i])\n",
+ " print (brevitas_lstm_export.graph.initializer[i].name)\n",
+ " print(w.shape)\n",
+ " print(w,',',i)\n",
+ " print(\"-------------------------\")\n",
+ " \n",
+ "# Storing the extracted parameters (weights/biases/scales) to the right variables depending on the order in which they are exported. \n",
+ "# The abbreviation described in the above block will help in understanding what each variable denotes\n",
+ "\n",
+ "bi_val = numpy_helper.to_array(parameters[0])\n",
+ "Wi_val = numpy_helper.to_array(parameters[1])\n",
+ "Ui_val = numpy_helper.to_array(parameters[2])\n",
+ "bf_val = numpy_helper.to_array(parameters[3])\n",
+ "Wf_val = numpy_helper.to_array(parameters[4])\n",
+ "Uf_val = numpy_helper.to_array(parameters[5])\n",
+ "bc_val = numpy_helper.to_array(parameters[6])\n",
+ "Wc_val = numpy_helper.to_array(parameters[7])\n",
+ "Uc_val = numpy_helper.to_array(parameters[8])\n",
+ "bo_val = numpy_helper.to_array(parameters[9])\n",
+ "Wo_val = numpy_helper.to_array(parameters[10])\n",
+ "Uo_val = numpy_helper.to_array(parameters[11])\n",
+ "# Scalar values can either be int or float\n",
+ "inp_scale_val = float(numpy_helper.to_array(parameters[12])) \n",
+ "w1_scale_val = float(numpy_helper.to_array(parameters[15]))\n",
+ "w2_scale_val = float(numpy_helper.to_array(parameters[18]))\n",
+ "w3_scale_val = float(numpy_helper.to_array(parameters[19]))\n",
+ "w4_scale_val = float(numpy_helper.to_array(parameters[20]))\n",
+ "eq_scale_val_1 = float(numpy_helper.to_array(parameters[12]))\n",
+ "eq_scale_val_2 = float(numpy_helper.to_array(parameters[22]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# LSTM ONNX model"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In the 3rd part of the notebook, we will construct the `QCDQ-LSTM` model with standard ONNX operators. After loading all the parameters in the above block we can now start building our ONNX model with QCDQ quantization to represent the LSTM computations described in part-1.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Setting parameters : Matching the input output lengths exported from brevitas\n",
+ "num_features = 10\n",
+ "num_hidden_cells = 20\n",
+ "activation_bit_width = 8\n",
+ "\n",
+ "# The below two parameters are for the 'Clip' operation. \n",
+ "# Clip node parameters\n",
+ "max_clip_val = (2 ** (activation_bit_width -1) - 1)\n",
+ "min_clip_val = -(2 ** (activation_bit_width -1) - 1)\n",
+ "\n",
+ "# Zero-point datatype decides the datatype of the output tensor for the quantization operations hence we defined two. One for signed and other for unsigned.\n",
+ "# Zero point values for quantization\n",
+ "zero_point_signed_val = 0\n",
+ "zero_point_unsigned_val = 0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "`Abbreviations` : These describe different short-forms used in the next two blocks.\n",
+ "\n",
+ "* ql = \"QuantizeLinear\"\n",
+ "* dql = \"DequantizeLinear\"\n",
+ "* clp = \"Clip\"\n",
+ "* id = \"Identity\"\n",
+ "* matmul = \"Matrix Multiplication\"\n",
+ "* el_mul = \"Elementwise Multiplication\"\n",
+ "* sig = \"Sigmoid\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We start defining the model by defining the `inputs` and `outputs` defined as value_info tensors in ONNX.\n",
+ "For LSTMs we need three inputs : `inputs`, `previous hidden state` and `previous cell state`. \n",
+ "We get three outputs : `hidden_state`, `cell_state` and `concatenated_hidden_states`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Defining the inputs 'value info' tensors for the compute graph.\n",
+ "hidden_state = make_tensor_value_info(\"h_t-1\",onnx.TensorProto.FLOAT, [num_hidden_cells,1])\n",
+ "cell_state = make_tensor_value_info(\"c_t-1\", onnx.TensorProto.FLOAT, [num_hidden_cells,1])\n",
+ "inputs = make_tensor_value_info(\"inp\",onnx.TensorProto.FLOAT, [num_features,1])\n",
+ "\n",
+ "#Output value info tensor definitions\n",
+ "out_hidden_state = make_tensor_value_info(\"h_t\", onnx.TensorProto.FLOAT, [num_hidden_cells,1])\n",
+ "out_cell_state = make_tensor_value_info(\"c_t\", onnx.TensorProto.FLOAT, [num_hidden_cells,1])\n",
+ "out_hidden_state_concat = make_tensor_value_info(\"h_t_concat\", onnx.TensorProto.FLOAT, [num_hidden_cells,1])#maybe this will have one more dimension"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Once we have defined the inputs and outputs, we will now start defining the operations in the LSTM compute graph.\n",
+ "# We start by quantizing the input with the standard QDQ operation which is 8-bit quantization. \n",
+ "# Note: For quantization to lower bit-width's we can use the clip node.\n",
+ "\n",
+ "# Input quantization\n",
+ "ql_input = make_node(\"QuantizeLinear\", inputs=[\"inp\",\"inp_scale\",\"zero_point_signed\"], outputs=[\"ql_input_out\"],name=\"ql_input\")\n",
+ "id_0_input = make_node(\"Identity\", inputs=[\"ql_input_out\"], outputs=[\"first_input_out\"], name=\"id_0_input\")\n",
+ "dql_input = make_node(\"DequantizeLinear\", inputs=[\"ql_input_out\", 'inp_scale', \"zero_point_signed\"], outputs=[\"dql_input_out\"],name=\"dql_input\")\n",
+ "\n",
+ "# Quantization of the four weight matrices showing QCDQ quantization\n",
+ "ql_w1 = make_node(\"QuantizeLinear\", inputs=[\"W_f\",\"scale_f\",\"zero_point_signed\"], outputs=[\"ql_wf_out\"], name=\"ql_w1\")\n",
+ "clp_w1 = make_node(\"Clip\", inputs=[\"ql_wf_out\",\"min\",\"max\"], outputs=[\"clp_wf\"], name=\"clp_w1\")\n",
+ "dql_w1 = make_node(\"DequantizeLinear\", inputs=[\"clp_wf\",\"scale_f\",\"zero_point_signed\"], outputs=[\"dql_wf_out\"], name=\"dql_w1\")\n",
+ "\n",
+ "ql_w2 = make_node(\"QuantizeLinear\", inputs=[\"W_i\",\"scale_i\",\"zero_point_signed\"], outputs=[\"ql_wi_out\"], name=\"ql_w2\")\n",
+ "clp_w2 = make_node(\"Clip\", inputs=[\"ql_wi_out\",\"min\",\"max\"], outputs=[\"clp_wi\"], name=\"clp_w2\")\n",
+ "dql_w2 = make_node(\"DequantizeLinear\", inputs=[\"clp_wi\",\"scale_i\",\"zero_point_signed\"], outputs=[\"dql_wi_out\"], name=\"dql_w2\")\n",
+ "\n",
+ "ql_w3 = make_node(\"QuantizeLinear\", inputs=[\"W_c\",\"scale_c\",\"zero_point_signed\"], outputs=[\"ql_wc_out\"], name=\"ql_w3\")\n",
+ "clp_w3 = make_node(\"Clip\", inputs=[\"ql_wc_out\",\"min\",\"max\"], outputs=[\"clp_wc\"], name=\"clp_w3\")\n",
+ "dql_w3 = make_node(\"DequantizeLinear\", inputs=[\"clp_wc\",\"scale_c\",\"zero_point_signed\"], outputs=[\"dql_wc_out\"], name=\"dql_w3\")\n",
+ "\n",
+ "ql_w4 = make_node(\"QuantizeLinear\", inputs=[\"W_o\",\"scale_o\",\"zero_point_signed\"], outputs=[\"ql_wo_out\"], name=\"ql_w4\")\n",
+ "clp_w4 = make_node(\"Clip\", inputs=[\"ql_wo_out\",\"min\",\"max\"], outputs=[\"clp_wo\"], name=\"clp_w4\")\n",
+ "dql_w4 = make_node(\"DequantizeLinear\", inputs=[\"clp_wo\",\"scale_o\",\"zero_point_signed\"], outputs=[\"dql_wo_out\"], name=\"dql_w4\")\n",
+ "\n",
+ "# Quantizations for the four recurrence weight matrices showing QCDQ quantization\n",
+ "ql_u1 = make_node(\"QuantizeLinear\", inputs=[\"U_f\",\"scale_f\",\"zero_point_signed\"], outputs=[\"ql_uf_out\"], name=\"ql_u1\")\n",
+ "clp_u1 = make_node(\"Clip\", inputs=[\"ql_uf_out\",\"min\",\"max\"], outputs=[\"clp_uf\"], name=\"clp_u1\")\n",
+ "dql_u1 = make_node(\"DequantizeLinear\", inputs=[\"clp_uf\",\"scale_f\",\"zero_point_signed\"], outputs=[\"dql_uf_out\"], name=\"dql_u1\")\n",
+ "\n",
+ "ql_u2 = make_node(\"QuantizeLinear\", inputs=[\"U_i\",\"scale_i\",\"zero_point_signed\"], outputs=[\"ql_ui_out\"], name=\"ql_u2\")\n",
+ "clp_u2 = make_node(\"Clip\", inputs=[\"ql_ui_out\",\"min\",\"max\"], outputs=[\"clp_ui\"], name=\"clp_u2\")\n",
+ "dql_u2 = make_node(\"DequantizeLinear\", inputs=[\"clp_ui\",\"scale_i\",\"zero_point_signed\"], outputs=[\"dql_ui_out\"], name=\"dql_u2\")\n",
+ "\n",
+ "ql_u3 = make_node(\"QuantizeLinear\", inputs=[\"U_c\",\"scale_c\",\"zero_point_signed\"], outputs=[\"ql_uc_out\"], name=\"ql_u3\")\n",
+ "clp_u3 = make_node(\"Clip\", inputs=[\"ql_uc_out\",\"min\",\"max\"], outputs=[\"clp_uc\"], name=\"clp_u3\")\n",
+ "dql_u3 = make_node(\"DequantizeLinear\", inputs=[\"clp_uc\",\"scale_c\",\"zero_point_signed\"], outputs=[\"dql_uc_out\"], name=\"dql_u3\")\n",
+ "\n",
+ "ql_u4 = make_node(\"QuantizeLinear\", inputs=[\"U_o\",\"scale_o\",\"zero_point_signed\"], outputs=[\"ql_uo_out\"], name=\"ql_u4\")\n",
+ "clp_u4 = make_node(\"Clip\", inputs=[\"ql_uo_out\",\"min\",\"max\"], outputs=[\"clp_uo\"], name=\"clp_u4\")\n",
+ "dql_u4 = make_node(\"DequantizeLinear\", inputs=[\"clp_uo\",\"scale_o\",\"zero_point_signed\"], outputs=[\"dql_uo_out\"], name=\"dql_u4\")\n",
+ "\n",
+ "# Once we have quantized the weights and inputs we can now start defining the operations for the 6 LSTM equations.\n",
+ "# The first four gate equations have a very similar compute structure. We define the first four gate computations in this order : Forget, Input, Output, Cell \n",
+ "\n",
+ "# 1st Equation : Forget gate\n",
+ "matmul_1_e1 = make_node(\"MatMul\", inputs=[\"dql_wf_out\",\"dql_input_out\"], outputs=[\"out_m1_e1\"], name=\"matmul_1_e1\")\n",
+ "matmul_2_e1 = make_node(\"MatMul\", inputs=[\"dql_uf_out\",\"h_t-1\"], outputs=[\"out_m2_e1\"],name=\"matmul_2_e1\")\n",
+ "add_1_e1 = make_node(\"Add\", inputs=[\"out_m1_e1\",\"out_m2_e1\"], outputs=[\"out_add1_e1\"],name=\"add_1_e1\")\n",
+ "add_2_e1 = make_node(\"Add\", inputs=[\"out_add1_e1\",\"b_f\"], outputs=[\"f_t_ba\"],name=\"add_2_e1\")\n",
+ "ql_1_e1 = make_node(\"QuantizeLinear\", inputs=[\"f_t_ba\",\"scale_3\",\"zero_point_signed\"], outputs=[\"f_t_ql1\"],name=\"ql_1_e1\")\n",
+ "dql_1_e1 = make_node(\"DequantizeLinear\", inputs=[\"f_t_ql1\", \"scale_4\", \"zero_point_signed\"], outputs=[\"f_t_dql1\"], name=\"dql_1_e1\")\n",
+ "sig_f_e1 = make_node(\"Sigmoid\", inputs=[\"f_t_dql1\"], outputs=[\"f_t\"],name=\"sig_f_e1\")\n",
+ "ql_2_e1 = make_node(\"QuantizeLinear\", inputs=[\"f_t\",\"scale_4\",\"zero_point_unsigned\"], outputs=[\"f_t_ql2\"],name=\"ql_2_e1\")\n",
+ "dql_2_e1 = make_node(\"DequantizeLinear\", inputs=[\"f_t_ql2\", \"scale_4\", \"zero_point_unsigned\"], outputs=[\"f_t_dql2\"], name=\"dql_2_e1\")\n",
+ "\n",
+ "# 2nd Equation : Input gate\n",
+ "matmul_1_e2 = make_node(\"MatMul\", inputs=[\"dql_wi_out\",\"dql_input_out\"], outputs=[\"out_m1_e2\"], name=\"matmul_1_e2\")\n",
+ "matmul_2_e2 = make_node(\"MatMul\", inputs=[\"dql_ui_out\",\"h_t-1\"], outputs=[\"out_m2_e2\"],name=\"matmul_2_e2\")\n",
+ "add_1_e2 = make_node(\"Add\", inputs=[\"out_m1_e2\",\"out_m2_e2\"], outputs=[\"out_add1_e2\"],name=\"add_1_e2\")\n",
+ "add_2_e2 = make_node(\"Add\", inputs=[\"out_add1_e2\",\"b_i\"], outputs=[\"i_t_ba\"],name=\"add_2_e2\")\n",
+ "ql_1_e2 = make_node(\"QuantizeLinear\", inputs=[\"i_t_ba\",\"scale_1\",\"zero_point_signed\"], outputs=[\"i_t_ql1\"],name=\"ql_1_e2\")\n",
+ "dql_1_e2 = make_node(\"DequantizeLinear\", inputs=[\"i_t_ql1\",\"scale_1\", \"zero_point_signed\"], outputs=[\"i_t_dql1\"], name=\"dql_1_e2\")\n",
+ "sig_i_e2 = make_node(\"Sigmoid\", inputs=[\"i_t_dql1\"], outputs=[\"i_t\"],name=\"sig_i_e2\")\n",
+ "ql_2_e2 = make_node(\"QuantizeLinear\", inputs=[\"i_t\",\"scale_2\",\"zero_point_unsigned\"], outputs=[\"i_t_ql2\"],name=\"ql_2_e2\")\n",
+ "dql_2_e2 = make_node(\"DequantizeLinear\", inputs=[\"i_t_ql2\", \"scale_2\", \"zero_point_unsigned\"], outputs=[\"i_t_dql2\"], name=\"dql_2_e2\")\n",
+ "\n",
+ "# 3rd Equation : Output gate\n",
+ "matmul_1_e3 = make_node(\"MatMul\", inputs=[\"dql_wo_out\",\"dql_input_out\"], outputs=[\"out_m1_e3\"], name=\"matmul_1_e3\")\n",
+ "matmul_2_e3 = make_node(\"MatMul\", inputs=[\"dql_uo_out\",\"h_t-1\"], outputs=[\"out_m2_e3\"],name=\"matmul_2_e3\")\n",
+ "add_1_e3 = make_node(\"Add\", inputs=[\"out_m1_e3\",\"out_m2_e3\"], outputs=[\"out_add1_e3\"],name=\"add_1_e3\")\n",
+ "add_2_e3 = make_node(\"Add\", inputs=[\"out_add1_e3\",\"b_o\"], outputs=[\"o_t_ba\"],name=\"add_2_e3\" )\n",
+ "ql_1_e3 = make_node(\"QuantizeLinear\", inputs=[\"o_t_ba\",\"scale_7\",\"zero_point_signed\"], outputs=[\"o_t_ql1\"],name=\"ql_1_e3\")\n",
+ "dql_1_e3 = make_node(\"DequantizeLinear\", inputs=[\"o_t_ql1\",\"scale_7\", \"zero_point_signed\"], outputs=[\"o_t_dql1\"], name=\"dql_1_e3\")\n",
+ "sig_o_e3 = make_node(\"Sigmoid\", inputs=[\"o_t_dql1\"], outputs=[\"o_t\"],name=\"sig_o_e3\")\n",
+ "ql_2_e3 = make_node(\"QuantizeLinear\", inputs=[\"o_t\",\"scale_8\",\"zero_point_unsigned\"], outputs=[\"o_t_ql2\"],name=\"ql_2_e3\")\n",
+ "dql_2_e3 = make_node(\"DequantizeLinear\", inputs=[\"o_t_ql2\", \"scale_8\", \"zero_point_unsigned\"], outputs=[\"o_t_dql2\"], name=\"dql_2_e3\")\n",
+ "\n",
+ "# 4th Equation : Cell gate\n",
+ "matmul_1_e4 = make_node(\"MatMul\", inputs=[\"dql_wc_out\",\"dql_input_out\"], outputs=[\"out_m1_e4\"], name=\"matmul_1_e4\")\n",
+ "matmul_2_e4 = make_node(\"MatMul\", inputs=[\"dql_uc_out\",\"h_t-1\"], outputs=[\"out_m2_e4\"],name=\"matmul_2_e4\")\n",
+ "add_1_e4 = make_node(\"Add\", inputs=[\"out_m1_e4\",\"out_m2_e4\"], outputs=[\"out_add1_e4\"],name=\"add_1_e4\")\n",
+ "add_2_e4 = make_node(\"Add\", inputs=[\"out_add1_e4\",\"b_c\"], outputs=[\"c_t_ba\"],name=\"add_2_e4\")\n",
+ "ql_1_e4 = make_node(\"QuantizeLinear\", inputs=[\"c_t_ba\",\"scale_5\",\"zero_point_signed\"], outputs=[\"c_t_ql1\"],name=\"ql_1_e4\")\n",
+ "dql_1_e4 = make_node(\"DequantizeLinear\", inputs=[\"c_t_ql1\",\"scale_5\", \"zero_point_signed\"], outputs=[\"c_t_dql1\"], name=\"dql_1_e4\")\n",
+ "tanh_c_e4 = make_node(\"Tanh\", inputs=[\"c_t_dql1\"], outputs=[\"c_t_partial\"],name=\"tanh_c_e4\")\n",
+ "ql_2_e4 = make_node(\"QuantizeLinear\", inputs=[\"c_t_partial\",\"scale_6\",\"zero_point_signed\"], outputs=[\"c_t_ql2\"],name=\"ql_2_e4\")\n",
+ "dql_2_e4 = make_node(\"DequantizeLinear\", inputs=[\"c_t_ql2\", \"scale_6\", \"zero_point_signed\"], outputs=[\"c_t_dql2\"], name=\"dql_2_e4\")\n",
+ "\n",
+ "# Once we have the first four gate computations we can procedd with the computation of the cell_state and the hidden_state in the 5th and the 6th equations.\n",
+ "# 5th Equation : Cell state compute\n",
+ "el_mul_1_e5 = make_node(\"Mul\", inputs=[\"f_t_dql2\",\"c_t-1\"], outputs=[\"out_el_mul1_e5\"],name=\"el_mul_1_e5\")\n",
+ "ql_1_e5 = make_node(\"QuantizeLinear\", inputs=[\"out_el_mul1_e5\",\"scale_9\",\"zero_point_signed\"], outputs=[\"fifth_ql1\"],name=\"ql_1_e5\")\n",
+ "dql_1_e5 = make_node(\"DequantizeLinear\", inputs=[\"fifth_ql1\",\"scale_9\", \"zero_point_signed\"], outputs=[\"fifth_dql1\"], name=\"dql_1_e5\")\n",
+ "el_mul_2_e5 = make_node(\"Mul\", inputs=[\"i_t_dql2\",\"c_t_dql2\"], outputs=[\"out_el_mul2_e5\"], name=\"el_mul_2_e5\") \n",
+ "ql_2_e5 = make_node(\"QuantizeLinear\", inputs=[\"out_el_mul2_e5\",\"scale_9\",\"zero_point_signed\"], outputs=[\"fifth_ql2\"],name=\"ql_2_e5\")\n",
+ "dql_2_e5 = make_node(\"DequantizeLinear\", inputs=[\"fifth_ql2\",\"scale_9\", \"zero_point_signed\"], outputs=[\"fifth_dql2\"], name=\"dql_2_e5\")\n",
+ "add_1_e5 = make_node(\"Add\", inputs=[\"fifth_dql1\",\"fifth_dql2\"], outputs=[\"c_t\"], name=\"add_1_e5\") #-----------------> The first output is computed here.\n",
+ "ql_3_e5 = make_node(\"QuantizeLinear\", inputs=[\"c_t\",\"scale_9\",\"zero_point_signed\"], outputs=[\"h_t_ql\"], name=\"ql_3_e5\")\n",
+ "dql_3_e5 = make_node(\"DequantizeLinear\", inputs=[\"h_t_ql\",\"scale_9\",\"zero_point_signed\"], outputs=[\"h_t_dql\"], name=\"dql_3_e5\")\n",
+ "\n",
+ "# 6th Equation : Hidden state compute\n",
+ "tanh_node_e6 = make_node(\"Tanh\", inputs=[\"h_t_dql\"], outputs=[\"out_tanh_e6\"], name=\"tanh_node_e6\") \n",
+ "ql_1_e6 = make_node(\"QuantizeLinear\", inputs=[\"out_tanh_e6\",\"scale_10\",\"zero_point_signed\"], outputs=[\"sixth_ql1\"], name=\"ql_1_e6\")\n",
+ "dql_1_e6 = make_node(\"DequantizeLinear\", inputs=[\"sixth_ql1\",\"scale_10\",\"zero_point_signed\"], outputs=[\"sixth_dql1\"], name=\"dql_1_e6\")\n",
+ "el_mul_1_e6 = make_node(\"Mul\", inputs=[\"sixth_dql1\",\"o_t_dql2\"], outputs=[\"h_t_inter\"], name=\"el_mul_1_e6\")#h_t_inter\n",
+ "ql_2_e6 = make_node(\"QuantizeLinear\", inputs=[\"h_t_inter\",\"scale_11\",\"zero_point_signed\"], outputs=[\"sixth_ql2\"], name=\"ql_2_e6\")\n",
+ "dql_2_e6 = make_node(\"DequantizeLinear\", inputs=[\"sixth_ql2\",\"scale_11\",\"zero_point_signed\"], outputs=[\"h_t\"], name=\"dql_2_e6\") #-----------------> The second output is computed here.\n",
+ "id_1_e6 = make_node(\"Identity\", inputs=[\"h_t\"], outputs=[\"h_t_concat\"], name=\"id_1_e6\") #-----------------> The third output is computed here."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "After defining the above operations we now connect them and create a graph with the help of onnx.helper `make_graph` utility function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lstm_body = make_graph(\n",
+ " nodes=[\n",
+ " ql_input,\n",
+ " dql_input, \n",
+ " ql_w1,\n",
+ " clp_w1, \n",
+ " dql_w1,\n",
+ " ql_w2,\n",
+ " clp_w2, \n",
+ " dql_w2,\n",
+ " ql_w3,\n",
+ " clp_w3, \n",
+ " dql_w3,\n",
+ " ql_w4,\n",
+ " clp_w4, \n",
+ " dql_w4,\n",
+ " ql_u1,\n",
+ " clp_u1, \n",
+ " dql_u1,\n",
+ " ql_u2,\n",
+ " clp_u2,\n",
+ " dql_u2, \n",
+ " ql_u3,\n",
+ " clp_u3,\n",
+ " dql_u3, \n",
+ " ql_u4,\n",
+ " clp_u4,\n",
+ " dql_u4, \n",
+ " matmul_1_e1,\n",
+ " matmul_2_e1, \n",
+ " add_1_e1, \n",
+ " add_2_e1,\n",
+ " ql_1_e1,\n",
+ " dql_1_e1,\n",
+ " sig_f_e1,\n",
+ " ql_2_e1, \n",
+ " dql_2_e1, \n",
+ " matmul_1_e2,\n",
+ " matmul_2_e2, \n",
+ " add_1_e2, \n",
+ " add_2_e2,\n",
+ " ql_1_e2,\n",
+ " dql_1_e2,\n",
+ " sig_i_e2,\n",
+ " ql_2_e2, \n",
+ " dql_2_e2, \n",
+ " matmul_1_e3,\n",
+ " matmul_2_e3, \n",
+ " add_1_e3, \n",
+ " add_2_e3,\n",
+ " ql_1_e3,\n",
+ " dql_1_e3,\n",
+ " sig_o_e3,\n",
+ " ql_2_e3, \n",
+ " dql_2_e3, \n",
+ " matmul_1_e4,\n",
+ " matmul_2_e4, \n",
+ " add_1_e4, \n",
+ " add_2_e4,\n",
+ " ql_1_e4,\n",
+ " dql_1_e4,\n",
+ " tanh_c_e4,\n",
+ " ql_2_e4, \n",
+ " dql_2_e4, \n",
+ " el_mul_1_e5,\n",
+ " ql_1_e5, \n",
+ " dql_1_e5,\n",
+ " el_mul_2_e5,\n",
+ " ql_2_e5,\n",
+ " dql_2_e5,\n",
+ " add_1_e5,\n",
+ " ql_3_e5, \n",
+ " dql_3_e5,\n",
+ " tanh_node_e6,\n",
+ " ql_1_e6, \n",
+ " dql_1_e6,\n",
+ " el_mul_1_e6,\n",
+ " ql_2_e6,\n",
+ " dql_2_e6, \n",
+ " id_1_e6\n",
+ " ],\n",
+ " name = \"qcdq-lsmt-body\",\n",
+ " inputs=[hidden_state,cell_state,inputs], #The order in which the inputs are defined here should match the input order when the scan node is defined.\n",
+ " outputs = [out_hidden_state, out_cell_state, out_hidden_state_concat],\n",
+ " value_info=[\n",
+ " make_tensor_value_info(\"ql_input_out\",onnx.TensorProto.INT8, [num_features,1]),\n",
+ " make_tensor_value_info(\"dql_input_out\",onnx.TensorProto.FLOAT, [num_features,1]),\n",
+ " make_tensor_value_info(\"out_m1_e1\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"out_m2_e1\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"out_add1_e1\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"f_t_ba\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"f_t_ql1\",onnx.TensorProto.INT8, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"f_t_dql1\", onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"f_t_ql2\",onnx.TensorProto.UINT8, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"f_t_dql2\", onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"out_m1_e2\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"out_m2_e2\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"out_add1_e2\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"i_t_ba\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"i_t_ql1\",onnx.TensorProto.INT8, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"i_t_dql1\", onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"i_t_ql2\",onnx.TensorProto.UINT8, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"i_t_dql2\", onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"out_m1_e3\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"out_m2_e3\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"out_add1_e3\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"o_t_ba\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"o_t_ql1\",onnx.TensorProto.INT8, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"o_t_dql1\", onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"o_t_ql2\",onnx.TensorProto.UINT8, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"o_t_dql2\", onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"out_m1_e4\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"out_m2_e4\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"out_add1_e4\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"c_t_ba\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"c_t_ql1\",onnx.TensorProto.INT8, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"c_t_dql1\", onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"c_t_ql2\",onnx.TensorProto.INT8, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"c_t_dql2\", onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"f_t\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"i_t\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"o_t\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"c_t_partial\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"out_el_mul1_e5\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"out_el_mul2_e5\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"fifth_ql1\",onnx.TensorProto.INT8, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"fifth_dql1\", onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"fifth_ql2\",onnx.TensorProto.INT8, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"fifth_dql2\", onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"h_t_ql\",onnx.TensorProto.INT8, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"h_t_dql\", onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"out_tanh_e6\",onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"sixth_ql1\",onnx.TensorProto.INT8, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"sixth_dql1\", onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"sixth_ql2\",onnx.TensorProto.INT8, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"h_t_inter\", onnx.TensorProto.FLOAT, [num_hidden_cells,1]),\n",
+ " make_tensor_value_info(\"ql_wf_out\", onnx.TensorProto.INT8, [num_hidden_cells,num_features]),\n",
+ " make_tensor_value_info(\"dql_wf_out\",onnx.TensorProto.FLOAT, [num_hidden_cells,num_features]),\n",
+ " make_tensor_value_info(\"ql_wi_out\", onnx.TensorProto.INT8, [num_hidden_cells,num_features]),\n",
+ " make_tensor_value_info(\"dql_wi_out\",onnx.TensorProto.FLOAT, [num_hidden_cells,num_features]),\n",
+ " make_tensor_value_info(\"ql_wc_out\", onnx.TensorProto.INT8, [num_hidden_cells,num_features]),\n",
+ " make_tensor_value_info(\"dql_wc_out\",onnx.TensorProto.FLOAT, [num_hidden_cells,num_features]),\n",
+ " make_tensor_value_info(\"ql_wo_out\", onnx.TensorProto.INT8, [num_hidden_cells,num_features]),\n",
+ " make_tensor_value_info(\"dql_wo_out\",onnx.TensorProto.FLOAT, [num_hidden_cells,num_features]),\n",
+ " make_tensor_value_info(\"ql_uf_out\",onnx.TensorProto.INT8, [num_hidden_cells,num_hidden_cells]),\n",
+ " make_tensor_value_info(\"dql_uf_out\",onnx.TensorProto.FLOAT, [num_hidden_cells,num_hidden_cells]),\n",
+ " make_tensor_value_info(\"ql_ui_out\",onnx.TensorProto.INT8, [num_hidden_cells,num_hidden_cells]),\n",
+ " make_tensor_value_info(\"dql_ui_out\",onnx.TensorProto.FLOAT, [num_hidden_cells,num_hidden_cells]),\n",
+ " make_tensor_value_info(\"ql_uc_out\",onnx.TensorProto.INT8, [num_hidden_cells,num_hidden_cells]),\n",
+ " make_tensor_value_info(\"dql_uc_out\",onnx.TensorProto.FLOAT, [num_hidden_cells,num_hidden_cells]),\n",
+ " make_tensor_value_info(\"ql_uo_out\",onnx.TensorProto.INT8, [num_hidden_cells,num_hidden_cells]),\n",
+ " make_tensor_value_info(\"dql_uo_out\",onnx.TensorProto.FLOAT, [num_hidden_cells,num_hidden_cells]),\n",
+ " make_tensor_value_info(\"clp_wf\",onnx.TensorProto.INT8, [num_hidden_cells,num_features]),\n",
+ " make_tensor_value_info(\"clp_wi\",onnx.TensorProto.INT8, [num_hidden_cells,num_features]),\n",
+ " make_tensor_value_info(\"clp_wc\",onnx.TensorProto.INT8, [num_hidden_cells,num_features]),\n",
+ " make_tensor_value_info(\"clp_wo\",onnx.TensorProto.INT8, [num_hidden_cells,num_features]),\n",
+ " make_tensor_value_info(\"clp_uf\",onnx.TensorProto.INT8, [num_hidden_cells,num_hidden_cells]), \n",
+ " make_tensor_value_info(\"clp_ui\",onnx.TensorProto.INT8, [num_hidden_cells,num_hidden_cells]),\n",
+ " make_tensor_value_info(\"clp_uc\",onnx.TensorProto.INT8, [num_hidden_cells,num_hidden_cells]),\n",
+ " make_tensor_value_info(\"clp_uo\",onnx.TensorProto.INT8, [num_hidden_cells,num_hidden_cells]),\n",
+ " ],\n",
+ " initializer=[\n",
+ " # Initializing the weight and recurrecne matrices\n",
+ " make_tensor('W_f',onnx.TensorProto.FLOAT, [num_hidden_cells,num_features], (Wf_val)),\n",
+ " make_tensor('U_f',onnx.TensorProto.FLOAT, [num_hidden_cells,num_hidden_cells], (Uf_val)),\n",
+ " make_tensor('b_f',onnx.TensorProto.FLOAT, [num_hidden_cells,1], (bf_val)),\n",
+ " make_tensor('W_i',onnx.TensorProto.FLOAT, [num_hidden_cells,num_features], (Wi_val)),\n",
+ " make_tensor('U_i',onnx.TensorProto.FLOAT, [num_hidden_cells,num_hidden_cells], (Ui_val)),\n",
+ " make_tensor('b_i',onnx.TensorProto.FLOAT, [num_hidden_cells,1], (bi_val)),\n",
+ " make_tensor('W_o',onnx.TensorProto.FLOAT, [num_hidden_cells,num_features], (Wo_val)),\n",
+ " make_tensor('U_o',onnx.TensorProto.FLOAT, [num_hidden_cells,num_hidden_cells], (Uo_val)),\n",
+ " make_tensor('b_o',onnx.TensorProto.FLOAT, [num_hidden_cells,1], (bo_val)),\n",
+ " make_tensor('W_c',onnx.TensorProto.FLOAT, [num_hidden_cells,num_features], (Wc_val)),\n",
+ " make_tensor('U_c',onnx.TensorProto.FLOAT, [num_hidden_cells,num_hidden_cells], (Uc_val)),\n",
+ " make_tensor('b_c',onnx.TensorProto.FLOAT, [num_hidden_cells,1], (bc_val)),\n",
+ " # Input scale value\n",
+ " make_tensor('inp_scale',onnx.TensorProto.FLOAT, [],[inp_scale_val]),\n",
+ " # Scale weight values\n",
+ " make_tensor('scale_i',onnx.TensorProto.FLOAT, [],[w1_scale_val]),\n",
+ " make_tensor('scale_c',onnx.TensorProto.FLOAT, [],[w2_scale_val]),\n",
+ " make_tensor('scale_o',onnx.TensorProto.FLOAT, [],[w3_scale_val]),\n",
+ " make_tensor('scale_f',onnx.TensorProto.FLOAT, [],[w4_scale_val]),\n",
+ " # Scale values for the six equations\n",
+ " make_tensor('scale_1',onnx.TensorProto.FLOAT, [],[eq_scale_val_1]),\n",
+ " make_tensor('scale_2',onnx.TensorProto.FLOAT, [],[eq_scale_val_1]), \n",
+ " make_tensor('scale_3',onnx.TensorProto.FLOAT, [],[eq_scale_val_1]),\n",
+ " make_tensor('scale_test',onnx.TensorProto.FLOAT, [],[eq_scale_val_1]),\n",
+ " make_tensor('scale_4',onnx.TensorProto.FLOAT, [],[eq_scale_val_1]),\n",
+ " make_tensor('scale_5',onnx.TensorProto.FLOAT, [],[eq_scale_val_1]),\n",
+ " make_tensor('scale_6',onnx.TensorProto.FLOAT, [],[eq_scale_val_1]),\n",
+ " make_tensor('scale_7',onnx.TensorProto.FLOAT, [],[eq_scale_val_2]), \n",
+ " make_tensor('scale_8',onnx.TensorProto.FLOAT, [],[eq_scale_val_2]),\n",
+ " make_tensor('scale_9',onnx.TensorProto.FLOAT, [],[eq_scale_val_1]),\n",
+ " make_tensor('scale_10',onnx.TensorProto.FLOAT, [],[eq_scale_val_2]),\n",
+ " make_tensor('scale_11',onnx.TensorProto.FLOAT, [],[eq_scale_val_1]),\n",
+ " # Scales for zero-points : Zero-point datatype defines the dataype of the output for that quantization\n",
+ " make_tensor('zero_point_signed',onnx.TensorProto.INT8,[],[zero_point_signed_val]),\n",
+ " make_tensor('zero_point_unsigned',onnx.TensorProto.UINT8,[],[zero_point_unsigned_val]),\n",
+ " # Introducing scalars for the clip operators.\n",
+ " make_tensor('min', onnx.TensorProto.INT8, [], [min_clip_val]),\n",
+ " make_tensor('max', onnx.TensorProto.INT8, [], [max_clip_val]),\n",
+ " ]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The above created graph can now be converted into a qonnx model with the `qonnx_make_model` utility. We save the model with `onnx.save` utility and then view it in Netron with the help of `showInNetron` utility. \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lstm_model = qonnx_make_model(lstm_body, producer_name=\"QuantizeLSTM_scan\")\n",
+ "onnx.save(lstm_model, './lstm_full_graph.onnx')\n",
+ "netron.start('./lstm_full_graph.onnx')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In this block of code we execute the onnx graph to check that it can execute without any errors. We perform it's functional verification in the later part of the notebook."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Before the model can be executed, it'd opset version needs to be set to a minimum of '14' to accomodate clip nodes with INT8 and UINT8 input. Otherwise ONNX cannot create an execution session and we get errors.\n",
+ "lstm_model.opset_import[0].version = 14\n",
+ "\n",
+ "# Creating the inference session here for the updated model here\n",
+ "sess = rt.InferenceSession(lstm_model.SerializeToString())\n",
+ "\n",
+ "# Defining dummy inputs and the model parameters for dummy execution\n",
+ "X_inp = np.empty([num_features,1],dtype=np.float32).reshape([num_features,1])\n",
+ "X_inp.fill(0.8)\n",
+ "hidden_state_input = np.zeros((num_hidden_cells, 1)).astype(np.float32)\n",
+ "cell_state_input = np.zeros((num_hidden_cells, 1)).astype(np.float32)\n",
+ "\n",
+ "# Assigning the above defined values to the input dictionary of the ONNX model.\n",
+ "input_dict = {}\n",
+ "input_dict[\"inp\"] = X_inp\n",
+ "input_dict[\"h_t-1\"] = hidden_state_input\n",
+ "input_dict[\"c_t-1\"] = cell_state_input \n",
+ "\n",
+ "# Setting up the inference session and executing the onnx model here.\n",
+ "sess = rt.InferenceSession(lstm_model.SerializeToString())\n",
+ "output = sess.run(None, input_dict)\n",
+ "print(output)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# SCAN Operation Integration"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Introduction to ONNX Scan operation\n",
+ "Observations regarding the `Scan` operator in ONNX:\n",
+ "\n",
+ "1. `Scan` can be used to iterate over one or more scan input tensors constructing zero or more scan output tensors. It combines ideas from general recurrences, functional programming cnostructs such as scan, fold, map and zip.\n",
+ "2. The attribute `body` in the node must be a graph specifying the computation to be performed in every iteration.\n",
+ "3. Input is the current values of the `state variables` and the current `iterated element` of the scan input. Returns values of the `state variables` and the `scan output element tensors`. (Can be greater than 1)\n",
+ "4. The values of the scan output tensors are concatenated over all the iterations to produce the scan output values of the scan construct.\n",
+ "5. The properties that make a scan node unique and different from a normal compute node are:\n",
+ "* Allows update of state variable after each input computation; to be used in the processing of the next input.\n",
+ "* It needs to scan your inputs row by row or column by column; then keep computing the output with the updated hidden state for every input; while storing all the intermediate outputs in the form of hidden states.\n",
+ "\n",
+ "More information regarding this op can be found in these links:\n",
+ "\n",
+ "* https://github.com/onnx/onnx/blob/main/docs/Operators.md#Scan\n",
+ "* https://onnx.ai/onnx/intro/python.html#scan"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `Scan` operation is essentially a container operator which will consume the LSTM graph that we created above in it's body.\n",
+ "To create it, we need to define separate input and output value info tensors just for the Scan operator. We will then follow the same steps as the `QCDQ-LSTM` graph creation to convert the above graph into an executable ONNX model.\n",
+ "
\n",
+ "We start by defining the input and output value info tensors for the `scan_graph` creation. These tensors act as the wrapper to the previously defined graph.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Inputs\n",
+ "scan_input = make_tensor_value_info(\"scan_input\",onnx.TensorProto.FLOAT, [None,num_features,1])#X ; scan input. Here None defines the varibale number of inputs that can be supplied for input processing.\n",
+ "scan_hidden_state = make_tensor_value_info(\"scan_hidden_state\",onnx.TensorProto.FLOAT, [num_hidden_cells,1])# h_t-1\n",
+ "scan_cell_state = make_tensor_value_info(\"scan_cell_state\",onnx.TensorProto.FLOAT, [num_hidden_cells,1])# c_t-1\n",
+ "\n",
+ "# Outputs\n",
+ "scan_out_hidden_state = make_tensor_value_info(\"scan_out_hidden_state\", onnx.TensorProto.FLOAT, [num_hidden_cells,1])#h_t\n",
+ "scan_out_cell_state = make_tensor_value_info(\"scan_out_cell_state\", onnx.TensorProto.FLOAT, [num_hidden_cells,1])#c_t\n",
+ "scan_out_hidden_state_concat = make_tensor_value_info(\"scan_out_hidden_state_concat\", onnx.TensorProto.FLOAT, [None,num_hidden_cells,1])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We will now create the scan operator here now utilizing the `make_node` utility from ONNX.\n",
+ "Note, in the body of the operation we have included the `lstm_body` graph we created in the above steps."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "scan_node_lstm = make_node(\n",
+ " \"Scan\", \n",
+ " inputs=[\"scan_hidden_state\",\"scan_cell_state\",\"scan_input\"], \n",
+ " outputs=[\"scan_out_hidden_state\",\"scan_out_cell_state\",\"scan_out_hidden_state_concat\"], \n",
+ " num_scan_inputs=1,\n",
+ " body=lstm_body, domain=''\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can now define the graph for the scan operator utilizing the `make_graph` utility."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "scan_lstm_node_graph = make_graph(\n",
+ " nodes = [scan_node_lstm],\n",
+ " name=\"lstm-scan-node\",\n",
+ " inputs=[scan_hidden_state,scan_cell_state,scan_input],#h_t-1, c_t-1, X\n",
+ " outputs=[scan_out_hidden_state,scan_out_cell_state,scan_out_hidden_state_concat]#h_t,c_t,h_t_concat\n",
+ ")\n",
+ "\n",
+ "# Creating the model from the above created graph and saving it.\n",
+ "lstm_scan_node_model = qonnx_make_model(scan_lstm_node_graph, producer_name=\"scan-lstm\")\n",
+ "onnx.save(lstm_scan_node_model, './lstm_scan_node_model.onnx')\n",
+ "netron.start('./lstm_scan_node_model.onnx')\n",
+ "\n",
+ "#Checking the model for any errors\n",
+ "onnx.checker.check_model(lstm_scan_node_model)\n",
+ "print(lstm_scan_node_model.graph.value_info)\n",
+ "\n",
+ "#Conversion to version 14 of onnx to accomodate clip nodes as done for the LSTM graph also.\n",
+ "lstm_scan_node_model.opset_import[0].version = 14"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that we have the SCAN based quantized LSTM model ready, we can now go forward and test it with the same sets of inputs we used for the testing of the brevitas model.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Defining the values of the varibales to test the execution of the scan model\n",
+ "num_inputs = 25\n",
+ "\n",
+ "#Initializing the initial values of the hidden state and the cell state. \n",
+ "# Also assigning the same input as the one used for the brevitas execution.\n",
+ "\n",
+ "hidden_state_inp = np.zeros((num_hidden_cells, 1)).astype(np.float32)#'h_t-1'\n",
+ "cell_state_inp = np.zeros((num_hidden_cells, 1)).astype(np.float32)#'c_t-1'\n",
+ "scan_inp = np.empty([num_inputs,num_features,1],dtype=np.float32).reshape([num_inputs,num_features,1])\n",
+ "scan_inp.fill(0.8)\n",
+ "\n",
+ "# Assigning the defined input values to the input dictionary of the scan model\n",
+ "input_dict = {}\n",
+ "input_dict[\"scan_hidden_state\"] = hidden_state_inp\n",
+ "input_dict[\"scan_cell_state\"] = cell_state_inp\n",
+ "input_dict[\"scan_input\"] = scan_inp\n",
+ "\n",
+ "# We can now set up the inference session and execute the scan onnx model here. \n",
+ "# The execution session gives some warnings which can be ignored.\n",
+ "\n",
+ "sess = rt.InferenceSession(lstm_scan_node_model.SerializeToString())\n",
+ "scan_output = sess.run(None, input_dict)\n",
+ "print('Final Hidden State',scan_output[0])\n",
+ "print(\"------------------------\")\n",
+ "print('Final Cell State',scan_output[1])\n",
+ "print(\"------------------------\")\n",
+ "print('All Hidden States',scan_output[2])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Functional Verification"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In the final part of the notebook, we compare the output of the 8-bit quantized `(QCDQ)-LSTM` implementation with the `QuantLSTM` brevitas model.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# We first match the shape of both the outputs to perform the functional verification correctly\n",
+ "\n",
+ "print('Brevitas Output shape : ', brevitas_output.shape)\n",
+ "all_hidden_states = np.array(scan_output[2])\n",
+ "all_hidden_states = all_hidden_states.reshape([num_inputs,1,num_hidden_cells])\n",
+ "print('SCAN-QCDQ-LSTM output shape :', all_hidden_states.shape)\n",
+ "print('-----------------------------------')\n",
+ "print('Brevitas Output = ',brevitas_output)\n",
+ "print('-----------------------------------')\n",
+ "print('SCAN-QCDQ-LSTM output',all_hidden_states)\n",
+ "print('-----------------------------------')\n",
+ "\n",
+ "# Comparison between the 'Scan-LSTM output' and the brevitas 'QuantLSTM' ouptut\n",
+ "# Since the outputs from both models are floating-point, to get a better understanding of the differences we scale the outputs to INT8 precision and then compare their differences.\n",
+ "# The scale used to do that is the last scale of the LSTM graph.\n",
+ "\n",
+ "scale = inp_scale_val #The scale value is equal to the value of the inp_scale_val\n",
+ "all_hidden_states = np.array(scan_output[2])\n",
+ "all_hidden_states = all_hidden_states.reshape([num_inputs,1,num_hidden_cells])\n",
+ "all_hidden_state_diff = (all_hidden_states - brevitas_output)\n",
+ "print(all_hidden_state_diff/scale)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Note the difference in outputs increases as we progress with processing the inputs. The first two outputs are very close to one another, but as we get the outputs for more inputs we see for some values differ from the brevitas output by a considerable amount.\n",
+ "This behaviour can be attributed to some values being slightly different in the first few outputs (which are not visible) which eventually cause an increase in differences between both values as more inputs are processed."
+ ]
+ },
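+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick sanity check, the next cell prints the maximum absolute difference per processed input, in multiples of the scale used above; this makes the drift over time easier to see."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Maximum absolute difference per processed input, expressed in multiples of the scale\n",
+ "per_step_max_diff = np.abs(all_hidden_state_diff / scale).max(axis=(1, 2))\n",
+ "print(per_step_max_diff)"
+ ]
+ },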
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "venv",
+ "language": "python",
+ "name": "venv"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.0"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/4_quant_lstm_helper/function.py b/notebooks/4_quant_lstm_helper/function.py
new file mode 100644
index 00000000..935bf78a
--- /dev/null
+++ b/notebooks/4_quant_lstm_helper/function.py
@@ -0,0 +1,335 @@
+# Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+import torch
+from brevitas.export.onnx import onnx_export_opset
+from torch.autograd import Function
+
+AXIS_OPSET = 13
+DOMAIN_STRING = "onnx.brevitas"
+
+
+class DequantizeLinearFn(Function):
+ @staticmethod
+ def symbolic(g, x, input_scale, input_zero_point, input_axis):
+ opset_version = onnx_export_opset()
+
+ if input_axis is not None and opset_version < AXIS_OPSET:
+ raise RuntimeError("ONNX Opset 13 is required for per-channel quantization")
+ elif input_axis is not None and opset_version >= AXIS_OPSET:
+ ret = g.op("DequantizeLinear", x, input_scale, input_zero_point, axis_i=input_axis)
+ else:
+ ret = g.op("DequantizeLinear", x, input_scale, input_zero_point)
+ return ret
+
+ @staticmethod
+ def forward(ctx, int_x, input_scale, input_zero_point, input_axis):
+ return int_x.float()
+
+
+class IntClipFn(Function):
+ @staticmethod
+ def symbolic(g, int_x, min_int_val, max_int_val):
+ ret = g.op("Clip", int_x, min_int_val, max_int_val)
+ return ret
+
+ @staticmethod
+ def forward(ctx, int_x, min_int_val, max_int_val):
+ return int_x
+
+
+class QuantizeLinearFn(Function):
+ @staticmethod
+ def symbolic(g, x, output_scale, ouput_zero_point, output_dtype, output_axis):
+ opset_version = onnx_export_opset()
+
+ if output_axis is not None and opset_version < AXIS_OPSET:
+ raise RuntimeError("ONNX Opset 13 is required for per-channel quantization")
+ elif output_axis is not None and opset_version >= AXIS_OPSET:
+ ret = g.op("QuantizeLinear", x, output_scale, ouput_zero_point, axis_i=output_axis)
+ else:
+ ret = g.op("QuantizeLinear", x, output_scale, ouput_zero_point)
+ return ret
+
+ @staticmethod
+ def forward(ctx, x, output_scale, ouput_zero_point, output_dtype, output_axis):
+ return x.type(output_dtype)
+
+
+class BrevitasQuantLSTMCellFn(Function):
+ @staticmethod
+ def symbolic(
+ g, # args and kwargs passed from _QuantLSTMLayer
+ quant_input,
+ quant_hidden_state,
+ quant_cell_state,
+ quant_weight_ii,
+ quant_weight_if,
+ quant_weight_ic,
+ quant_weight_io,
+ quant_weight_hi,
+ quant_weight_hf,
+ quant_weight_hc,
+ quant_weight_ho,
+ quant_bias_input,
+ quant_bias_forget,
+ quant_bias_cell,
+ quant_bias_output, # Symbolic kwargs passed from BrevitasQuantLSTMLayerHandler
+ batch_first,
+ reverse_input,
+ cifg, # Output quant
+ output_scale,
+ output_zero_point,
+ output_bit_width,
+ output_narrow_range,
+ output_signed,
+ output_rounding_mode, # Cell state quant
+ cell_state_scale,
+ cell_state_zero_point,
+ cell_state_bit_width,
+ cell_state_narrow_range,
+ cell_state_signed,
+ cell_state_rounding_mode, # Input gate accumulator quant
+ input_acc_scale,
+ input_acc_zero_point,
+ input_acc_bit_width,
+ input_acc_narrow_range,
+ input_acc_signed,
+ input_acc_rounding_mode, # Forget gate accumulator quant
+ forget_acc_scale,
+ forget_acc_zero_point,
+ forget_acc_bit_width,
+ forget_acc_narrow_range,
+ forget_acc_signed,
+ forget_acc_rounding_mode, # Cell gate accumulator quant
+ cell_acc_scale,
+ cell_acc_zero_point,
+ cell_acc_bit_width,
+ cell_acc_narrow_range,
+ cell_acc_signed,
+ cell_acc_rounding_mode, # Output gate accumulator quant
+ output_acc_scale,
+ output_acc_zero_point,
+ output_acc_bit_width,
+ output_acc_narrow_range,
+ output_acc_signed,
+ output_acc_rounding_mode, # Input gate sigmoid quant
+ input_sigmoid_scale,
+ input_sigmoid_zero_point,
+ input_sigmoid_bit_width,
+ input_sigmoid_narrow_range,
+ input_sigmoid_signed,
+ input_sigmoid_rounding_mode, # Forget gate sigmoid quant
+ forget_sigmoid_scale,
+ forget_sigmoid_zero_point,
+ forget_sigmoid_bit_width,
+ forget_sigmoid_narrow_range,
+ forget_sigmoid_signed,
+ forget_sigmoid_rounding_mode, # Cell gate tanh quant
+ cell_tanh_scale,
+ cell_tanh_zero_point,
+ cell_tanh_bit_width,
+ cell_tanh_narrow_range,
+ cell_tanh_signed,
+ cell_tanh_rounding_mode, # Output gate sigmoid quant
+ output_sigmoid_scale,
+ output_sigmoid_zero_point,
+ output_sigmoid_bit_width,
+ output_sigmoid_narrow_range,
+ output_sigmoid_signed,
+ output_sigmoid_rounding_mode, # Hidden state tanh quant
+ hidden_state_tanh_scale,
+ hidden_state_tanh_zero_point,
+ hidden_state_tanh_bit_width,
+ hidden_state_tanh_narrow_range,
+ hidden_state_tanh_signed,
+ hidden_state_tanh_rounding_mode,
+ ):
+ return g.op(
+ f"{DOMAIN_STRING}::QuantLSTMCell", # Tensors
+ # Input values
+ quant_input,
+ quant_hidden_state,
+ quant_cell_state,
+ quant_weight_ii,
+ quant_weight_if,
+ quant_weight_ic,
+ quant_weight_io,
+ quant_weight_hi,
+ quant_weight_hf,
+ quant_weight_hc,
+ quant_weight_ho,
+ quant_bias_input,
+ quant_bias_forget,
+ quant_bias_cell,
+ quant_bias_output, # Output quant
+ output_scale,
+ output_zero_point,
+ output_bit_width, # Cell state quant
+ cell_state_scale,
+ cell_state_zero_point,
+ cell_state_bit_width, # Input gate accumulator quant
+ input_acc_scale,
+ input_acc_zero_point,
+ input_acc_bit_width, # Forget gate accumulator quant
+ forget_acc_scale,
+ forget_acc_zero_point,
+ forget_acc_bit_width, # Cell gate accumulator quant
+ cell_acc_scale,
+ cell_acc_zero_point,
+ cell_acc_bit_width, # Output gate accumulator quant
+ output_acc_scale,
+ output_acc_zero_point,
+ output_acc_bit_width, # Input gate sigmoid quant
+ input_sigmoid_scale,
+ input_sigmoid_zero_point,
+ input_sigmoid_bit_width, # Forget gate sigmoid quant
+ forget_sigmoid_scale,
+ forget_sigmoid_zero_point,
+ forget_sigmoid_bit_width, # Cell gate tanh quant
+ cell_tanh_scale,
+ cell_tanh_zero_point,
+ cell_tanh_bit_width, # Output gate sigmoid quant
+ output_sigmoid_scale,
+ output_sigmoid_zero_point,
+ output_sigmoid_bit_width, # Hidden state tanh quant
+ hidden_state_tanh_scale,
+ hidden_state_tanh_zero_point,
+ hidden_state_tanh_bit_width,
+ # Attributes
+ batch_first_i=batch_first,
+ reverse_input_i=reverse_input,
+ cifg_i=cifg,
+ output_narrow_i=output_narrow_range,
+ output_signed_i=output_signed,
+ output_rounding_mode_s=output_rounding_mode,
+ cell_state_narrow_i=cell_state_narrow_range,
+ cell_state_signed_i=cell_state_signed,
+ cell_state_rounding_mode_s=cell_state_rounding_mode,
+ input_acc_narrow_i=input_acc_narrow_range,
+ input_acc_signed_i=input_acc_signed,
+ input_acc_rounding_mode_s=input_acc_rounding_mode,
+ forget_acc_narrow_i=forget_acc_narrow_range,
+ forget_acc_signed_i=forget_acc_signed,
+ forget_acc_rounding_mode_s=forget_acc_rounding_mode,
+ cell_acc_narrow_i=cell_acc_narrow_range,
+ cell_acc_signed_i=cell_acc_signed,
+ cell_acc_rounding_mode_s=cell_acc_rounding_mode,
+ output_acc_narrow_i=output_acc_narrow_range,
+ output_acc_signed_i=output_acc_signed,
+ output_acc_rounding_mode_s=output_acc_rounding_mode,
+ input_sigmoid_narrow_i=input_sigmoid_narrow_range,
+ input_sigmoid_signed_i=input_sigmoid_signed,
+ input_sigmoid_rounding_mode_s=input_sigmoid_rounding_mode,
+ forget_sigmoid_narrow_i=forget_sigmoid_narrow_range,
+ forget_sigmoid_signed_i=forget_sigmoid_signed,
+ forget_sigmoid_rounding_mode_s=forget_sigmoid_rounding_mode,
+ cell_tanh_narrow_i=cell_tanh_narrow_range,
+ cell_tanh_signed_i=cell_tanh_signed,
+ cell_tanh_rounding_mode_s=cell_tanh_rounding_mode,
+ output_sigmoid_narrow_range_i=output_sigmoid_narrow_range,
+ output_sigmoid_signed_i=output_sigmoid_signed,
+ output_sigmoid_rounding_mode_s=output_sigmoid_rounding_mode,
+ hidden_state_tanh_narrow_i=hidden_state_tanh_narrow_range,
+ hidden_state_tanh_signed_i=hidden_state_tanh_signed,
+ hidden_state_tanh_rounding_mode_s=hidden_state_tanh_rounding_mode,
+ # PyTorch requires to specify the number of outputs manually
+ outputs=3,
+ )
+
+ @staticmethod
+ def forward(
+ ctx, # args and kwargs passed from _QuantLSTMLayer
+ quant_input,
+ quant_hidden_state,
+ quant_cell_state,
+ quant_weight_ii,
+ quant_weight_if,
+ quant_weight_ic,
+ quant_weight_io,
+ quant_weight_hi,
+ quant_weight_hf,
+ quant_weight_hc,
+ quant_weight_ho,
+ quant_bias_input,
+ quant_bias_forget,
+ quant_bias_cell,
+ quant_bias_output, # Symbolic kwargs passed from BrevitasQuantLSTMLayerHandler
+ batch_first,
+ reverse_input,
+ cifg, # Output quant
+ output_scale,
+ output_zero_point,
+ output_bit_width,
+ output_narrow_range,
+ output_signed,
+ output_rounding_mode, # Cell state quant
+ cell_state_scale,
+ cell_state_zero_point,
+ cell_state_bit_width,
+ cell_state_narrow_range,
+ cell_state_signed,
+ cell_state_rounding_mode, # Input gate accumulator quant
+ input_acc_scale,
+ input_acc_zero_point,
+ input_acc_bit_width,
+ input_acc_narrow_range,
+ input_acc_signed,
+ input_acc_rounding_mode, # Forget gate accumulator quant
+ forget_acc_scale,
+ forget_acc_zero_point,
+ forget_acc_bit_width,
+ forget_acc_narrow_range,
+ forget_acc_signed,
+ forget_acc_rounding_mode, # Cell gate accumulator quant
+ cell_acc_scale,
+ cell_acc_zero_point,
+ cell_acc_bit_width,
+ cell_acc_narrow_range,
+ cell_acc_signed,
+ cell_acc_rounding_mode, # Output gate accumulator quant
+ output_acc_scale,
+ output_acc_zero_point,
+ output_acc_bit_width,
+ output_acc_narrow_range,
+ output_acc_signed,
+ output_acc_rounding_mode, # Input gate sigmoid quant
+ input_sigmoid_scale,
+ input_sigmoid_zero_point,
+ input_sigmoid_bit_width,
+ input_sigmoid_narrow_range,
+ input_sigmoid_signed,
+ input_sigmoid_rounding_mode, # Forget gate sigmoid quant
+ forget_sigmoid_scale,
+ forget_sigmoid_zero_point,
+ forget_sigmoid_bit_width,
+ forget_sigmoid_narrow_range,
+ forget_sigmoid_signed,
+ forget_sigmoid_rounding_mode, # Cell gate tanh quant
+ cell_tanh_scale,
+ cell_tanh_zero_point,
+ cell_tanh_bit_width,
+ cell_tanh_narrow_range,
+ cell_tanh_signed,
+ cell_tanh_rounding_mode, # Output gate sigmoid quant
+ output_sigmoid_scale,
+ output_sigmoid_zero_point,
+ output_sigmoid_bit_width,
+ output_sigmoid_narrow_range,
+ output_sigmoid_signed,
+ output_sigmoid_rounding_mode, # Hidden state tanh quant
+ hidden_state_tanh_scale,
+ hidden_state_tanh_zero_point,
+ hidden_state_tanh_bit_width,
+ hidden_state_tanh_narrow_range,
+ hidden_state_tanh_signed,
+ hidden_state_tanh_rounding_mode,
+ ):
+ # To simplify things, here we return the outputs
+ # as if they were already concatenated. Scale/zero-point/bit-width are ignored too.
+ # This preserves output shapes but not values.
+ # See _QuantLSTMCell for the actual implementation.
+ quant_outputs = torch.zeros(
+ quant_input.size(0), quant_input.size(1), quant_hidden_state.size(1), device=quant_hidden_state.device
+ )
+ return quant_outputs, quant_hidden_state, quant_cell_state
diff --git a/notebooks/4_quant_lstm_helper/handler.py b/notebooks/4_quant_lstm_helper/handler.py
new file mode 100644
index 00000000..71cbdeb1
--- /dev/null
+++ b/notebooks/4_quant_lstm_helper/handler.py
@@ -0,0 +1,123 @@
+# Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+
+import torch
+from abc import ABC
+from brevitas.export.common.handler.qcdq import (
+ DQMixin,
+ QCDQActQuantProxyHandlerMixin,
+ QCDQBiasQuantProxyHandlerMixin,
+ QCDQDecoupledWeightQuantProxyHandlerMixin,
+ QCDQMixin,
+ QCDQTruncQuantProxyHandlerMixin,
+ QCDQWeightQuantProxyHandlerMixin,
+)
+from brevitas.export.onnx.handler import ONNXBaseHandler, QuantLSTMLayerHandler
+
+from ..function import BrevitasQuantLSTMCellFn, DequantizeLinearFn, IntClipFn, QuantizeLinearFn
+
+
+class StdDQONNXMixin(DQMixin, ABC):
+ def dequantize_fn(self, x, scale, zero_point, axis):
+ return DequantizeLinearFn.apply(x, scale, zero_point, axis)
+
+ @property
+ def flatten_dequantize_params(self):
+ return True
+
+ @property
+ def itemize_quantize_scalar_params(self):
+ return False
+
+
+class StdQCDQONNXMixin(QCDQMixin, StdDQONNXMixin, ABC):
+ @property
+ def clip_over_integers(self):
+ return True
+
+ @classmethod
+ def int8_dtype(cls):
+ return torch.int8
+
+ @classmethod
+ def uint8_dtype(cls):
+ return torch.uint8
+
+ @classmethod
+ def int32_dtype(cls):
+ return torch.int32
+
+ def validate(self, module):
+ self.validate_8b_bit_width(module.bit_width(), le_then=True)
+ assert module.bit_width() > 1.0, "Binary quant not supported"
+ assert module.rounding_mode.upper() == "ROUND", "Only round to nearest even supported"
+
+ def quantize_fn(self, x, scale, zero_point, dtype, axis):
+ return QuantizeLinearFn.apply(x, scale, zero_point, dtype, axis)
+
+ def clip_fn(self, x, min_val, max_val):
+ return IntClipFn.apply(x, min_val, max_val)
+
+
+class StdQCDQONNXWeightQuantProxyHandler(StdQCDQONNXMixin, QCDQWeightQuantProxyHandlerMixin, ONNXBaseHandler):
+ pass
+
+
+class StdQCDQONNXDecoupledWeightQuantProxyHandler(
+ StdQCDQONNXMixin, QCDQDecoupledWeightQuantProxyHandlerMixin, ONNXBaseHandler
+):
+ pass
+
+
+class StdQCDQONNXActQuantProxyHandler(StdQCDQONNXMixin, QCDQActQuantProxyHandlerMixin, ONNXBaseHandler):
+ pass
+
+
+class StdQCDQONNXBiasQuantProxyHandler(StdDQONNXMixin, QCDQBiasQuantProxyHandlerMixin, ONNXBaseHandler):
+ pass
+
+
+class StdQCDQONNXTruncQuantProxyHandler(StdQCDQONNXMixin, QCDQTruncQuantProxyHandlerMixin, ONNXBaseHandler):
+ pass
+
+
+class StdQCDQONNXQuantLSTMLayerHandler(QuantLSTMLayerHandler):
+ def quantized_cell_symbolic_execution(
+ self,
+ quant_input,
+ quant_hidden_state,
+ quant_cell_state,
+ quant_weight_ii,
+ quant_weight_if,
+ quant_weight_ic,
+ quant_weight_io,
+ quant_weight_hi,
+ quant_weight_hf,
+ quant_weight_hc,
+ quant_weight_ho,
+ quant_bias_input,
+ quant_bias_forget,
+ quant_bias_cell,
+ quant_bias_output,
+ ):
+ return BrevitasQuantLSTMCellFn.apply(
+ quant_input,
+ quant_hidden_state,
+ quant_cell_state,
+ quant_weight_ii,
+ quant_weight_if,
+ quant_weight_ic,
+ quant_weight_io,
+ quant_weight_hi,
+ quant_weight_hf,
+ quant_weight_hc,
+ quant_weight_ho,
+ quant_bias_input,
+ quant_bias_forget,
+ quant_bias_cell,
+ quant_bias_output,
+ *self.symbolic_kwargs.values()
+ )
+ # raise RuntimeError(
+ # "Quantized LSTM cell is not supported for ONNX QCDQ "
+ # "(weights only quantization is). Use export_qonnx.")
diff --git a/notebooks/README.md b/notebooks/README.md
index f25b99dd..f852fb09 100644
--- a/notebooks/README.md
+++ b/notebooks/README.md
@@ -8,5 +8,8 @@ git clone https://github.com/fastmachinelearning/qonnx
cd qonnx
virtualenv -p python3.8 venv
source venv/bin/activate
-pip install -e .[testing, docs, notebooks]
+pip install -e .[testing,docs,notebooks]
+cd notebooks
+jupyter notebook .
+# follow the link printed in the console to bring up Jupyter
```
diff --git a/setup.cfg b/setup.cfg
index fe89cbde..602d6ada 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -69,7 +69,7 @@ exclude =
# Note: pyparsing is actually needed by QKeras, but missing as dependency
qkeras =
pyparsing
- tf2onnx>=1.12.1
+ tf2onnx>=1.16.1
tensorflow==2.9.0
QKeras==0.9.0
diff --git a/src/qonnx/analysis/inference_cost.py b/src/qonnx/analysis/inference_cost.py
index da5e1f5d..c821d26a 100644
--- a/src/qonnx/analysis/inference_cost.py
+++ b/src/qonnx/analysis/inference_cost.py
@@ -117,6 +117,8 @@ def inference_cost_conv(model, node, discount_sparsity):
mac_op_type_str = "op_mac_%s_%s" % (idt_name, wdt_name)
w_mem_type_str = "mem_w_%s" % (wdt_name)
o_mem_type_str = "mem_o_%s" % (odt_name)
+ # keep in floats to remain compatible with json serialization
+ n_macs, w_mem, o_mem = float(n_macs), float(w_mem), float(o_mem)
ret = {mac_op_type_str: n_macs, w_mem_type_str: w_mem, o_mem_type_str: o_mem}
return ret
@@ -134,7 +136,7 @@ def inference_cost_matmul(model, node, discount_sparsity):
if tB is not None and tB.i == 1:
w_shape = w_shape[::-1]
# exclude common dim (last axis) from one side to avoid duplication
- n_macs = np.prod(i_shape[:-1]) * np.prod(w_shape)
+ n_macs = i_shape[-1] * np.prod(o_shape)
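+ # i.e. MACs = size of the common (reduction) dimension times the number of output elements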
# deal with both dyn,param and dyn,dyn cases for weight memory
inp0_is_const = model.get_initializer(node.input[0]) is not None
inp1_is_const = model.get_initializer(node.input[1]) is not None
@@ -161,6 +163,8 @@ def inference_cost_matmul(model, node, discount_sparsity):
mac_op_type_str = "op_mac_%s_%s" % (idt_name, wdt_name)
w_mem_type_str = "mem_w_%s" % (wdt_name)
o_mem_type_str = "mem_o_%s" % (odt_name)
+ # keep in floats to remain compatible with json serialization
+ n_macs, w_mem, o_mem = float(n_macs), float(w_mem), float(o_mem)
ret = {mac_op_type_str: n_macs, w_mem_type_str: w_mem, o_mem_type_str: o_mem}
return ret
@@ -197,14 +201,16 @@ def inference_cost_upsample(model, node, discount_sparsity):
mac_op_type_str = "op_mac_%s_%s" % (idt_name, idt_name)
o_mem_type_str = "mem_o_%s" % (odt_name)
+ # keep in floats to remain compatible with json serialization
+ n_macs, o_mem = float(n_macs), float(o_mem)
ret = {mac_op_type_str: n_macs, o_mem_type_str: o_mem}
return ret
-def inference_cost(model, discount_sparsity=True):
+def inference_cost(model, discount_sparsity=True, cost_breakdown=False):
"Ensure all nodes have unique names prior to calling this analysis pass."
- node_costs = {}
+ ret, node_costs, nodes_per_optype = {}, {}, {}
zero_cost_ops = [
"MaxPool",
"AveragePool",
@@ -240,13 +246,24 @@ def inference_cost(model, discount_sparsity=True):
if node.op_type in inference_cost_fxn_map.keys():
node_cost = inference_cost_fxn_map[node.op_type](model, node, discount_sparsity)
node_costs[node.name] = node_cost
+ if node.op_type not in nodes_per_optype.keys():
+ new_optype = {}
+ new_optype[node.name] = node_cost
+ nodes_per_optype[node.op_type] = new_optype
+ else:
+ nodes_per_optype[node.op_type][node.name] = node_cost
elif node.op_type in zero_cost_ops:
continue
else:
unsupported_ops.add(node.op_type)
-
- ret = aggregate_dict_keys(node_costs)
- ret["unsupported"] = unsupported_ops
- ret["discount_sparsity"] = discount_sparsity
-
+ total = aggregate_dict_keys(node_costs)
+ total["unsupported"] = unsupported_ops
+ total["discount_sparsity"] = discount_sparsity
+ ret["total_cost"] = total
+ if cost_breakdown:
+ optype_cost = {}
+ for optype, resources in nodes_per_optype.items():
+ optype_cost[optype] = aggregate_dict_keys(resources)
+ ret["optype_cost"] = optype_cost
+ ret["node_cost"] = node_costs
return ret
diff --git a/src/qonnx/core/datatype.py b/src/qonnx/core/datatype.py
index 40584a4b..f37d4eea 100644
--- a/src/qonnx/core/datatype.py
+++ b/src/qonnx/core/datatype.py
@@ -145,6 +145,38 @@ def get_canonical_name(self):
return "FLOAT32"
+class Float16Type(BaseDataType):
+ def bitwidth(self):
+ return 16
+
+ def min(self):
+ return np.finfo(np.float16).min
+
+ def max(self):
+ return np.finfo(np.float16).max
+
+ def allowed(self, value):
+ return True
+
+ def get_num_possible_values(self):
+ raise Exception("Undefined for Float16Type")
+
+ def is_integer(self):
+ return False
+
+ def is_fixed_point(self):
+ return False
+
+ def get_hls_datatype_str(self):
+ return "float"
+
+ def to_numpy_dt(self):
+ return np.float16
+
+ def get_canonical_name(self):
+ return "FLOAT16"
+
+
class IntType(BaseDataType):
def __init__(self, bitwidth, signed):
super().__init__()
@@ -349,6 +381,7 @@ def resolve_datatype(name):
"BIPOLAR": BipolarType(),
"TERNARY": TernaryType(),
"FLOAT32": FloatType(),
+ "FLOAT16": Float16Type(),
}
if name in _special_types.keys():
return _special_types[name]
diff --git a/src/qonnx/core/modelwrapper.py b/src/qonnx/core/modelwrapper.py
index f78e1334..b95c6a33 100644
--- a/src/qonnx/core/modelwrapper.py
+++ b/src/qonnx/core/modelwrapper.py
@@ -38,7 +38,12 @@
import qonnx.util.onnx as onnxutil
from qonnx.core.datatype import DataType
from qonnx.transformation.double_to_single_float import DoubleToSingleFloat
-from qonnx.transformation.general import RemoveStaticGraphInputs, RemoveUnusedTensors, SortGraph
+from qonnx.transformation.general import (
+ RemoveStaticGraphInputs,
+ RemoveUnusedTensors,
+ SortCommutativeInputsInitializerLast,
+ SortGraph,
+)
class ModelWrapper:
@@ -149,6 +154,7 @@ def cleanup(self):
RemoveUnusedTensors(),
RemoveStaticGraphInputs(),
SortGraph(),
+ SortCommutativeInputsInitializerLast(),
]
for trn in cleanup_transforms:
transformed_model = transformed_model.transform(trn, cleanup=False, make_deepcopy=False)
@@ -346,16 +352,19 @@ def find_producer(self, tensor_name):
return x
return None
- def find_upstream(self, tensor_name, finder_fxn):
+ def find_upstream(self, tensor_name, finder_fxn, keep_if_not_found=False):
"""Follow the producer chain upstream, calling finder_fxn on each upstream
node until it returns True or there are no nodes left. Returns the list
- of nodes visited, or None if finder_fxn did not return True."""
+ of nodes visited, or None if finder_fxn did not return True. If
+ keep_if_not_found is specified, returns the list of nodes visited, even
+ if finder_fxn never returned True, i.e., if the search terminated at an
+ input or initializer."""
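+ # Hypothetical usage sketch: find_upstream("act_out", lambda n: n.op_type == "Quant",
+ # keep_if_not_found=True) returns the visited producer chain even if no Quant producer
+ # exists upstream, instead of returning [] or None.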
visit_list = []
current_tensor = tensor_name
while True:
current_producer = self.find_producer(current_tensor)
if current_producer is None:
- return []
+ return visit_list if keep_if_not_found else []
else:
found = finder_fxn(current_producer)
visit_list.append(current_producer)
@@ -364,7 +373,7 @@ def find_upstream(self, tensor_name, finder_fxn):
elif len(current_producer.input) > 0:
current_tensor = current_producer.input[0]
else:
- return None
+ return visit_list if keep_if_not_found else None
def find_consumer(self, tensor_name):
"""Finds and returns the node that consumes the tensor with given name.
@@ -532,7 +541,7 @@ def get_non_finn_nodes(self):
return list(filter(lambda x: not util.is_finn_op(x.domain), self.graph.node))
def get_node_index(self, node):
- """Returns current index of given node."""
+ """Returns current index of given node, or None if not found."""
n_ind = 0
try:
for n in self.graph.node:
@@ -541,6 +550,17 @@ def get_node_index(self, node):
n_ind += 1
except ValueError:
return None
+ return None
+
+ def get_node_from_name(self, node_name):
+ """Returns the node with the specified name, or None if not found."""
+ try:
+ for node in self.graph.node:
+ if node.name == node_name:
+ return node
+ except ValueError:
+ return None
+ return None
def get_tensor_layout(self, tensor_name):
"""Returns the data layout annotation of tensor with given name.
diff --git a/src/qonnx/core/onnx_exec.py b/src/qonnx/core/onnx_exec.py
index a5be9dee..a8f4774c 100644
--- a/src/qonnx/core/onnx_exec.py
+++ b/src/qonnx/core/onnx_exec.py
@@ -208,7 +208,6 @@ def execute_onnx_and_make_model(model, input_dict):
new_model.set_initializer(i, execution_context[i])
for vi in new_model.graph.value_info:
new_model.graph.output.append(vi)
- # import pdb; pdb.set_trace()
return new_model
diff --git a/src/qonnx/custom_op/base.py b/src/qonnx/custom_op/base.py
index bd2545fa..775d9f95 100644
--- a/src/qonnx/custom_op/base.py
+++ b/src/qonnx/custom_op/base.py
@@ -74,6 +74,8 @@ def get_nodeattr(self, name):
if dtype == "s":
# decode string attributes
ret = ret.decode("utf-8")
+ elif dtype == "strings":
+ ret = [x.decode("utf-8") for x in ret]
elif dtype == "t":
# use numpy helper to convert TensorProto -> np array
ret = np_helper.to_array(ret)
@@ -123,13 +125,15 @@ def set_nodeattr(self, name, value):
# encode string attributes
value = value.encode("utf-8")
attr.__setattr__(dtype, value)
+ elif dtype == "strings":
+ attr.strings[:] = [x.encode("utf-8") for x in value]
elif dtype == "floats": # list of floats
attr.floats[:] = value
elif dtype == "ints": # list of integers
attr.ints[:] = value
elif dtype == "t": # single tensor
attr.t.CopyFrom(value)
- elif dtype in ["strings", "tensors", "graphs", "sparse_tensors"]:
+ elif dtype in ["tensors", "graphs", "sparse_tensors"]:
# untested / unsupported attribute types
# add testcases & appropriate getters before enabling
raise Exception("Attribute type %s not yet supported" % dtype)
diff --git a/src/qonnx/custom_op/general/quantavgpool2d.py b/src/qonnx/custom_op/general/quantavgpool2d.py
index 9c06a871..c0e24071 100644
--- a/src/qonnx/custom_op/general/quantavgpool2d.py
+++ b/src/qonnx/custom_op/general/quantavgpool2d.py
@@ -140,7 +140,7 @@ def execute_node(self, context, graph):
sess = rt.InferenceSession(model_avgpool.SerializeToString())
result_temp = sess.run(None, idict)
# remove scaling introduced by average
- result_temp = result_temp[0] * (k * k)
+ result_temp = np.round(result_temp[0] * (k * k))
result = np.right_shift(result_temp.astype(int), self.get_shifts())
if self.get_nodeattr("data_layout") == "NHWC":
result = result.transpose(0, 2, 3, 1)
diff --git a/src/qonnx/data/onnx/bsd300x3-espcn/nn_resize/float_model.onnx b/src/qonnx/data/onnx/bsd300x3-espcn/nn_resize/float_model.onnx
new file mode 100644
index 00000000..5fe61c18
Binary files /dev/null and b/src/qonnx/data/onnx/bsd300x3-espcn/nn_resize/float_model.onnx differ
diff --git a/src/qonnx/data/onnx/bsd300x3-espcn/nn_resize/quant_model.onnx b/src/qonnx/data/onnx/bsd300x3-espcn/nn_resize/quant_model.onnx
new file mode 100644
index 00000000..b2a8bcd4
Binary files /dev/null and b/src/qonnx/data/onnx/bsd300x3-espcn/nn_resize/quant_model.onnx differ
diff --git a/src/qonnx/data/onnx/bsd300x3-espcn/float_model.onnx b/src/qonnx/data/onnx/bsd300x3-espcn/subpixel/float_model.onnx
similarity index 100%
rename from src/qonnx/data/onnx/bsd300x3-espcn/float_model.onnx
rename to src/qonnx/data/onnx/bsd300x3-espcn/subpixel/float_model.onnx
diff --git a/src/qonnx/data/onnx/bsd300x3-espcn/quant_model.onnx b/src/qonnx/data/onnx/bsd300x3-espcn/subpixel/quant_model.onnx
similarity index 100%
rename from src/qonnx/data/onnx/bsd300x3-espcn/quant_model.onnx
rename to src/qonnx/data/onnx/bsd300x3-espcn/subpixel/quant_model.onnx
diff --git a/src/qonnx/data/onnx/matmul_update/sdp.onnx b/src/qonnx/data/onnx/matmul_update/sdp.onnx
new file mode 100644
index 00000000..23375c80
Binary files /dev/null and b/src/qonnx/data/onnx/matmul_update/sdp.onnx differ
diff --git a/src/qonnx/transformation/base.py b/src/qonnx/transformation/base.py
index 75b16aba..eaf73ab9 100644
--- a/src/qonnx/transformation/base.py
+++ b/src/qonnx/transformation/base.py
@@ -107,8 +107,12 @@ def apply(self, model):
old_nodes.append(model.graph.node.pop())
# Execute transformation in parallel
- with mp.Pool(self._num_workers) as p:
- new_nodes_and_bool = p.map(self.applyNodeLocal, old_nodes, chunksize=1)
+ if self._num_workers > 1:
+ with mp.Pool(self._num_workers) as p:
+ new_nodes_and_bool = p.map(self.applyNodeLocal, old_nodes, chunksize=1)
+ # execute without mp.Pool in case of 1 worker to simplify debugging
+ else:
+ new_nodes_and_bool = [self.applyNodeLocal(node) for node in old_nodes]
# extract nodes and check if the transformation needs to run again
# Note: .pop() had initially reversed the node order
diff --git a/src/qonnx/transformation/extract_conv_bias.py b/src/qonnx/transformation/extract_conv_bias.py
index 2a50f725..bf2cf8b4 100644
--- a/src/qonnx/transformation/extract_conv_bias.py
+++ b/src/qonnx/transformation/extract_conv_bias.py
@@ -27,15 +27,15 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import warnings
-from onnx import TensorProto, helper
+from onnx import helper
from qonnx.transformation.base import Transformation
class ExtractBiasFromConv(Transformation):
"""
- Extracts the (optional) Bias from a Conv node and inserts it behind the
- Conv node as an Add node.
+ Extracts the (optional) Bias from a Conv(Transpose) node and inserts it behind the
+ Conv(Transpose) node as an Add node.
"""
def apply(self, model):
@@ -43,13 +43,13 @@ def apply(self, model):
node_ind = 0
for n in graph.node:
node_ind += 1
- if n.op_type == "Conv":
+ if n.op_type in ["Conv", "ConvTranspose"]:
# Check if the node has a bias input
if len(n.input) > 2:
# Extract bias
bias = model.get_initializer(n.input[2])
if bias is None:
- warnings.warn(f"Could not extract bias from Conv node {n}")
+ warnings.warn(f"Could not extract bias from node {n}")
continue
# Insert bias as Add node behind the Conv node
@@ -65,7 +65,7 @@ def apply(self, model):
act_add_tensor = helper.make_tensor_value_info(
model.make_new_valueinfo_name(),
- TensorProto.FLOAT,
+ model.get_tensor_valueinfo(n.output[0]).type.tensor_type.elem_type,
out_shape,
)
graph.value_info.append(act_add_tensor)
diff --git a/src/qonnx/transformation/extract_quant_scale_zeropt.py b/src/qonnx/transformation/extract_quant_scale_zeropt.py
new file mode 100644
index 00000000..58863f08
--- /dev/null
+++ b/src/qonnx/transformation/extract_quant_scale_zeropt.py
@@ -0,0 +1,142 @@
+# Copyright (c) 2023 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of qonnx nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+from onnx import TensorProto, helper
+
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.general import GiveUniqueParameterTensors, SortGraph
+from qonnx.transformation.remove import RemoveIdentityOps
+
+
+class ExtractQuantScaleZeroPt(Transformation):
+ """Extract any non-identity scale and zero-point Quant inputs as
+ separate Div/Mul (for scale) and Add/Sub (for zeropoint) nodes,
+ preceding and following the Quant node."""
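+
+ # Rough sketch of the rewrite (for intuition only, not executable code):
+ # y = Quant(x; scale, zeropt, bitwidth) becomes
+ # y = (Quant(x / scale + zeropt; scale=1, zeropt=0, bitwidth) - zeropt) * scale,
+ # leaving the remaining Quant node with unit scale and zero zero-point.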
+
+ def apply(self, model: ModelWrapper):
+ graph = model.graph
+ for node in graph.node:
+ if node.op_type == "Quant":
+ quant_node = node
+ input_nm, scale_nm, zeropt_nm, _ = node.input
+ scale_t = model.get_initializer(scale_nm)
+ zeropt_t = model.get_initializer(zeropt_nm)
+ ishp = model.get_tensor_shape(input_nm)
+ extract_scale = False
+ extract_zeropt = False
+ if scale_t is not None and (scale_t != 1).any():
+ extract_scale = True
+ if zeropt_t is not None and (zeropt_t != 0).any():
+ extract_zeropt = True
+ if (not extract_scale) and (not extract_zeropt):
+ continue
+ running_input = input_nm
+ if extract_scale:
+ # create new Div node that divides the input
+ # by the scale
+ inp_scaled_nm = model.make_new_valueinfo_name()
+ inp_scaled = helper.make_tensor_value_info(
+ inp_scaled_nm,
+ TensorProto.FLOAT,
+ ishp,
+ )
+ graph.value_info.append(inp_scaled)
+ inp_scale_node = helper.make_node("Div", [running_input, scale_nm], [inp_scaled_nm])
+ graph.node.append(inp_scale_node)
+ # create new Mul node
+ # remove scale from Quant node
+ new_scale_nm = model.make_new_valueinfo_name()
+ model.set_initializer(new_scale_nm, np.asarray(1.0, dtype=np.float32))
+ quant_node.input[1] = new_scale_nm
+ running_input = inp_scaled_nm
+ if extract_zeropt:
+ # create new Add node that adds the zeropoint to
+ # the scaled input
+ inp_zeropt_nm = model.make_new_valueinfo_name()
+ inp_zeropt = helper.make_tensor_value_info(
+ inp_zeropt_nm,
+ TensorProto.FLOAT,
+ ishp,
+ )
+ graph.value_info.append(inp_zeropt)
+ inp_zeropt_node = helper.make_node("Add", [running_input, zeropt_nm], [inp_zeropt_nm])
+ graph.node.append(inp_zeropt_node)
+ # remove zeropt from Quant node
+ new_zeropt_nm = model.make_new_valueinfo_name()
+ model.set_initializer(new_zeropt_nm, np.asarray(0.0, dtype=np.float32))
+ quant_node.input[2] = new_zeropt_nm
+ running_input = inp_zeropt_nm
+ # rewire node input to any newly created Div/Add nodes
+ quant_node.input[0] = running_input
+ last_node = quant_node
+ final_output = quant_node.output[0]
+ if extract_zeropt:
+ # create new Sub node that subtracts the zeropoint from
+ # the output
+ out_zeropt_nm = model.make_new_valueinfo_name()
+ out_zeropt = helper.make_tensor_value_info(
+ out_zeropt_nm,
+ TensorProto.FLOAT,
+ ishp,
+ )
+ graph.value_info.append(out_zeropt)
+ out_zeropt_node = helper.make_node("Sub", [out_zeropt_nm, zeropt_nm], [final_output])
+ last_node.output[0] = out_zeropt_nm
+ graph.node.append(out_zeropt_node)
+ # important: when tracking a pointer to newly added nodes,
+ # ensure the item from the container is used, and not the
+ # make_node result -- those are different objects
+ # e.g. if we use last_node = out_zeropt_node below,
+ # this will point to the wrong object and cause bugs later
+ last_node = graph.node[-1]
+ if extract_scale:
+ # create new Mul node that applies the output scale
+ out_scale_nm = model.make_new_valueinfo_name()
+ out_scale = helper.make_tensor_value_info(
+ out_scale_nm,
+ TensorProto.FLOAT,
+ ishp,
+ )
+ last_node.output[0] = out_scale_nm
+ graph.value_info.append(out_scale)
+ out_scale_node = helper.make_node("Mul", [out_scale_nm, scale_nm], [final_output])
+ graph.node.append(out_scale_node)
+
+ if extract_scale or extract_zeropt:
+ # since we used append() for new nodes, need to call
+ # SortGraph to ensure correct (topological) order
+ model = model.transform(SortGraph())
+ # remove any identity ops (e.g. unit-scale Div/Mul or zero-valued Add/Sub) introduced above
+ model = model.transform(RemoveIdentityOps())
+ # Ensure unique parameter tensors
+ model = model.transform(GiveUniqueParameterTensors())
+ return model, True
+
+ return model, False
diff --git a/src/qonnx/transformation/general.py b/src/qonnx/transformation/general.py
index 5153e616..d69cee5a 100644
--- a/src/qonnx/transformation/general.py
+++ b/src/qonnx/transformation/general.py
@@ -29,6 +29,9 @@
import json
import numpy as np
import warnings
+
+# Protobuf onnx graph node type
+from onnx import NodeProto # noqa
from onnx import mapping
from toposort import toposort_flatten
@@ -359,3 +362,56 @@ def apply(self, model):
# one iteration is enough
return (model, False)
+
+
+# Groups inputs by categories, i.e., groups dynamic inputs first, followed by
+# initializers. Keeps order of inputs in each category.
+def group_inputs_by_category(node: NodeProto, model): # noqa
+ # Select all dynamic inputs, which are those without initializer tensor
+ dynamics = [i for i in node.input if model.get_initializer(i) is None]
+ # Select all input which are initializers, which, by exclusion, are all
+ # those not among the dynamic inputs
+ initializers = [i for i in node.input if i not in dynamics]
+ # Return lists of dynamic and initializer inputs
+ return dynamics, initializers
+
+
+# Tidy-Up transformation sorting the inputs to all commutative operations to
+# have initializer inputs last
+class SortCommutativeInputsInitializerLast(Transformation):
+ """
+ Sorts inputs of nodes describing commutative operations to have initializer
+ inputs last. This order of inputs is assumed by many other transformations.
+ """
+
+ # Set of supported commutative operations
+ # TODO: There might be more valid operations
+ SUPPORTED_COMMUTATIVE_OPS = {"Add", "Mul", "And", "Or", "Xor", "Sum"}
+
+ # Applies the transform to a whole model graph
+ def apply(self, model): # noqa
+ # Get the model graph out of the model wrapper object
+ graph = model.graph
+ # Keep track of whether the graph has been modified
+ graph_modified = False
+ # Iterate all nodes in the graph keeping track of the index
+ for index, node in enumerate(graph.node):
+ # Check whether this node is among the supported
+ if node.op_type in self.SUPPORTED_COMMUTATIVE_OPS:
+ # Group node inputs by category
+ dynamics, initializers = group_inputs_by_category(node, model)
+ # Flatten the grouped input list
+ inputs = [*dynamics, *initializers]
+ # Length of sorted and original input list must match
+ assert len(inputs) == len(node.input)
+ # Reassign inputs from the sorted categories
+ for i, name in enumerate(inputs):
+ # The graph has been modified if any input is reordered
+ if node.input[i] != name:
+ # Note: This is never reset back to False
+ graph_modified = True
+ # Reassign input name at the new index
+ node.input[i] = name
+ # Return the transformed model and indicate whether the graph actually
+ # has been transformed
+ return model, graph_modified
diff --git a/src/qonnx/transformation/infer_data_layouts.py b/src/qonnx/transformation/infer_data_layouts.py
index bbfc7404..81143e45 100644
--- a/src/qonnx/transformation/infer_data_layouts.py
+++ b/src/qonnx/transformation/infer_data_layouts.py
@@ -46,11 +46,19 @@ def _dims_to_layout(model, node, ndims):
return DataLayout.NHWC
elif layout == "NCHW" and ndims == 4:
return DataLayout.NCHW
+ elif layout == "NWC" and ndims == 3:
+ return DataLayout.NWC
+ elif layout == "NC" and ndims == 2:
+ return DataLayout.NC
else:
return DataLayout.UNKNOWN
else:
if ndims == 4:
return DataLayout.NHWC
+ elif ndims == 3:
+ return DataLayout.NWC
+ elif ndims == 2:
+ return DataLayout.NC
else:
return DataLayout.UNKNOWN
else:
@@ -119,6 +127,10 @@ def apply(self, model):
warnings.warn("Assuming 4D input is NCHW")
model.set_tensor_layout(inp_name, DataLayout.NCHW)
graph_modified = True
+ elif len(inp_shape) == 3:
+ warnings.warn("Assuming 3D input is NWC")
+ model.set_tensor_layout(inp_name, DataLayout.NWC)
+ graph_modified = True
elif len(inp_shape) == 2:
graph_modified = True
warnings.warn("Assuming 2D input is NC")
diff --git a/src/qonnx/transformation/lower_convs_to_matmul.py b/src/qonnx/transformation/lower_convs_to_matmul.py
index 79e1f3f2..81f0b713 100644
--- a/src/qonnx/transformation/lower_convs_to_matmul.py
+++ b/src/qonnx/transformation/lower_convs_to_matmul.py
@@ -32,24 +32,7 @@
from qonnx.transformation.base import Transformation
from qonnx.transformation.extract_conv_bias import ExtractBiasFromConv
-from qonnx.util.basic import get_by_name
-
-
-def _auto_pad_to_explicit_padding(autopad_str, idim_h, idim_w, k_h, k_w, stride_h, stride_w, n_dims):
- pad_total_h = (stride_h - 1) * idim_h - stride_h + k_h
- pad_total_w = (stride_w - 1) * idim_w - stride_w + k_w
- pad_half_small_h = int((pad_total_h / 2))
- pad_half_small_w = int((pad_total_w / 2))
- pad_half_large_h = pad_total_h - pad_half_small_h
- pad_half_large_w = pad_total_w - pad_half_small_w
- if autopad_str == "VALID":
- return [0 for i in range(2 * n_dims)]
- elif autopad_str == "SAME_UPPER":
- return [pad_half_small_h, pad_half_small_w, pad_half_large_h, pad_half_large_w]
- elif autopad_str == "SAME_LOWER":
- return [pad_half_large_h, pad_half_large_w, pad_half_small_h, pad_half_small_w]
- else:
- raise Exception("Unsupported auto_pad: " + autopad_str)
+from qonnx.util.basic import auto_pad_to_explicit_padding, get_by_name
class LowerConvsToMatMul(Transformation):
@@ -59,167 +42,218 @@ class LowerConvsToMatMul(Transformation):
def apply(self, model):
model = model.transform(ExtractBiasFromConv())
graph = model.graph
- node_ind = 0
graph_modified = False
- for n in graph.node:
- node_ind += 1
- if n.op_type == "Conv":
- if len(n.input) == 3:
- warnings.warn("Found Conv node with bias, skipping")
- continue
- cnv_input = n.input[0]
- cnv_output = n.output[0]
- idt = model.get_tensor_datatype(cnv_input)
- odt = model.get_tensor_datatype(cnv_output)
- # extract conv parameters
- k = get_by_name(n.attribute, "kernel_shape").ints
- k_h = k[0]
- k_w = k[1]
- stride_h = get_by_name(n.attribute, "strides").ints[0]
- stride_w = get_by_name(n.attribute, "strides").ints[1]
- group = get_by_name(n.attribute, "group").i
- weight_name = n.input[1]
- W_conv = model.get_initializer(weight_name)
- ifm_ch = model.get_tensor_shape(n.input[0])[1] # assume NCHW
- ofm_ch = model.get_tensor_shape(n.output[0])[1] # assume NCHW
- ifm_dim_h = model.get_tensor_shape(n.input[0])[2] # assume NCHW
- ifm_dim_w = model.get_tensor_shape(n.input[0])[3]
- ofm_dim_h = model.get_tensor_shape(n.output[0])[2] # assume NCHW
- ofm_dim_w = model.get_tensor_shape(n.output[0])[3]
- dilation_attr = get_by_name(n.attribute, "dilations")
- if dilation_attr is not None:
- dilation = dilation_attr.ints
- else:
- dilation = [1, 1] # default value
- # handle both auto_pad and explicit padding
- auto_pad = get_by_name(n.attribute, "auto_pad")
- if auto_pad is not None:
- # find equivalent specified padding
- auto_pad = auto_pad.s.decode("utf-8")
- if auto_pad == "NOTSET":
- # use specified padding
- pad = get_by_name(n.attribute, "pads").ints
- else:
- pad = _auto_pad_to_explicit_padding(
- auto_pad,
- ifm_dim_h,
- ifm_dim_w,
- k_h,
- k_w,
- stride_h,
- stride_w,
- len(model.get_tensor_shape(n.input[0])) - 2,
- )
- else:
- # use specified padding
- pad = get_by_name(n.attribute, "pads").ints
-
- # If len(pad) == 2, assume no padding for other dimension
- if len(pad) == 2: # only one dimension should be padded
- assert ifm_dim_h == 1 or ifm_dim_w == 1, "Padding is assumed to be 1D, image is 2D"
-
- # if depthwise conv create sparse matrix and variable "dw"
- # to store as attribute in Im2Col that indicates that the created
+ for node_ind, node in enumerate(graph.node, start=1):
+ if node.op_type != "Conv":
+ continue
+
+ if len(node.input) == 3:
+ warnings.warn("Found Conv node with bias, skipping")
+ continue
+
+ # extract parameters of node
+ (
+ cnv_input,
+ cnv_output,
+ cnv_input_datatype,
+ cnv_output_datatype,
+ k_h,
+ k_w,
+ stride_h,
+ stride_w,
+ group,
+ weight_name,
+ conv_weight_inp_name,
+ conv_weight_q_scale_name,
+ W_conv,
+ ifm_ch,
+ ofm_ch,
+ ifm_dim_h,
+ ifm_dim_w,
+ ofm_dim_h,
+ ofm_dim_w,
+ dilation,
+ pad,
+ ) = self.extract_conv_params(model, node)
+
+ if W_conv is None:
+ warnings.warn("Found Conv node with non-initialized weight, skipping")
+ continue
+
+ # if depthwise conv create sparse matrix and variable "dw"
+ # to store as attribute in Im2Col that indicates that the created
+ # Im2Col node belongs to a depthwise convolution
+ dw = False
+ if group == ifm_ch and ofm_ch == ifm_ch:
+ W_sparse = np.zeros((ofm_ch, ifm_ch, k_h, k_w)) # (OFM, IFM, k_H, k_W)
+ # TODO: if the convolution is quantized with a non-zero zeropoint we
+ # should be using the zeropoint value here instead of np.zeros
+ for ch in range(ifm_ch):
+ W_sparse[ch][ch] = W_conv[ch][0] # W_conv = [OFM, IFM, k_H, k_W]
+ W_conv = W_sparse.astype(np.float32)
+ # we need to store information of the
+ # sparsity of the weight matrix. For this
+ # we use the sparsity annotation of the
+ # weight tensor
+ sparsity = {"dw": {"kernel_shape": [k_h, k_w]}}
+ model.set_tensor_sparsity(weight_name, sparsity)
+ # additionally create variable "dw" to store
+ # as attribute in Im2Col that indicates that the created
# Im2Col node belongs to a depthwise convolution
- dw = False
- if group == ifm_ch and ofm_ch == ifm_ch:
- W_sparse = np.zeros((ofm_ch, ifm_ch, k_h, k_w)) # (OFM, IFM, k_H, k_W)
- for ch in range(ifm_ch):
- W_sparse[ch][ch] = W_conv[ch][0] # W_conv = [OFM, IFM, k_H, k_W]
- W_conv = W_sparse.astype(np.float32)
- # we need to store information of the
- # sparsity of the weight matrix. For this
- # we use the sparsity annotation of the
- # weight tensor
- sparsity = {"dw": {"kernel_shape": [k_h, k_w]}}
- model.set_tensor_sparsity(weight_name, sparsity)
- # additionally create variable "dw" to store
- # as attribute in Im2Col that indicates that the created
- # Im2Col node belongs to a depthwise convolution
- dw = True
-
- # reuse conv weights for new matmul weights
- # conv weights are [OFM][IFM][k][k]
- # first convert to [OFM][k][k][IFM] (to remain compatible with
- # finn-hlslib and how it does im2col/sliding window)
- W_matmul = W_conv.transpose(0, 2, 3, 1) # W_conv = [OFM, IFM, k_H, k_W]
- # reshape into [OFM][k*k*IFM] matrix
- W_matmul = W_matmul.reshape(ofm_ch, ifm_ch * k_h * k_w)
- # transpose to get ONNX-compatible [k*k*IFM][OFM] matrix
- W_matmul = W_matmul.T
- model.set_initializer(weight_name, W_matmul)
-
- # create new intermediate values
- inp_trans_out = helper.make_tensor_value_info(
- model.make_new_valueinfo_name(),
- TensorProto.FLOAT,
- (1, ifm_dim_h, ifm_dim_w, ifm_ch), # NHWC
+ dw = True
+
+ # reuse conv weights for new matmul weights
+ # conv weights are [OFM][IFM][k][k]
+ # first convert to [OFM][k_h][k_w][IFM] (to remain compatible with
+ # finn-hlslib and how it does im2col/sliding window)
+ W_matmul = W_conv.transpose(0, 2, 3, 1) # W_conv = [OFM, IFM, k_H, k_W]
+ # reshape into [OFM][k_h*k_w*IFM] matrix
+ W_matmul = W_matmul.reshape(ofm_ch, ifm_ch * k_h * k_w)
+ # transpose to get ONNX-compatible [k_h*k_w*IFM][OFM] matrix
+ W_matmul = W_matmul.T
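+ # example with assumed shapes, OFM=4, IFM=3, k_h=k_w=2:
+ # (4, 3, 2, 2) -> transpose(0, 2, 3, 1) -> (4, 2, 2, 3) -> reshape -> (4, 12) -> .T -> (12, 4)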
+ model.set_initializer(weight_name, W_matmul)
+ if weight_name != conv_weight_inp_name:
+ # required for convs with quantized weights
+ model.set_tensor_shape(conv_weight_inp_name, W_matmul.shape)
+ if conv_weight_q_scale_name is not None:
+ # required for convs with quantized weights
+ scale_weight_q = model.get_initializer(conv_weight_q_scale_name)
+ if scale_weight_q.ndim > 0:
+ # scale shape is originally [OFM, IFM, k_H, k_W]
+ # transpose into [OFM, k_H, k_W, IFM]
+ scale_weight_q = scale_weight_q.transpose(0, 2, 3, 1)
+ # reshape into [OFM][k_h*k_w*IFM] matrix
+ scale_weight_q = scale_weight_q.reshape(ofm_ch, -1)
+ # transpose to be shape-compatible with weight matrix
+ scale_weight_q = scale_weight_q.T
+ model.set_initializer(conv_weight_q_scale_name, scale_weight_q)
+
+ # create new intermediate values
+ inp_trans_out = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(),
+ TensorProto.FLOAT,
+ (1, ifm_dim_h, ifm_dim_w, ifm_ch), # NHWC
+ )
+ graph.value_info.append(inp_trans_out)
+ inp_trans_out = inp_trans_out.name
+ model.set_tensor_datatype(inp_trans_out, cnv_input_datatype)
+
+ # k_h=k_w==1: pointwise convolution, thus no im2col needed
+ need_im2col = any(p != 0 for p in pad) or k_h != 1 or k_w != 1 or stride_h != 1 or stride_w != 1
+
+ # create new intermediate values
+ matmul_out = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(), TensorProto.FLOAT, (1, ofm_dim_h, ofm_dim_w, ofm_ch)
+ )
+ graph.value_info.append(matmul_out)
+ matmul_out = matmul_out.name
+ model.set_tensor_datatype(matmul_out, cnv_output_datatype)
+
+ # create new nodes
+ # NCHW -> NHWC
+ inp_trans_node = helper.make_node("Transpose", [cnv_input], [inp_trans_out], perm=[0, 2, 3, 1])
+ nodes_to_insert = [inp_trans_node]
+
+ if need_im2col:
+ im2col_out = helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(), TensorProto.FLOAT, (1, ofm_dim_h, ofm_dim_w, ifm_ch * k_h * k_w)
)
- graph.value_info.append(inp_trans_out)
- inp_trans_out = inp_trans_out.name
- model.set_tensor_datatype(inp_trans_out, idt)
-
- need_im2col = True
- if all(p == 0 for p in pad):
- padding = 0
-
- # k_h=k_w==1: pointwise convolution, thus no im2col needed
- if k_h == 1 and k_w == 1 and padding == 0 and stride_h == 1 and stride_w == 1:
- need_im2col = False
-
- if need_im2col:
- im2col_out = helper.make_tensor_value_info(
- model.make_new_valueinfo_name(),
- TensorProto.FLOAT,
- (1, ofm_dim_h, ofm_dim_w, ifm_ch * k_h * k_w),
- )
- graph.value_info.append(im2col_out)
- im2col_out = im2col_out.name
- model.set_tensor_datatype(im2col_out, idt)
-
- matmul_out = helper.make_tensor_value_info(
- model.make_new_valueinfo_name(),
- TensorProto.FLOAT,
- (1, ofm_dim_h, ofm_dim_w, ofm_ch),
+ graph.value_info.append(im2col_out)
+ im2col_out = im2col_out.name
+ model.set_tensor_datatype(im2col_out, cnv_input_datatype)
+ im2col_node = helper.make_node(
+ "Im2Col",
+ [inp_trans_out],
+ [im2col_out],
+ domain="qonnx.custom_op.general",
+ stride=[stride_h, stride_w],
+ kernel_size=[k_h, k_w],
+ pad_amount=pad,
+ input_shape="(1,{},{},{})".format(ifm_dim_h, ifm_dim_w, ifm_ch),
+ depthwise=dw,
+ dilations=dilation,
)
- graph.value_info.append(matmul_out)
- matmul_out = matmul_out.name
- model.set_tensor_datatype(matmul_out, odt)
-
- # create new nodes
- # NCHW -> NHWC
- inp_trans_node = helper.make_node("Transpose", [cnv_input], [inp_trans_out], perm=[0, 2, 3, 1])
- # lower input tensor
- matmul_input = inp_trans_out
- if need_im2col:
- matmul_input = im2col_out
- im2col_node = helper.make_node(
- "Im2Col",
- [inp_trans_out],
- [im2col_out],
- domain="qonnx.custom_op.general",
- stride=[stride_h, stride_w],
- kernel_size=[k_h, k_w],
- pad_amount=pad,
- input_shape="(1,{},{},{})".format(ifm_dim_h, ifm_dim_w, ifm_ch),
- depthwise=dw,
- dilations=dilation,
- )
-
- # do matmul
- matmul_node = helper.make_node("MatMul", [matmul_input, weight_name], [matmul_out])
- # NHWC -> NCHW
- out_trans_node = helper.make_node("Transpose", [matmul_out], [cnv_output], perm=[0, 3, 1, 2])
- # insert nodes where the conv is to preserve topological ordering
- graph.node.insert(node_ind, inp_trans_node)
- if need_im2col:
- graph.node.insert(node_ind + 1, im2col_node)
- graph.node.insert(node_ind + 2, matmul_node)
- graph.node.insert(node_ind + 3, out_trans_node)
- else:
- graph.node.insert(node_ind + 1, matmul_node)
- graph.node.insert(node_ind + 2, out_trans_node)
- # remove old nodes
- graph.node.remove(n)
+ nodes_to_insert.append(im2col_node)
+
+ matmul_input = im2col_out if need_im2col else inp_trans_out
+ # do matmul
+ matmul_node = helper.make_node("MatMul", [matmul_input, conv_weight_inp_name], [matmul_out])
+ # NHWC -> NCHW
+ out_trans_node = helper.make_node("Transpose", [matmul_out], [cnv_output], perm=[0, 3, 1, 2])
+
+ nodes_to_insert.extend([matmul_node, out_trans_node])
+
+ # insert nodes where the conv is to preserve topological ordering
+ for i, insert_node in enumerate(nodes_to_insert):
+ graph.node.insert(node_ind + i, insert_node)
+ graph.node.remove(node)
return (model, graph_modified)
+
+ def extract_conv_params(self, model, node):
+ cnv_input = node.input[0]
+ cnv_output = node.output[0]
+ cnv_input_datatype = model.get_tensor_datatype(cnv_input)
+ cnv_output_datatype = model.get_tensor_datatype(cnv_output)
+ k_h = get_by_name(node.attribute, "kernel_shape").ints[0]
+ k_w = get_by_name(node.attribute, "kernel_shape").ints[1]
+ stride_h = get_by_name(node.attribute, "strides").ints[0]
+ stride_w = get_by_name(node.attribute, "strides").ints[1]
+ group = get_by_name(node.attribute, "group").i
+ weight_name = node.input[1]
+ conv_weight_inp_name = node.input[1]
+ conv_weight_q_scale_name = None
+ W_conv = model.get_initializer(weight_name)
+ if W_conv is None:
+ # check to see if there is an immediate quantizer node feeding the weight input
+ w_producer = model.find_producer(weight_name)
+ if not (w_producer is None) and w_producer.op_type == "Quant":
+ W_conv = model.get_initializer(w_producer.input[0])
+ weight_name = w_producer.input[0]
+ conv_weight_q_scale_name = w_producer.input[1]
+ ifm_ch = model.get_tensor_shape(cnv_input)[1] # assume NCHW
+ ofm_ch = model.get_tensor_shape(cnv_output)[1] # assume NCHW
+ ifm_dim_h = model.get_tensor_shape(cnv_input)[2] # assume NCHW
+ ifm_dim_w = model.get_tensor_shape(cnv_input)[3] # assume NCHW
+ ofm_dim_h = model.get_tensor_shape(cnv_output)[2] # assume NCHW
+ ofm_dim_w = model.get_tensor_shape(cnv_output)[3] # assume NCHW
+ dilation_attr = get_by_name(node.attribute, "dilations")
+ dilation = dilation_attr.ints if dilation_attr is not None else [1, 1] # default value
+ auto_pad = get_by_name(node.attribute, "auto_pad")
+ if auto_pad is not None:
+ auto_pad = auto_pad.s.decode("utf-8")
+ if auto_pad == "NOTSET":
+ pad = get_by_name(node.attribute, "pads").ints
+ else:
+ pad = auto_pad_to_explicit_padding(
+ auto_pad, ifm_dim_h, ifm_dim_w, k_h, k_w, stride_h, stride_w, len(model.get_tensor_shape(cnv_input)) - 2
+ )
+ else:
+ pad = get_by_name(node.attribute, "pads").ints
+
+ if len(pad) == 2: # only one dimension should be padded
+ assert ifm_dim_h == 1 or ifm_dim_w == 1, "Padding is assumed to be 1D, image is 2D"
+
+ return (
+ cnv_input,
+ cnv_output,
+ cnv_input_datatype,
+ cnv_output_datatype,
+ k_h,
+ k_w,
+ stride_h,
+ stride_w,
+ group,
+ weight_name,
+ conv_weight_inp_name,
+ conv_weight_q_scale_name,
+ W_conv,
+ ifm_ch,
+ ofm_ch,
+ ifm_dim_h,
+ ifm_dim_w,
+ ofm_dim_h,
+ ofm_dim_w,
+ dilation,
+ pad,
+ )
diff --git a/src/qonnx/transformation/quantize_graph.py b/src/qonnx/transformation/quantize_graph.py
new file mode 100644
index 00000000..230650bd
--- /dev/null
+++ b/src/qonnx/transformation/quantize_graph.py
@@ -0,0 +1,238 @@
+# Copyright (c) 2024 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of qonnx nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import numpy as np
+import onnx
+from onnx import TensorProto
+
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.general import SortGraph
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.cleanup import cleanup_model
+
+
+def create_quantnode(
+ model,
+ quantnode_input,
+ quantnode_output_shape,
+ scale_value,
+ zeropoint_value,
+ bitwidth_value,
+ narrow,
+ signed,
+ rounding_mode,
+):
+ quant_tensor = onnx.helper.make_tensor_value_info(
+ model.make_new_valueinfo_name(), TensorProto.FLOAT, quantnode_output_shape
+ )
+ model.graph.value_info.append(quant_tensor)
+
+ scale_tensor = np.array(scale_value).astype(np.float32)
+ s_value = onnx.helper.make_tensor_value_info(model.make_new_valueinfo_name(), TensorProto.FLOAT, quantnode_output_shape)
+ model.graph.value_info.append(s_value)
+ model.set_initializer(s_value.name, scale_tensor)
+
+ zeropt_tensor = np.array(zeropoint_value).astype(np.float32)
+ z_value = onnx.helper.make_tensor_value_info(model.make_new_valueinfo_name(), TensorProto.FLOAT, quantnode_output_shape)
+ model.graph.value_info.append(z_value)
+ model.set_initializer(z_value.name, zeropt_tensor)
+
+ bitwidth_tensor = np.array(bitwidth_value).astype(np.float32)
+ b_value = onnx.helper.make_tensor_value_info(model.make_new_valueinfo_name(), TensorProto.FLOAT, [1])
+ model.graph.value_info.append(b_value)
+ model.set_initializer(b_value.name, bitwidth_tensor)
+
+ quantnode = onnx.helper.make_node(
+ "Quant",
+ inputs=[quantnode_input, s_value.name, z_value.name, b_value.name],
+ outputs=[quant_tensor.name],
+ name="Quant_" + quantnode_input,
+ narrow=narrow,
+ signed=signed,
+ rounding_mode=rounding_mode,
+ )
+
+ return quantnode, quant_tensor
+
+
+def adjust_graph(model, input_positions, node_name, quantized_nodes):
+ for pos in input_positions:
+ node_details = (node_name, pos[0])
+ if node_details not in quantized_nodes: # skip if this input/output index of the node was already quantized
+ node_in_focus = model.get_node_from_name(node_name)
+
+ if pos[0][0] == "input":
+ quantnode_input = node_in_focus.input[pos[0][1]]
+ consumer_node = node_in_focus
+ producer_node = model.find_producer(quantnode_input)
+ if producer_node is None or producer_node.op_type != "Quant":
+ quantization_to_perform = True
+ else:
+ quantization_to_perform = False
+ else:
+ quantnode_input = node_in_focus.output[pos[0][1]]
+ consumer_node = model.find_consumer(quantnode_input)
+ producer_node = model.find_producer(quantnode_input)
+ if consumer_node is None or consumer_node.op_type != "Quant":
+ quantization_to_perform = True
+ else:
+ quantization_to_perform = False
+ if quantization_to_perform is True:
+ quantnode_output_shape = model.get_tensor_shape(quantnode_input) # Step: 3
+ quantnode, quant_tensor = create_quantnode(
+ model,
+ quantnode_input,
+ quantnode_output_shape,
+ scale_value=pos[1][0],
+ zeropoint_value=pos[1][1],
+ bitwidth_value=pos[1][2],
+ narrow=pos[1][3],
+ signed=pos[1][4],
+ rounding_mode=pos[1][5],
+ )
+
+ if consumer_node is not None:
+ node_pos = model.get_node_index(consumer_node)
+ model.graph.node[node_pos].input[pos[0][1]] = quant_tensor.name
+ model.graph.node.append(quantnode)
+ else:
+ model.graph.value_info.remove(quant_tensor)
+ model.graph.node.append(quantnode)
+ model.graph.output.insert(0, quant_tensor)
+ model.graph.output.pop(1)
+
+ model = model.transform(SortGraph())
+ quantized_nodes.append(node_details)
+ else:
+ print(f"{pos[0][0]} index {pos[0][1]} of {node_name} is already quantized.")
+ else:
+ print(f"{pos[0][0]} index {pos[0][1]} of {node_name} is already quantized.")
+ continue
+
+ return model
+
+
+class QuantizeGraph(Transformation):
+ """This transformation can be used to introduce a Quant node for a specific type of node in the graph.
+ Users can specify the location of the quant node by providing the input and output index
+ as parameters.
+
+ 1) Expectations:
+ a) ONNX model in the ModelWrapper format.
+ b) Model must be cleaned using qonnx.util.cleanup.cleanup_model().
+ c) Batch size must be set.
+
+ 2) Steps to transform are:
+ Step 1: Find the input for the quant node.
+ Step 2: Find the consumer of the quant node output.
+ Step 3: Find the shape for the output tensor of the quant node.
+ Note: The output tensor of the quant node has the same shape as the tensor feeding its input.
+
+ 3) Input:
+ A dict "quantnode_map" specifying the criterion, positions, and input parameters like
+ scale, bitwidth, zeropoint, and others for a specific quantnode.
+
+ Criterion:
+ a) name: This will allow users to add quant nodes for specific nodes like "Conv_0" and "Gemm_0".
+ Note: using this, users can have quant nodes with different parameters. Ex: quantizing
+ "Conv_0" and "Conv_1" with bitwidth of 4 and 6, respectively.
+ b) op_type: This will allow users to add quant nodes for all nodes of a particular op_type such
+ as, "Conv", "Gemm", and others.
+ Note: All quant nodes created using op_type criterion will have the same input
+ parameters (scale, zeropoint, bitwidth, and others.)
+ c) name and op_type: In this case, quant nodes will be added with precedence to "Name"
+ in comparison to "op_type".
+
+ Positions: ("input", index) or ("output", index)
+ a) "input": indicates that the user want to quantize the input of the selected node.
+ b) "output": indicates that the user want to quantize the output of the selected node.
+ c) index: refers to the input/output index to quantize (a node can have multiple inputs and outputs)
+
+ Parameters (to quant node) are provided as (scale, zeropoint, bitwidth, narrow, signed, rounding_mode)
+
+ a) Inputs: scale, zeropoint, bitwidth.
+ b) Attributes: narrow, signed, rounding_mode.
+
+ 4) Assert:
+ a) The input is a dictionary with node names (or op types) as keys and lists of quant
+ positions as values.
+ b) The input dictionary must contain at least one MAC node (Conv, Gemm, MatMul) for the transformation.
+
+ 5) Return:
+ Returns a model with new quant nodes created at the positions specified using the "quantnode_map".
+
+ 6) Example:
+ quantnode_map = {"name": {"Conv_0": [(("input", 0), (1, 0, 8, 0, 1, "ROUND")),
+ (("input", 1), (1, 0, 8, 0, 1, "ROUND")),
+ (("output", 0), (1, 0, 8, 0, 1, "ROUND"))],
+ "Conv_1": [(("input", 0), (1, 0, 8, 0, 1, "ROUND"))],
+ "Conv_2": [(("input", 1), (1, 0, 8, 0, 1, "ROUND")),
+ (("output", 0), (1, 0, 8, 0, 1, "ROUND"))]},
+
+ "op_type": {"Gemm": [(("input", 0), (1, 0, 8, 0, 1, "ROUND")),
+ (("input", 1), (1, 0, 8, 0, 1, "ROUND")),
+ (("input", 2), (1, 0, 8, 0, 1, "ROUND")),
+ (("output", 0), (1, 0, 8, 0, 1, "ROUND"))]}}
+ """
+
+ def __init__(self, quantnode_map):
+ super().__init__()
+ self.quantnode_map = quantnode_map
+
+ def apply(self, model):
+ model = model.transform(InferShapes())
+ if type(self.quantnode_map) == dict:
+ selection_type = self.quantnode_map.keys()
+ if set(selection_type) <= {"name", "op_type"}:
+ quantized_nodes = []
+ if "name" in selection_type:
+ by_name = self.quantnode_map["name"] # dict with unique names and list of positions.
+ node_list_by_name = by_name.keys() # node names specified by the user for quant nodes.
+ for node_name in node_list_by_name:
+ input_positions = by_name[node_name] # input positions to introduce quant nodes.
+ model = adjust_graph(model, input_positions, node_name, quantized_nodes)
+ if "op_type" in selection_type:
+ by_op_type = self.quantnode_map["op_type"] # dict with the unique names and list of positions.
+ op_list = by_op_type.keys()
+ for op in op_list:
+ node_list = model.get_nodes_by_op_type(op) # List of all nodes with the operation type "op".
+ input_positions = by_op_type[op]
+ for node in node_list:
+ node_name = node.name
+ model = adjust_graph(model, input_positions, node_name, quantized_nodes)
+ model = cleanup_model(model)
+ else:
+ raise Exception("Unsupported selection type")
+ else:
+ raise TypeError("Input must be a dictionary.")
+
+ graph_modified = False
+
+ return (model, graph_modified)
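+
+
+# A minimal usage sketch (not executed as part of this module): the node name "Conv_0"
+# and the model path are placeholders and must match the cleaned model being quantized.
+#
+#   from qonnx.core.modelwrapper import ModelWrapper
+#   from qonnx.util.cleanup import cleanup_model
+#
+#   model = cleanup_model(ModelWrapper("model.onnx"))
+#   quantnode_map = {"name": {"Conv_0": [(("input", 1), (1, 0, 8, 0, 1, "ROUND"))]}}
+#   model = model.transform(QuantizeGraph(quantnode_map))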
diff --git a/src/qonnx/transformation/resize_conv_to_deconv.py b/src/qonnx/transformation/resize_conv_to_deconv.py
new file mode 100644
index 00000000..0dd40972
--- /dev/null
+++ b/src/qonnx/transformation/resize_conv_to_deconv.py
@@ -0,0 +1,259 @@
+# Copyright (c) 2024, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of QONNX nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+import warnings
+from onnx import helper
+
+from qonnx.core.datatype import DataType
+from qonnx.custom_op.general.quant import quant, resolve_rounding_mode
+from qonnx.transformation.base import Transformation
+from qonnx.util.basic import auto_pad_to_explicit_padding, get_by_name
+
+
+def _weight_convolution(cnv_weights: np.ndarray, scale: int) -> np.ndarray:
+ """Adaptation of the weight convolution algorithm as proposed in Colbert et al. (2021) - `An
+ Energy-Efficient Edge Computing Paradigm for Convolution-Based Image Upsampling`"""
+ ofm_ch = cnv_weights.shape[0]
+ ifm_ch = cnv_weights.shape[1]
+ kh_size = cnv_weights.shape[2]
+ kw_size = cnv_weights.shape[3]
+ assert kh_size == kw_size, "Only square kernels supported currently."
+ # NOTE - the deconvolution weight layout is IC x OC x KH x KW, i.e. different from the
+ # convolution kernels, which are OC x IC x KH x KW
+ dcnv_weights = np.zeros((ifm_ch, ofm_ch, kh_size + scale - 1, kw_size + scale - 1))
+ for oc in range(ofm_ch):
+ for ic in range(ifm_ch):
+ for i in range(scale):
+ for j in range(scale):
+ dcnv_weights[ic, oc, i : i + kh_size, j : j + kw_size] += np.rot90(cnv_weights[oc, ic], 2, [0, 1])
+ return dcnv_weights
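+
+
+# Shape sketch (assumes a square kernel): for cnv_weights of shape (OC, IC, 3, 3) and
+# scale=2, the returned deconvolution weights have shape (IC, OC, 4, 4), i.e. the kernel
+# grows to k + scale - 1 per spatial dim and the channel axes are swapped.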
+
+
+class ResizeConvolutionToDeconvolution(Transformation):
+ """Replaces resize convolution layers (e.g., nearest neighbor upsample + same-padded convolution)
+ with deconvolution layers using the weight convolution algorithm. Currently does not support
+ resize convolutions that use bilinear or bicubic upsampling."""
+
+ def __init__(self, maintain_bit_width: bool = False):
+ super().__init__()
+ self.maintain_bit_width = maintain_bit_width
+
+ def apply(self, model):
+ graph = model.graph
+ node_ind = 0
+ graph_modified = False
+ for n in graph.node:
+ node_ind += 1
+ if n.op_type == "Resize":
+ resize_input = n.input[0]
+ resize_output = n.output[0]
+ consumers = model.find_consumers(resize_output)
+
+ if len(consumers) == 0:
+ continue
+
+ if len(consumers) > 1 and any([c.op_type == "Conv" for c in consumers]):
+ warnings.warn("Skipping resize conv that has resize with multiple consumers. Not yet supported.")
+ continue
+
+ conv = consumers[0]
+ if conv is not None and conv.op_type == "Conv":
+ # TODO: extend support to other resize convolutions
+ resize_mode = get_by_name(n.attribute, "mode").s.decode()
+ if resize_mode != "nearest":
+ warnings.warn(f"Skipping resize conv with resize_mode={resize_mode}. Not yet supported.")
+ continue
+
+ group = get_by_name(conv.attribute, "group").i
+ if group != 1:
+ warnings.warn("Skipping resize conv with group > 1. Not yet supported.")
+ continue
+
+ # The weights of the convolution can be generated by another input op if the model is
+ # quantized. Preliminary support for quantization focuses on QONNX ops (i.e., Quant)
+ weight_name = conv.input[1]
+ weight_prod = model.find_producer(weight_name)
+
+ # If the producer is None, the weight is a plain initializer consumed directly by the Conv
+ if weight_prod is None:
+ W_conv = model.get_initializer(weight_name) # (OC, IC, KH, KW)
+
+ # If the convolution weights are not a plain initializer, we need to find
+ # the node that produces them
+ else:
+ if weight_prod.op_type == "Quant":
+ [q_w_name, q_s_name, q_zp_name, q_bw_name] = weight_prod.input
+ W_conv = model.get_initializer(q_w_name)
+ W_scale = model.get_initializer(q_s_name)
+ if isinstance(W_scale, np.ndarray) and W_scale.ndim > 1:
+ W_scale = np.moveaxis(W_scale, 0, 1)
+ W_zeropt = model.get_initializer(q_zp_name)
+ if isinstance(W_zeropt, np.ndarray) and W_zeropt.ndim > 1:
+ W_zeropt = np.moveaxis(W_zeropt, 0, 1)
+ W_bitwidth = model.get_initializer(q_bw_name)
+ W_signed = get_by_name(weight_prod.attribute, "signed").i
+ W_narrow = get_by_name(weight_prod.attribute, "narrow").i
+ W_rounding_mode = get_by_name(weight_prod.attribute, "rounding_mode").s.decode()
+ else:
+ warnings.warn(
+ f"Weight producer is {weight_prod.op_type}, not a QONNX Quant node. Not yet supported."
+ )
+ continue
+
+ kshape = get_by_name(conv.attribute, "kernel_shape").ints
+ idim = model.get_tensor_shape(conv.input[0]) # require NCHW
+ odim = model.get_tensor_shape(conv.output[0]) # require NCHW
+ if not (len(odim) == len(idim) == 4):
+ warnings.warn("Skipping resize conv, only 2D convolutions supported.")
+ continue
+
+ [_, ifm_ch, ifm_dim_h, ifm_dim_w] = idim
+ [_, ofm_ch, ofm_dim_h, ofm_dim_w] = odim
+
+ if (ifm_dim_h != ofm_dim_h) or (ifm_dim_w != ofm_dim_w):
+ warnings.warn("Skipping resize conv, only same-padded convs supported.")
+ continue
+ dilation_attr = get_by_name(conv.attribute, "dilations")
+ if dilation_attr is not None:
+ dilation = dilation_attr.ints
+ else:
+ dilation = [1, 1] # default value
+ if dilation != [1, 1]:
+ warnings.warn("Skipping resize conv, only supporting dilation=[1,1].")
+ continue
+ # get resize scaling attribute
+ resize_scales = model.get_initializer(n.input[2]) # assume NCHW
+ if not (resize_scales[0] == resize_scales[1] == 1):
+ warnings.warn("Skipping resize conv, scaling along batch or channel dimension not supported.")
+ continue
+ if resize_scales[2] != resize_scales[3]:
+ warnings.warn("Skipping resize conv, non-square scaling not yet supported.")
+ continue
+ resize_scale = int(resize_scales[2]) # TODO: extend to vector once non-square scaling supported
+
+ W_deconv = _weight_convolution(W_conv, resize_scale).astype(np.float32)
+ kh_size_deconv = kshape[0] + resize_scale - 1
+ kw_size_deconv = kshape[1] + resize_scale - 1
+ assert W_deconv.shape == (
+ ifm_ch,
+ ofm_ch,
+ kh_size_deconv,
+ kw_size_deconv,
+ ), "The resulting deconvolution weight shape is incorrect."
+
+ stride_h = get_by_name(conv.attribute, "strides").ints[0]
+ stride_w = get_by_name(conv.attribute, "strides").ints[1]
+ # handle both auto_pad and explicit padding
+ auto_pad = get_by_name(conv.attribute, "auto_pad")
+ if auto_pad is not None:
+ # find equivalent specified padding
+ auto_pad = auto_pad.s.decode("utf-8")
+ if auto_pad == "NOTSET":
+ # use specified padding
+ pad = get_by_name(conv.attribute, "pads").ints
+ else:
+ pad = auto_pad_to_explicit_padding(
+ auto_pad,
+ ifm_dim_h,
+ ifm_dim_w,
+ kshape[0],
+ kshape[1],
+ stride_h,
+ stride_w,
+ len(model.get_tensor_shape(n.input[0])) - 2,
+ )
+ else:
+ # use specified padding
+ pad = get_by_name(conv.attribute, "pads").ints
+
+ # if `maintain_bit_width`, then we use the quant parameters to
+ # re-quantize the weights after the weight convolution
+ if self.maintain_bit_width and (weight_prod is not None):
+ W_deconv_quant = quant(W_deconv, W_scale, W_zeropt, W_bitwidth, W_signed, W_narrow, W_rounding_mode)
+ if not np.allclose(W_deconv, W_deconv_quant):
+ warnings.warn("Clipping error introduced, consider `maintain_bit_width=False`.")
+
+ # if not `maintain_bit_width`, then we adjust the bit width to
+ # account for the clipping errors.
+ elif weight_prod is not None:
+ round_fnc = resolve_rounding_mode(W_rounding_mode)
+ W_int = (W_deconv / W_scale) + W_zeropt
+ W_int = round_fnc(W_int) # handling rounding errors
+ W_min = W_int.min()
+ W_max = W_int.max()
+ if W_min < 0:
+ if abs(W_min) > W_max:
+ wdt = DataType.get_smallest_possible(W_min)
+ else:
+ wdt = DataType.get_smallest_possible(-W_max - 1)
+ else:
+ wdt = DataType.get_smallest_possible(W_max)
+ assert np.vectorize(wdt.allowed)(W_int).all(), "Error: issue finding data type to support."
+ if W_bitwidth != wdt.bitwidth():
+ W_bitwidth = np.array(wdt.bitwidth(), dtype=np.float32)
+ assert wdt.signed() == W_signed, "Error: should maintain sign of the weights."
+
+ deconv_inps = [resize_input, weight_name]
+ # Make sure to keep the biases from the convolution
+ if len(conv.input) == 3:
+ bias_name = conv.input[2]
+ bias_prod = model.find_producer(bias_name)
+ # If the producer is None, the bias is a plain initializer consumed by the Conv
+ # node and we need to ensure it isn't removed along with the Conv node
+ if bias_prod is None:
+ B_conv = model.get_initializer(bias_name) # (OC,)
+ model.set_initializer(bias_name, B_conv)
+ deconv_inps.append(bias_name) # add to the inputs
+ deconv_outs = conv.output
+ deconv_pad = pad
+ deconv_node = helper.make_node(
+ "ConvTranspose",
+ deconv_inps,
+ deconv_outs,
+ kernel_shape=[kh_size_deconv, kw_size_deconv],
+ strides=[resize_scale, resize_scale],
+ pads=deconv_pad,
+ group=group,
+ dilations=dilation,
+ )
+ W_deconv_init = weight_name
+ if weight_prod is not None:
+ W_deconv_init = q_w_name
+ model.set_initializer(q_zp_name, W_zeropt)
+ model.set_initializer(q_s_name, W_scale)
+ model.set_initializer(q_bw_name, W_bitwidth)
+ model.set_initializer(W_deconv_init, W_deconv)
+ model.set_tensor_shape(weight_name, list(W_deconv.shape))
+ graph.node.insert(node_ind, deconv_node)
+ # remove old nodes
+ graph.node.remove(n)
+ graph.node.remove(conv)
+ graph_modified = True
+
+ return (model, graph_modified)
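+
+
+# A minimal usage sketch (the model path is a placeholder):
+#
+#   from qonnx.core.modelwrapper import ModelWrapper
+#   from qonnx.transformation.infer_shapes import InferShapes
+#
+#   model = ModelWrapper("espcn_nn_resize.onnx")
+#   model = model.transform(InferShapes())
+#   model = model.transform(ResizeConvolutionToDeconvolution(maintain_bit_width=False))
+#   # nearest-neighbor Resize + same-padded Conv pairs are now single ConvTranspose nodes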
diff --git a/src/qonnx/transformation/subpixel_to_deconv.py b/src/qonnx/transformation/subpixel_to_deconv.py
index f721140d..3f330c99 100644
--- a/src/qonnx/transformation/subpixel_to_deconv.py
+++ b/src/qonnx/transformation/subpixel_to_deconv.py
@@ -31,7 +31,7 @@
from onnx import helper
from qonnx.transformation.base import Transformation
-from qonnx.util.basic import get_by_name
+from qonnx.util.basic import auto_pad_to_explicit_padding, get_by_name
def _weight_shuffle(cnv_weights: np.ndarray, block_size: int) -> np.ndarray:
@@ -62,23 +62,6 @@ def _weight_shuffle(cnv_weights: np.ndarray, block_size: int) -> np.ndarray:
return dcnv_weights
-def _auto_pad_to_explicit_padding(autopad_str, idim_h, idim_w, k_h, k_w, stride_h, stride_w, n_dims):
- pad_total_h = (stride_h - 1) * idim_h - stride_h + k_h
- pad_total_w = (stride_w - 1) * idim_w - stride_w + k_w
- pad_half_small_h = int((pad_total_h / 2))
- pad_half_small_w = int((pad_total_w / 2))
- pad_half_large_h = pad_total_h - pad_half_small_h
- pad_half_large_w = pad_total_w - pad_half_small_w
- if autopad_str == "VALID":
- return [0 for i in range(2 * n_dims)]
- elif autopad_str == "SAME_UPPER":
- return [pad_half_small_h, pad_half_small_w, pad_half_large_h, pad_half_large_w]
- elif autopad_str == "SAME_LOWER":
- return [pad_half_large_h, pad_half_large_w, pad_half_small_h, pad_half_small_w]
- else:
- raise Exception("Unsupported auto_pad: " + autopad_str)
-
-
class SubPixelToDeconvolution(Transformation):
"""Replaces sub-pixel convolution layers (i.e., same-padded convolution + depth2space)
with deconvolution layers using the weight shuffle algorithm. Currently does not support
@@ -111,6 +94,7 @@ def apply(self, model):
group = get_by_name(n.attribute, "group").i
if group != 1:
warnings.warn("Skipping sub-pixel conv with group > 1. Not yet supported.")
+ continue
# The weights of the convolution can be generated by another input op if the model is
# quantized. Preliminary support for quantization focuses on QONNX ops (i.e., Quant)
@@ -136,14 +120,18 @@ def apply(self, model):
continue
kshape = get_by_name(n.attribute, "kernel_shape").ints
- ifm_ch = model.get_tensor_shape(n.input[0])[1] # assume NCHW
- ofm_ch = model.get_tensor_shape(n.output[0])[1] # assume NCHW
- ifm_dim_h = model.get_tensor_shape(n.input[0])[2] # assume NCHW
- ifm_dim_w = model.get_tensor_shape(n.input[0])[3] # assume NCHW
- ofm_dim_h = model.get_tensor_shape(n.output[0])[2] # assume NCHW
- ofm_dim_w = model.get_tensor_shape(n.output[0])[3]
+ idim = model.get_tensor_shape(n.input[0]) # require NCHW
+ odim = model.get_tensor_shape(n.output[0]) # require NCHW
+ if not (len(odim) == len(idim) == 4):
+ warnings.warn("Skipping sub-pixel conv, only 2D convolutions supported.")
+ continue
+
+ [_, ifm_ch, ifm_dim_h, ifm_dim_w] = idim
+ [_, ofm_ch, ofm_dim_h, ofm_dim_w] = odim
+
if (ifm_dim_h != ofm_dim_h) or (ifm_dim_w != ofm_dim_w):
warnings.warn("Skipping sub-pixel conv, only same-padded convs supported.")
+ continue
dilation_attr = get_by_name(n.attribute, "dilations")
if dilation_attr is not None:
dilation = dilation_attr.ints
@@ -157,6 +145,7 @@ def apply(self, model):
warnings.warn(
"Skipping sub-pixel conv, the output channels and block size need to be evenly divisible."
)
+ continue
W_deconv = _weight_shuffle(W_conv, block_size).astype(np.float32)
kh_size_deconv = kshape[0] * block_size
kw_size_deconv = kshape[1] * block_size
@@ -178,7 +167,7 @@ def apply(self, model):
# use specified padding
pad = get_by_name(n.attribute, "pads").ints
else:
- pad = _auto_pad_to_explicit_padding(
+ pad = auto_pad_to_explicit_padding(
auto_pad,
ifm_dim_h,
ifm_dim_w,
diff --git a/src/qonnx/util/basic.py b/src/qonnx/util/basic.py
index b775a3ba..363aa501 100644
--- a/src/qonnx/util/basic.py
+++ b/src/qonnx/util/basic.py
@@ -321,3 +321,20 @@ def sanitize_quant_values(model, node_tensors, execution_context, check_values=F
)
)
return execution_context
+
+
+def auto_pad_to_explicit_padding(autopad_str, idim_h, idim_w, k_h, k_w, stride_h, stride_w, n_dims):
+ pad_total_h = (stride_h - 1) * idim_h - stride_h + k_h
+ pad_total_w = (stride_w - 1) * idim_w - stride_w + k_w
+ pad_half_small_h = int((pad_total_h / 2))
+ pad_half_small_w = int((pad_total_w / 2))
+ pad_half_large_h = pad_total_h - pad_half_small_h
+ pad_half_large_w = pad_total_w - pad_half_small_w
+ if autopad_str == "VALID":
+ return [0 for i in range(2 * n_dims)]
+ elif autopad_str == "SAME_UPPER":
+ return [pad_half_small_h, pad_half_small_w, pad_half_large_h, pad_half_large_w]
+ elif autopad_str == "SAME_LOWER":
+ return [pad_half_large_h, pad_half_large_w, pad_half_small_h, pad_half_small_w]
+ else:
+ raise Exception("Unsupported auto_pad: " + autopad_str)
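+
+
+# Worked example (sketch): for a 3x3 kernel, stride 1 and a 32x32 input, the total padding
+# per spatial dim is (1 - 1) * 32 - 1 + 3 = 2, so
+#   auto_pad_to_explicit_padding("SAME_UPPER", 32, 32, 3, 3, 1, 1, 2) -> [1, 1, 1, 1]
+#   auto_pad_to_explicit_padding("VALID", 32, 32, 3, 3, 1, 1, 2) -> [0, 0, 0, 0]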
diff --git a/src/qonnx/util/cleanup.py b/src/qonnx/util/cleanup.py
index 46dda2a0..933f729d 100644
--- a/src/qonnx/util/cleanup.py
+++ b/src/qonnx/util/cleanup.py
@@ -43,7 +43,7 @@
from qonnx.transformation.quant_constant_folding import FoldTransposeIntoQuantInit
-def cleanup_model(model, preserve_qnt_ops=True, override_batchsize=None, extract_conv_bias=False):
+def cleanup_model(model, preserve_qnt_ops=True, override_inpsize=None, extract_conv_bias=False):
"""Execute the transformations for the cleanup function on a model level.
This allows the reuse of the cleanup transformations, without needing to read/write the model from/to disk.
@@ -61,6 +61,19 @@ def cleanup_model(model, preserve_qnt_ops=True, override_batchsize=None, extract
preserve_qnt_optypes = ["Quant", "BipolarQuant", "QuantizeLinear", "DequantizeLinear"]
else:
preserve_qnt_optypes = []
+
+ if override_inpsize is not None:
+ if type(override_inpsize) is str:
+ override_inpsize = eval(override_inpsize)
+ if type(override_inpsize) is int:
+ override_batchsize = override_inpsize
+ model = model.transform(ChangeBatchSize(override_batchsize))
+ elif type(override_inpsize) is tuple:
+ override_batchsize = override_inpsize[0]
+ model = model.transform(ChangeBatchSize(override_batchsize))
+ iname = model.graph.input[0].name
+ model.set_tensor_shape(iname, override_inpsize)
+
cleanup_transformations = [
InferShapes(),
GiveUniqueParameterTensors(),
@@ -80,27 +93,24 @@ def cleanup_model(model, preserve_qnt_ops=True, override_batchsize=None, extract
model = model.transform(GiveUniqueNodeNames())
model = model.transform(GiveReadableTensorNames())
- if override_batchsize is not None:
- model = model.transform(ChangeBatchSize(override_batchsize))
- model = model.transform(InferShapes())
-
return model
-def cleanup(in_file, *, out_file=None, preserve_qnt_ops=True, override_batchsize: int = None, extract_conv_bias=False):
+def cleanup(in_file, *, out_file=None, preserve_qnt_ops=True, override_inpsize: str = None, extract_conv_bias=False):
"""Execute a set of graph transformations to clean-up the given ONNX file.
:param in_file: Filename for the input ONNX model
:param preserve_qnt_ops: Preserve weight quantization operators
:param out_file: If set, filename for the output ONNX model. Set to in_file with _clean
suffix otherwise.
- :param override_batchsize: If specified, override the batch size for the ONNX graph
+ :param override_inpsize: If specified, override the input size for the ONNX graph. Can be a full
+ shape such as "(1,3,224,224)", or a single integer such as 1 to override only the batch size
:param extract_conv_bias: If specified, separate Conv bias into its own Add node
"""
model = ModelWrapper(in_file)
model = cleanup_model(
- model, preserve_qnt_ops=preserve_qnt_ops, override_batchsize=override_batchsize, extract_conv_bias=extract_conv_bias
+ model, preserve_qnt_ops=preserve_qnt_ops, override_inpsize=override_inpsize, extract_conv_bias=extract_conv_bias
)
if out_file is None:
out_file = in_file.replace(".onnx", "_clean.onnx")
diff --git a/src/qonnx/util/inference_cost.py b/src/qonnx/util/inference_cost.py
index 86428c76..8041ecdc 100644
--- a/src/qonnx/util/inference_cost.py
+++ b/src/qonnx/util/inference_cost.py
@@ -70,8 +70,24 @@ def compute_mem_bits_and_elems(inf_cost_dict, filter_string="mem_w"):
return total_mem_bits, total_mem_elems
+def assign_mem_bits_and_elems(res_dict):
+ mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(res_dict, "mem_w")
+ mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(res_dict, "mem_o")
+ res_dict["total_mem_w_bits"] = mem_w_bits
+ res_dict["total_mem_w_elems"] = mem_w_elems
+ res_dict["total_mem_o_bits"] = mem_o_bits
+ res_dict["total_mem_o_elems"] = mem_o_elems
+ return res_dict
+
+
def inference_cost(
- model_filename_or_wrapper, *, output_json=None, output_onnx=None, preprocess=True, discount_sparsity=True
+ model_filename_or_wrapper,
+ *,
+ output_json=None,
+ output_onnx=None,
+ preprocess=True,
+ discount_sparsity=True,
+ cost_breakdown=False
):
"""Return the inference cost estimate metric for given ONNX model.
Supports the Quant op for weight/activation quantization.
@@ -84,7 +100,10 @@ def inference_cost(
datatype inference and constant folding. Strongly recommended.
:param discount_sparsity: If set, will discount op cost of MAC ops with a
constant zero weight, and the mem cost of constant zero weights.
- """
+ :param cost_breakdown: If set, include per-node (by name) and per-node-type
+ breakdowns as part of the returned inference cost dict."""
+
+ combined_results = {}
if isinstance(model_filename_or_wrapper, ModelWrapper):
model = model_filename_or_wrapper
else:
@@ -104,25 +123,29 @@ def inference_cost(
model = model.transform(GiveReadableTensorNames())
if output_onnx is not None:
model.save(output_onnx)
- ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity))
- bops, macs = compute_bops_and_macs(ret)
- mem_w_bits, mem_w_elems = compute_mem_bits_and_elems(ret, "mem_w")
- mem_o_bits, mem_o_elems = compute_mem_bits_and_elems(ret, "mem_o")
- ret["total_bops"] = bops
- ret["total_macs"] = macs
- ret["total_mem_w_bits"] = mem_w_bits
- ret["total_mem_w_elems"] = mem_w_elems
- ret["total_mem_o_bits"] = mem_o_bits
- ret["total_mem_o_elems"] = mem_o_elems
-
- if "unsupported" in ret:
- ret["unsupported"] = str(ret["unsupported"])
-
+ ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity, cost_breakdown))
+ for i, res in ret.items():
+ if i == "total_cost":
+ bops, macs = compute_bops_and_macs(res)
+ res = assign_mem_bits_and_elems(res)
+ res["total_bops"] = bops
+ res["total_macs"] = macs
+ if "unsupported" in res:
+ res["unsupported"] = str(res["unsupported"])
+ combined_results[i] = res
+ elif i in ["optype_cost", "node_cost"]:
+ per_optype_or_node_breakdown = {}
+ for optype, op_res in res.items():
+ bops, macs = compute_bops_and_macs(op_res)
+ op_res = assign_mem_bits_and_elems(op_res)
+ op_res["total_bops"] = bops
+ op_res["total_macs"] = macs
+ per_optype_or_node_breakdown[optype] = op_res
+ combined_results[i] = per_optype_or_node_breakdown
if output_json is not None:
with open(output_json, "w") as f:
- json.dump(ret, f, sort_keys=True, indent=2)
-
- return ret
+ json.dump(combined_results, f, sort_keys=True, indent=2)
+ return combined_results
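+
+
+# A minimal usage sketch (the model path is a placeholder): with cost_breakdown=True the
+# returned dict is keyed by "total_cost", "optype_cost" and "node_cost".
+#
+#   costs = inference_cost("model_clean.onnx", cost_breakdown=True)
+#   conv_macs = costs["optype_cost"]["Conv"]["total_macs"]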
def main():
diff --git a/src/qonnx/util/range_analysis.py b/src/qonnx/util/range_analysis.py
index fd20db77..353232e5 100644
--- a/src/qonnx/util/range_analysis.py
+++ b/src/qonnx/util/range_analysis.py
@@ -60,13 +60,6 @@ def calculate_matvec_accumulator_extremum(matrix: np.ndarray, vec_min, vec_max):
return (min_values, max_values)
-def propagate_range(node, model, range_dict):
- iname = node.input[0]
- node_irange = range_dict[iname]
- for oname in node.output:
- range_dict[oname] = node_irange
-
-
def calc_gemm_range(node, model, range_dict):
alpha = get_by_name(node.attribute, "alpha").f
beta = get_by_name(node.attribute, "beta").f
@@ -172,10 +165,49 @@ def calc_conv_range(node, model, range_dict):
range_dict[oname] = ret
+def calc_convtranspose_range(node, model, range_dict):
+ iname = node.input[0]
+ wname = node.input[1]
+ assert len(node.input) == 2, "Found unsupported ConvTranspose with bias"
+ oname = node.output[0]
+ irange = range_dict[iname]
+ imin, imax = irange
+ weights = model.get_initializer(wname)
+ assert weights is not None, "Uninitialized ConvTranspose weights"
+ groups = get_by_name(node.attribute, "group")
+ if groups is None:
+ # default to dense convs
+ groups = 1
+ else:
+ groups = groups.i
+ assert groups == 1, "Only dense (non-grouped) ConvTranspose is supported"
+ # do weight reshaping to treat Conv similar to MatMul
+ # (mh, mw) = (ofm, (ifm x k0 x k1 x ...))
+ conv_ofm = weights.shape[1]
+ conv_ifm = weights.shape[0]
+ weights = weights.transpose(1, 0, 2, 3).reshape(conv_ofm, -1)
+ k_total = weights.shape[1] // conv_ifm
+ if type(imin) is np.ndarray:
+ imin_rep = np.repeat(imin, k_total)
+ imax_rep = np.repeat(imax, k_total)
+ else:
+ imin_rep = imin
+ imax_rep = imax
+ dw_ret_min = []
+ dw_ret_max = []
+ for i in range(conv_ofm):
+ w_slice = weights[i, :].reshape(1, -1)
+ dw_ret = calculate_matvec_accumulator_extremum(w_slice, imin_rep, imax_rep)
+ dw_ret_min.append(dw_ret[0].item())
+ dw_ret_max.append(dw_ret[1].item())
+ ret = (np.asarray(dw_ret_min), np.asarray(dw_ret_max))
+ range_dict[oname] = ret
+
+
def get_minmax_prototype_tensors(irange, ishp, inp_vi, i_channel_axis=1):
proto_min = valueinfo_to_tensor(inp_vi)
proto_max = valueinfo_to_tensor(inp_vi)
- if type(irange[0]) in [float, int, np.float32, np.float64, np.uint8, np.int8]:
+ if type(irange[0]) in [float, int, np.float16, np.float32, np.float64, np.uint8, np.int8]:
imin, imax = irange
proto_min[...] = imin
proto_max[...] = imax
@@ -211,11 +243,14 @@ def calc_monotonic_range(node, model, range_dict, i_channel_axis=1):
inp_vi = model.get_tensor_valueinfo(inp)
proto_vectors.append(get_minmax_prototype_tensors(irange, ishp, inp_vi, i_channel_axis))
# process all combinations of prototype vectors for dynamic inputs
- running_min = None
- running_max = None
+ running_min = [None for i in range(len(node.output))]
+ running_max = [None for i in range(len(node.output))]
# create context for single-node execution
ctx = {x: model.get_initializer(x) for x in node.input}
- ctx[oname] = valueinfo_to_tensor(model.get_tensor_valueinfo(oname))
+ for oname in node.output:
+ ctx[oname] = valueinfo_to_tensor(model.get_tensor_valueinfo(oname))
+ # assume all outputs are homogeneous wrt data layout (e.g. the channel axis
+ # always lives in the same position)
axes_to_min = [i for i in range(ctx[oname].ndim)]
axes_to_min.remove(i_channel_axis)
axes_to_min = tuple(axes_to_min)
@@ -223,13 +258,19 @@ def calc_monotonic_range(node, model, range_dict, i_channel_axis=1):
for i in range(n_dyn_inp):
ctx[dyn_inps[i]] = inps[i]
execute_node(node, ctx, model.graph, opset_version=opset_version)
- # grab new output and update running min/max
- out = ctx[oname]
- chanwise_min = out.min(axis=axes_to_min).flatten()
- chanwise_max = out.max(axis=axes_to_min).flatten()
- running_min = np.minimum(chanwise_min, running_min).flatten() if running_min is not None else chanwise_min
- running_max = np.maximum(chanwise_max, running_max).flatten() if running_max is not None else chanwise_max
- range_dict[oname] = (running_min, running_max)
+ for oind, oname in enumerate(node.output):
+ # grab new output and update running min/max
+ out = ctx[oname]
+ chanwise_min = out.min(axis=axes_to_min).flatten()
+ chanwise_max = out.max(axis=axes_to_min).flatten()
+ running_min[oind] = (
+ np.minimum(chanwise_min, running_min[oind]).flatten() if running_min[oind] is not None else chanwise_min
+ )
+ running_max[oind] = (
+ np.maximum(chanwise_max, running_max[oind]).flatten() if running_max[oind] is not None else chanwise_max
+ )
+ for oind, oname in enumerate(node.output):
+ range_dict[oname] = (running_min[oind], running_max[oind])
def calc_range_outdtype(node, model, range_dict):
@@ -240,12 +281,13 @@ def calc_range_outdtype(node, model, range_dict):
optype_to_range_calc = {
- "Transpose": propagate_range,
+ "Transpose": calc_monotonic_range,
"MatMul": calc_matmul_range,
"Conv": calc_conv_range,
+ "ConvTranspose": calc_convtranspose_range,
"QuantMaxNorm": calc_range_outdtype,
- "Flatten": propagate_range,
- "Reshape": propagate_range,
+ "Flatten": calc_monotonic_range,
+ "Reshape": calc_monotonic_range,
"Quant": calc_monotonic_range,
"BipolarQuant": calc_monotonic_range,
"Mul": calc_monotonic_range,
@@ -254,7 +296,7 @@ def calc_range_outdtype(node, model, range_dict):
"Add": calc_monotonic_range,
"BatchNormalization": calc_monotonic_range,
"Relu": calc_monotonic_range,
- "Pad": propagate_range,
+ "Pad": calc_monotonic_range,
"AveragePool": calc_monotonic_range,
"Trunc": calc_range_outdtype,
"MaxPool": calc_monotonic_range,
@@ -267,6 +309,7 @@ def calc_range_outdtype(node, model, range_dict):
"Clip": calc_monotonic_range,
"Sigmoid": calc_monotonic_range,
"Concat": calc_monotonic_range,
+ "Split": calc_monotonic_range,
}
@@ -320,8 +363,12 @@ def range_analysis(
range_min = None
range_max = None
else:
- irange = irange.split(",")
- range_min, range_max = float(irange[0]), float(irange[1])
+ irange = eval(irange)
+ range_min, range_max = irange
+ if isinstance(range_min, list):
+ range_min = np.asarray(range_min, dtype=np.float32)
+ if isinstance(range_max, list):
+ range_max = np.asarray(range_max, dtype=np.float32)
elif isinstance(irange, tuple):
range_min, range_max = irange
else:
@@ -350,9 +397,8 @@ def range_analysis(
for node in model.graph.node:
dyn_inputs = [x for x in node.input if is_dyn_input(x, model)]
inprange_ok = all([x in range_dict.keys() for x in dyn_inputs])
- outcount_ok = len(node.output) == 1
op_ok = node.op_type in optype_to_range_calc.keys()
- if inprange_ok and op_ok and outcount_ok:
+ if inprange_ok and op_ok:
range_calc_fxn = optype_to_range_calc[node.op_type]
range_calc_fxn(node, model, range_dict)
out_range = range_dict[node.output[0]]
diff --git a/src/qonnx/util/test.py b/src/qonnx/util/test.py
index f18e437e..47b4378f 100644
--- a/src/qonnx/util/test.py
+++ b/src/qonnx/util/test.py
@@ -37,6 +37,76 @@
# utility functions to fetch models and data for
# testing various qonnx transformations
+a2q_rn18_preproc_mean = np.asarray([0.491, 0.482, 0.447], dtype=np.float32)
+a2q_rn18_preproc_std = np.asarray([0.247, 0.243, 0.262], dtype=np.float32)
+a2q_rn18_int_range = (0, 255)
+a2q_rn18_iscale = 1 / 255
+a2q_rn18_rmin = (a2q_rn18_int_range[0] * a2q_rn18_iscale - a2q_rn18_preproc_mean) / a2q_rn18_preproc_std
+a2q_rn18_rmax = (a2q_rn18_int_range[1] * a2q_rn18_iscale - a2q_rn18_preproc_mean) / a2q_rn18_preproc_std
+a2q_rn18_scale = (1 / a2q_rn18_preproc_std) * a2q_rn18_iscale
+a2q_rn18_bias = -a2q_rn18_preproc_mean * a2q_rn18_preproc_std
+a2q_rn18_common = {
+ "input_shape": (1, 3, 32, 32),
+ "input_range": (a2q_rn18_rmin, a2q_rn18_rmax),
+ "int_range": a2q_rn18_int_range,
+ "scale": a2q_rn18_scale,
+ "bias": a2q_rn18_bias,
+}
+a2q_rn18_urlbase = "https://github.com/fastmachinelearning/qonnx_model_zoo/releases/download/a2q-20240905/"
+
+a2q_model_details = {
+ "rn18_w4a4_a2q_16b": {
+ "description": "4-bit ResNet-18 on CIFAR-10, A2Q 16-bit accumulators",
+ "url": a2q_rn18_urlbase + "quant_resnet18_w4a4_a2q_16b-d4bfa990.onnx",
+ **a2q_rn18_common,
+ },
+ "rn18_w4a4_a2q_15b": {
+ "description": "4-bit ResNet-18 on CIFAR-10, A2Q 15-bit accumulators",
+ "url": a2q_rn18_urlbase + "quant_resnet18_w4a4_a2q_15b-eeca8ac2.onnx",
+ **a2q_rn18_common,
+ },
+ "rn18_w4a4_a2q_14b": {
+ "description": "4-bit ResNet-18 on CIFAR-10, A2Q 14-bit accumulators",
+ "url": a2q_rn18_urlbase + "quant_resnet18_w4a4_a2q_14b-563cf426.onnx",
+ **a2q_rn18_common,
+ },
+ "rn18_w4a4_a2q_13b": {
+ "description": "4-bit ResNet-18 on CIFAR-10, A2Q 13-bit accumulators",
+ "url": a2q_rn18_urlbase + "quant_resnet18_w4a4_a2q_13b-d3cae293.onnx",
+ **a2q_rn18_common,
+ },
+ "rn18_w4a4_a2q_12b": {
+ "description": "4-bit ResNet-18 on CIFAR-10, A2Q 12-bit accumulators",
+ "url": a2q_rn18_urlbase + "quant_resnet18_w4a4_a2q_12b-fb3a0f8a.onnx",
+ **a2q_rn18_common,
+ },
+ "rn18_w4a4_a2q_plus_16b": {
+ "description": "4-bit ResNet-18 on CIFAR-10, A2Q+ 16-bit accumulators",
+ "url": a2q_rn18_urlbase + "quant_resnet18_w4a4_a2q_plus_16b-09e47feb.onnx",
+ **a2q_rn18_common,
+ },
+ "rn18_w4a4_a2q_plus_15b": {
+ "description": "4-bit ResNet-18 on CIFAR-10, A2Q+ 15-bit accumulators",
+ "url": a2q_rn18_urlbase + "quant_resnet18_w4a4_a2q_plus_15b-10e7bc83.onnx",
+ **a2q_rn18_common,
+ },
+ "rn18_w4a4_a2q_plus_14b": {
+ "description": "4-bit ResNet-18 on CIFAR-10, A2Q+ 14-bit accumulators",
+ "url": a2q_rn18_urlbase + "quant_resnet18_w4a4_a2q_plus_14b-8db8c78c.onnx",
+ **a2q_rn18_common,
+ },
+ "rn18_w4a4_a2q_plus_13b": {
+ "description": "4-bit ResNet-18 on CIFAR-10, A2Q+ 13-bit accumulators",
+ "url": a2q_rn18_urlbase + "quant_resnet18_w4a4_a2q_plus_13b-f57b05ce.onnx",
+ **a2q_rn18_common,
+ },
+ "rn18_w4a4_a2q_plus_12b": {
+ "description": "4-bit ResNet-18 on CIFAR-10, A2Q+ 12-bit accumulators",
+ "url": a2q_rn18_urlbase + "quant_resnet18_w4a4_a2q_plus_12b-1e2aca29.onnx",
+ **a2q_rn18_common,
+ },
+}
+
test_model_details = {
"FINN-CNV_W2A2": {
"description": "2-bit VGG-10-like CNN on CIFAR-10",
@@ -116,6 +186,7 @@
"input_shape": (1, 3, 224, 224),
"input_range": (0, 1),
},
+ **a2q_model_details,
}
@@ -145,15 +216,25 @@ def qonnx_download_model():
clize.run(download_model)
-def get_golden_in_and_output(test_model):
- model = download_model(test_model, do_cleanup=True, return_modelwrapper=True)
- rng = np.random.RandomState(42)
+def get_random_input(test_model, seed=42):
+ rng = np.random.RandomState(seed)
input_shape = test_model_details[test_model]["input_shape"]
(low, high) = test_model_details[test_model]["input_range"]
+ # some models specify per-channel ranges, be conservative for those
+ if isinstance(low, np.ndarray):
+ low = low.max()
+ if isinstance(high, np.ndarray):
+ high = high.min()
size = np.prod(np.asarray(input_shape))
input_tensor = rng.uniform(low=low, high=high, size=size)
input_tensor = input_tensor.astype(np.float32)
input_tensor = input_tensor.reshape(input_shape)
+ return input_tensor
+
+
+def get_golden_in_and_output(test_model, seed=42):
+ model = download_model(test_model, do_cleanup=True, return_modelwrapper=True)
+ input_tensor = get_random_input(test_model, seed=seed)
input_dict = {model.graph.input[0].name: input_tensor}
golden_output_dict = oxe.execute_onnx(model, input_dict)
golden_result = golden_output_dict[model.graph.output[0].name]
diff --git a/tests/analysis/test_inference_cost.py b/tests/analysis/test_inference_cost.py
index a94f57f4..572d2e14 100644
--- a/tests/analysis/test_inference_cost.py
+++ b/tests/analysis/test_inference_cost.py
@@ -34,90 +34,102 @@
model_details_infcost = {
"FINN-CNV_W2A2": {
"expected_sparse": {
- "op_mac_SCALEDINT<8>_INT2": 1345500.0,
- "mem_w_INT2": 908033.0,
- "mem_o_SCALEDINT<32>": 57600.0,
- "op_mac_INT2_INT2": 35615771.0,
- "mem_o_INT32": 85002.0,
- "unsupported": "set()",
- "discount_sparsity": True,
- "total_bops": 163991084.0,
- "total_macs": 36961271.0,
- "total_mem_w_bits": 1816066.0,
- "total_mem_w_elems": 908033.0,
- "total_mem_o_bits": 4563264.0,
- "total_mem_o_elems": 142602.0,
+ "total_cost": {
+ "op_mac_SCALEDINT<8>_INT2": 1345500.0,
+ "mem_w_INT2": 908033.0,
+ "mem_o_SCALEDINT<32>": 57600.0,
+ "op_mac_INT2_INT2": 35615771.0,
+ "mem_o_INT32": 85002.0,
+ "unsupported": "set()",
+ "discount_sparsity": True,
+ "total_bops": 163991084.0,
+ "total_macs": 36961271.0,
+ "total_mem_w_bits": 1816066.0,
+ "total_mem_w_elems": 908033.0,
+ "total_mem_o_bits": 4563264.0,
+ "total_mem_o_elems": 142602.0,
+ }
},
"expected_dense": {
- "op_mac_SCALEDINT<8>_INT2": 1555200.0,
- "mem_w_INT2": 1542848.0,
- "mem_o_SCALEDINT<32>": 57600.0,
- "op_mac_INT2_INT2": 57906176.0,
- "mem_o_INT32": 85002.0,
- "unsupported": "set()",
- "discount_sparsity": False,
- "total_bops": 256507904.0,
- "total_macs": 59461376.0,
- "total_mem_w_bits": 3085696.0,
- "total_mem_w_elems": 1542848.0,
- "total_mem_o_bits": 4563264.0,
- "total_mem_o_elems": 142602.0,
+ "total_cost": {
+ "op_mac_SCALEDINT<8>_INT2": 1555200.0,
+ "mem_w_INT2": 1542848.0,
+ "mem_o_SCALEDINT<32>": 57600.0,
+ "op_mac_INT2_INT2": 57906176.0,
+ "mem_o_INT32": 85002.0,
+ "unsupported": "set()",
+ "discount_sparsity": False,
+ "total_bops": 256507904.0,
+ "total_macs": 59461376.0,
+ "total_mem_w_bits": 3085696.0,
+ "total_mem_w_elems": 1542848.0,
+ "total_mem_o_bits": 4563264.0,
+ "total_mem_o_elems": 142602.0,
+ }
},
},
"FINN-TFC_W2A2": {
"expected_sparse": {
- "op_mac_INT2_INT2": 22355.0,
- "mem_w_INT2": 22355.0,
- "mem_o_INT32": 202.0,
- "unsupported": "set()",
- "discount_sparsity": True,
- "total_bops": 89420.0,
- "total_macs": 22355.0,
- "total_mem_w_bits": 44710.0,
- "total_mem_w_elems": 22355.0,
- "total_mem_o_bits": 6464.0,
- "total_mem_o_elems": 202.0,
+ "total_cost": {
+ "op_mac_INT2_INT2": 22355.0,
+ "mem_w_INT2": 22355.0,
+ "mem_o_INT32": 202.0,
+ "unsupported": "set()",
+ "discount_sparsity": True,
+ "total_bops": 89420.0,
+ "total_macs": 22355.0,
+ "total_mem_w_bits": 44710.0,
+ "total_mem_w_elems": 22355.0,
+ "total_mem_o_bits": 6464.0,
+ "total_mem_o_elems": 202.0,
+ }
},
"expected_dense": {
- "op_mac_INT2_INT2": 59008.0,
- "mem_w_INT2": 59008.0,
- "mem_o_INT32": 202.0,
- "unsupported": "set()",
- "discount_sparsity": False,
- "total_bops": 236032.0,
- "total_macs": 59008.0,
- "total_mem_w_bits": 118016.0,
- "total_mem_w_elems": 59008.0,
- "total_mem_o_bits": 6464.0,
- "total_mem_o_elems": 202.0,
+ "total_cost": {
+ "op_mac_INT2_INT2": 59008.0,
+ "mem_w_INT2": 59008.0,
+ "mem_o_INT32": 202.0,
+ "unsupported": "set()",
+ "discount_sparsity": False,
+ "total_bops": 236032.0,
+ "total_macs": 59008.0,
+ "total_mem_w_bits": 118016.0,
+ "total_mem_w_elems": 59008.0,
+ "total_mem_o_bits": 6464.0,
+ "total_mem_o_elems": 202.0,
+ }
},
},
"RadioML_VGG10": {
"expected_sparse": {
- "op_mac_SCALEDINT<8>_SCALEDINT<8>": 12620311.0,
- "mem_w_SCALEDINT<8>": 155617.0,
- "mem_o_SCALEDINT<32>": 130328.0,
- "unsupported": "set()",
- "discount_sparsity": True,
- "total_bops": 807699904.0,
- "total_macs": 12620311.0,
- "total_mem_w_bits": 1244936.0,
- "total_mem_w_elems": 155617.0,
- "total_mem_o_bits": 4170496.0,
- "total_mem_o_elems": 130328.0,
+ "total_cost": {
+ "unsupported": "set()",
+ "discount_sparsity": True,
+ "op_mac_SCALEDINT<8>_SCALEDINT<8>": 12620311.0,
+ "mem_w_SCALEDINT<8>": 155617.0,
+ "mem_o_SCALEDINT<32>": 130328.0,
+ "total_bops": 807699904.0,
+ "total_macs": 12620311.0,
+ "total_mem_w_bits": 1244936.0,
+ "total_mem_w_elems": 155617.0,
+ "total_mem_o_bits": 4170496.0,
+ "total_mem_o_elems": 130328.0,
+ }
},
"expected_dense": {
- "op_mac_SCALEDINT<8>_SCALEDINT<8>": 12864512.0,
- "mem_w_SCALEDINT<8>": 159104.0,
- "mem_o_SCALEDINT<32>": 130328.0,
- "unsupported": "set()",
- "discount_sparsity": False,
- "total_bops": 823328768.0,
- "total_macs": 12864512.0,
- "total_mem_w_bits": 1272832.0,
- "total_mem_w_elems": 159104.0,
- "total_mem_o_bits": 4170496.0,
- "total_mem_o_elems": 130328.0,
+ "total_cost": {
+ "unsupported": "set()",
+ "discount_sparsity": False,
+ "op_mac_SCALEDINT<8>_SCALEDINT<8>": 12864512.0,
+ "mem_w_SCALEDINT<8>": 159104.0,
+ "mem_o_SCALEDINT<32>": 130328.0,
+ "total_bops": 823328768.0,
+ "total_macs": 12864512.0,
+ "total_mem_w_bits": 1272832.0,
+ "total_mem_w_elems": 159104.0,
+ "total_mem_o_bits": 4170496.0,
+ "total_mem_o_elems": 130328.0,
+ }
},
},
}
diff --git a/tests/analysis/test_inference_cost_breakdown.py b/tests/analysis/test_inference_cost_breakdown.py
new file mode 100644
index 00000000..afa422b9
--- /dev/null
+++ b/tests/analysis/test_inference_cost_breakdown.py
@@ -0,0 +1,90 @@
+# Copyright (c) 2024 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of qonnx nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import os
+import urllib.request
+
+from qonnx.analysis.inference_cost import aggregate_dict_keys
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.util.cleanup import cleanup
+from qonnx.util.inference_cost import inference_cost as infca
+
+download_url = "https://github.com/onnx/models/raw/main/validated/vision/"
+download_url += "classification/resnet/model/resnet18-v1-7.onnx?download="
+
+model_details = {
+ "resnet18-v1-7": {
+ "description": "Resnet18 Opset version 7.",
+ "url": download_url,
+ "enc": {
+ "a": "op_mac_FLOAT32_FLOAT32",
+ "b": "total_mem_w_bits",
+ "c": "total_mem_w_elems",
+ "d": "total_mem_o_bits",
+ "e": "total_mem_o_elems",
+ },
+ },
+}
+
+
+def download_model(test_model, do_cleanup=False, return_modelwrapper=False):
+ qonnx_url = model_details[test_model]["url"]
+ # download test data
+ dl_dir = "/tmp"
+ dl_file = dl_dir + f"/{test_model}.onnx"
+ ret = dl_file
+ if not os.path.isfile(dl_file):
+ urllib.request.urlretrieve(qonnx_url, dl_file)
+ if do_cleanup:
+ out_file = dl_dir + f"/{test_model}_clean.onnx"
+ cleanup(dl_file, out_file=out_file, override_inpsize=1)
+ ret = out_file
+ if return_modelwrapper:
+ ret = ModelWrapper(ret)
+ return ret
+
+
+@pytest.mark.parametrize("test_model", model_details.keys())
+def test_inference_cost_breakdown(test_model):
+ test_details = model_details[test_model]
+ model = download_model(test_model, do_cleanup=True, return_modelwrapper=True)
+ inf_cost = infca(model, discount_sparsity=False, cost_breakdown=True)
+ assert inf_cost["node_cost"]["Conv_0"]["total_macs"] == 118013952
+ assert inf_cost["node_cost"]["Conv_1"]["total_macs"] == 115605504
+ assert inf_cost["optype_cost"]["Conv"]["total_macs"] == 1813561344
+ t_cost = inf_cost["total_cost"] # total cost
+ op_cost = aggregate_dict_keys(inf_cost["optype_cost"]) # cost per optype
+ n_cost = aggregate_dict_keys(inf_cost["node_cost"]) # cost per node.
+ enc = test_details["enc"]
+ assert t_cost[enc["a"]] == op_cost[enc["a"]] == n_cost[enc["a"]], "inf discrepancy"
+ assert t_cost[enc["b"]] == op_cost[enc["b"]] == n_cost[enc["b"]], "inf discrepancy"
+ assert t_cost[enc["c"]] == op_cost[enc["c"]] == n_cost[enc["c"]], "inf discrepancy"
+ assert t_cost[enc["d"]] == op_cost[enc["d"]] == n_cost[enc["d"]], "inf discrepancy"
+ assert t_cost[enc["e"]] == op_cost[enc["e"]] == n_cost[enc["e"]], "inf discrepancy"
diff --git a/tests/analysis/test_matmul_mac_cost.py b/tests/analysis/test_matmul_mac_cost.py
new file mode 100644
index 00000000..ff7dbc2f
--- /dev/null
+++ b/tests/analysis/test_matmul_mac_cost.py
@@ -0,0 +1,45 @@
+# Copyright (c) 2023 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of Xilinx nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+from pkgutil import get_data
+
+import qonnx.util.inference_cost as infc
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.util.cleanup import cleanup_model
+
+
+def test_matmul_mac_cost():
+ raw_model = get_data("qonnx", "data/onnx/matmul_update/sdp.onnx")
+ model = ModelWrapper(raw_model)
+ cleaned_model = cleanup_model(model)
+ # Two Matmul layers with shape (i_shape, w_shape, o_shape),
+ # L1: ([4, 64, 32], [4, 32, 64], [4, 64, 64]) and L2: ([4, 64, 64], [4, 64, 32], [4, 64, 32])
+ inf_cost_dict = infc.inference_cost(cleaned_model, discount_sparsity=False)["total_cost"]
+ mac_cost = inf_cost_dict["op_mac_FLOAT32_FLOAT32"] # Expected mac cost 4*32*64*64 + 4*64*64*32 = 1048576
+ assert mac_cost == 1048576.0, "Error: discrepancy in mac cost."
diff --git a/tests/custom_op/test_attr.py b/tests/custom_op/test_attr.py
index 9db644d7..cde5a321 100644
--- a/tests/custom_op/test_attr.py
+++ b/tests/custom_op/test_attr.py
@@ -37,7 +37,8 @@
class AttrTestOp(CustomOp):
def get_nodeattr_types(self):
- return {"tensor_attr": ("t", True, np.asarray([]))}
+ my_attrs = {"tensor_attr": ("t", True, np.asarray([])), "strings_attr": ("strings", True, [""])}
+ return my_attrs
def make_shape_compatible_op(self, model):
param_tensor = self.get_nodeattr("tensor_attr")
@@ -70,6 +71,7 @@ def test_attr():
strarr = np.array2string(w, separator=", ")
w_str = strarr.replace("[", "{").replace("]", "}").replace(" ", "")
tensor_attr_str = f"int8{wshp_str} {w_str}"
+ strings_attr = ["a", "bc", "def"]
input = f"""
<
@@ -86,9 +88,17 @@ def test_attr():
model = oprs.parse_model(input)
model = ModelWrapper(model)
inst = getCustomOp(model.graph.node[0])
+
w_prod = inst.get_nodeattr("tensor_attr")
assert (w_prod == w).all()
w = w - 1
inst.set_nodeattr("tensor_attr", w)
w_prod = inst.get_nodeattr("tensor_attr")
assert (w_prod == w).all()
+
+ inst.set_nodeattr("strings_attr", strings_attr)
+ strings_attr_prod = inst.get_nodeattr("strings_attr")
+ assert strings_attr_prod == strings_attr
+ strings_attr_prod[0] = "test"
+ inst.set_nodeattr("strings_attr", strings_attr_prod)
+ assert inst.get_nodeattr("strings_attr") == ["test"] + strings_attr[1:]
diff --git a/tests/transformation/test_batchnorm_to_affine.py b/tests/transformation/test_batchnorm_to_affine.py
index 622f0d9c..705a31c1 100644
--- a/tests/transformation/test_batchnorm_to_affine.py
+++ b/tests/transformation/test_batchnorm_to_affine.py
@@ -41,7 +41,7 @@
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model
-download_url = "https://github.com/onnx/models/raw/main/vision/classification"
+download_url = "https://github.com/onnx/models/raw/main/validated/vision/classification"
download_url += "/shufflenet/model/shufflenet-9.onnx"
export_onnx_path = download_url.split("/")[-1]
diff --git a/tests/transformation/test_change_batchsize.py b/tests/transformation/test_change_batchsize.py
index 08d7c20f..e6c76da1 100644
--- a/tests/transformation/test_change_batchsize.py
+++ b/tests/transformation/test_change_batchsize.py
@@ -45,6 +45,11 @@ def test_change_batchsize(test_model):
batch_size = 10
old_ishape = test_details["input_shape"]
imin, imax = test_details["input_range"]
+ # some models specify per-channel ranges, be conservative for those
+ if isinstance(imin, np.ndarray):
+ imin = imin.max()
+ if isinstance(imax, np.ndarray):
+ imax = imax.min()
model = download_model(test_model=test_model, do_cleanup=True, return_modelwrapper=True)
iname = model.graph.input[0].name
oname = model.graph.output[0].name
diff --git a/tests/transformation/test_conv_lowering.py b/tests/transformation/test_conv_lowering.py
index 78da6213..0da57ea3 100644
--- a/tests/transformation/test_conv_lowering.py
+++ b/tests/transformation/test_conv_lowering.py
@@ -43,6 +43,19 @@
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.transformation.lower_convs_to_matmul import LowerConvsToMatMul
from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model
+from qonnx.util.test import download_model, get_golden_in_and_output
+
+
+@pytest.mark.parametrize("model_name", ["FINN-CNV_W2A2", "MobileNetv1-w4a4"])
+def test_conv_lowering_quant_weights(model_name):
+ model = download_model(model_name, return_modelwrapper=True, do_cleanup=True)
+ input_t, golden_t = get_golden_in_and_output(model_name, seed=0)
+ input_dict = {model.graph.input[0].name: input_t}
+ model = model.transform(LowerConvsToMatMul())
+ assert model.get_nodes_by_op_type("Conv") == []
+ prod_dict = oxe.execute_onnx(model, input_dict)
+ prod_t = prod_dict[model.graph.output[0].name]
+ assert np.isclose(golden_t, prod_t, atol=1e-04).all()
def test_conv_lowering_convmnist():
@@ -65,7 +78,7 @@ def test_conv_lowering_convmnist():
model = model.transform(InferShapes())
output_dict_p = oxe.execute_onnx(model, input_dict)
produced = output_dict_p[output_name]
- assert np.isclose(produced, expected).all()
+ assert np.isclose(produced, expected, rtol=1.0e-4).all()
def run_conv_lowering_test(idt, k_h, k_w, ifm_dim_h, ifm_dim_w, ifm_ch, stride, padding, dilations, dw, bias):
diff --git a/tests/transformation/test_extract_quant_scale_zeropt.py b/tests/transformation/test_extract_quant_scale_zeropt.py
new file mode 100644
index 00000000..540ec274
--- /dev/null
+++ b/tests/transformation/test_extract_quant_scale_zeropt.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2023 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of qonnx nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import numpy as np
+import onnx.parser as oprs
+
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.core.onnx_exec import execute_onnx
+from qonnx.transformation.extract_quant_scale_zeropt import ExtractQuantScaleZeroPt
+
+
+def make_test_model(ishp, channelwise, bitwidth, need_extraction_scale, need_extraction_zeropt):
+ ishp_str = str(list(ishp))
+ if channelwise:
+ q_attr_shp = ishp
+ else:
+ q_attr_shp = (1,)
+ attrshp_str = str(list(q_attr_shp))
+ np.random.seed(0)
+ if need_extraction_scale:
+ scale = np.random.rand(*q_attr_shp).astype(np.float32)
+ else:
+ scale = np.ones(q_attr_shp, dtype=np.float32)
+ if need_extraction_zeropt:
+ zeropt = np.random.rand(*q_attr_shp).astype(np.float32)
+ else:
+ zeropt = np.zeros(q_attr_shp, dtype=np.float32)
+ signed = 1
+ narrow = 1
+ rounding_mode = "ROUND"
+
+ input = f"""
+ <
+ ir_version: 7,
+ opset_import: ["" : 9]
+ >
+ agraph (float{ishp_str} in0) => (float{ishp_str} out0)
+ <
+ float{attrshp_str} scale_param,
+ float{attrshp_str} zeropt_param,
+ float bitwidth_param
+ >
+ {{
+ out0 = qonnx.custom_op.general.Quant<
+ signed={str(signed)},
+ narrow={str(narrow)},
+ rounding_mode="{rounding_mode}"
+ >(in0, scale_param, zeropt_param, bitwidth_param)
+ }}
+ """
+ model = oprs.parse_model(input)
+ model = ModelWrapper(model)
+ model.set_initializer("scale_param", scale)
+ model.set_initializer("zeropt_param", zeropt)
+ model.set_initializer("bitwidth_param", bitwidth)
+ return model
+
+
+@pytest.mark.parametrize("need_extraction_scale", [True, False])
+@pytest.mark.parametrize("need_extraction_zeropt", [True, False])
+@pytest.mark.parametrize("channelwise", [True, False])
+def test_extract_quant_scale_zeropt(channelwise, need_extraction_scale, need_extraction_zeropt):
+ ishp = (1, 10)
+ bitwidth = np.asarray(4.0, dtype=np.float32)
+ model = make_test_model(ishp, channelwise, bitwidth, need_extraction_scale, need_extraction_zeropt)
+ ishp = model.get_tensor_shape("in0")
+ inp = np.random.rand(*ishp).astype(np.float32)
+ y_golden = execute_onnx(model, {"in0": inp})["out0"]
+ model_new = model.transform(ExtractQuantScaleZeroPt())
+ y_ret = execute_onnx(model_new, {"in0": inp})["out0"]
+ assert np.allclose(y_golden, y_ret)
+ qnt_node = model_new.get_nodes_by_op_type("Quant")[0]
+ new_scale = model_new.get_initializer(qnt_node.input[1])
+ assert (new_scale == 1).all()
+ new_zeropt = model_new.get_initializer(qnt_node.input[2])
+ assert (new_zeropt == 0).all()
+ if need_extraction_scale:
+ assert len(model_new.get_nodes_by_op_type("Mul")) == 1
+ assert len(model_new.get_nodes_by_op_type("Div")) == 1
+ if need_extraction_zeropt:
+ assert len(model_new.get_nodes_by_op_type("Add")) == 1
+ assert len(model_new.get_nodes_by_op_type("Sub")) == 1
diff --git a/tests/transformation/test_nn_resize_to_deconv.py b/tests/transformation/test_nn_resize_to_deconv.py
new file mode 100644
index 00000000..9f369ed4
--- /dev/null
+++ b/tests/transformation/test_nn_resize_to_deconv.py
@@ -0,0 +1,341 @@
+# Copyright (c) 2024, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of QONNX nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import numpy as np
+import onnx
+import onnx.numpy_helper as nph
+import onnx.parser as oprs
+from onnx.checker import check_model
+from pkgutil import get_data
+
+import qonnx.core.onnx_exec as oxe
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.general.quant import quant
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.transformation.resize_conv_to_deconv import ResizeConvolutionToDeconvolution
+from qonnx.util.basic import gen_finn_dt_tensor
+
+np.random.seed(0)
+
+
+@pytest.mark.parametrize("maintain_bit_width", [True, False])
+def test_resize_conv_to_deconv_float_model(maintain_bit_width: bool):
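+    # load the float ESPCN model bundled with qonnx and compare outputs before/after the transform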
+ raw_m = get_data("qonnx.data", "onnx/bsd300x3-espcn/nn_resize/float_model.onnx")
+ model = ModelWrapper(raw_m)
+ model = model.transform(InferShapes())
+ iname = model.graph.input[0].name
+ oname = model.graph.output[0].name
+ ishape = model.get_tensor_shape(iname)
+ rand_inp = gen_finn_dt_tensor(DataType["FLOAT32"], ishape)
+ input_dict = {iname: rand_inp}
+ expected = oxe.execute_onnx(model, input_dict)[oname]
+ new_model = model.transform(ResizeConvolutionToDeconvolution(maintain_bit_width=maintain_bit_width))
+ # check that there are no Resize ops left
+ op_types = list(map(lambda x: x.op_type, new_model.graph.node))
+ assert "Resize" not in op_types, "Error: the Resize nodes should be removed."
+ produced = oxe.execute_onnx(new_model, input_dict)[oname]
+ assert np.isclose(expected, produced, atol=1e-4).all(), "Error: expected output does not match the produced output."
+
+
+@pytest.mark.parametrize("maintain_bit_width", [True, False])
+def test_resize_conv_to_deconv_quant_model(maintain_bit_width: bool):
+ # get raw quantized model with reference input
+ raw_i = get_data("qonnx.data", "onnx/bsd300x3-espcn/test_data/input_0.pb")
+ raw_m = get_data("qonnx.data", "onnx/bsd300x3-espcn/nn_resize/quant_model.onnx")
+ # create model from the onnx file and infer the shapes
+ model = ModelWrapper(raw_m)
+ model = model.transform(InferShapes())
+ iname = model.graph.input[0].name
+ oname = model.graph.output[0].name
+ ishape = model.get_tensor_shape(iname)
+ # load the reference input tensor
+ input_tensor = onnx.load_tensor_from_string(raw_i)
+ input_tensor = nph.to_array(input_tensor)
+ assert list(input_tensor.shape) == ishape, "Error: reference input doesn't match loaded model."
+ input_dict = {iname: input_tensor}
+ # get the output from the sub-pixel convolution model
+ output_resize_conv = oxe.execute_onnx(model, input_dict)[oname]
+ # translate the sub-pixel convolution to the deconvolution
+ new_model = model.transform(ResizeConvolutionToDeconvolution(maintain_bit_width=maintain_bit_width))
+ # check that there are no Resize ops left
+ op_types = list(map(lambda x: x.op_type, new_model.graph.node))
+ assert "Resize" not in op_types, "Error: the Resize nodes should be removed."
+ # get the output from the deconvolution model
+ output_deconv = oxe.execute_onnx(new_model, input_dict)[oname]
+    # maintaining the specified bit width introduces additional clipping errors, so the
+    # deconvolution output is not expected to closely match the resize-conv output in that case
+ if not maintain_bit_width:
+ assert np.isclose(
+ output_deconv, output_resize_conv, atol=1 / 255.0, rtol=1.0
+ ).all(), "Error: expected output does not match the produced output."
+
+
+def float_nn_resize_model(r: int, ifm: int, ich: int, och: int, ksize: int, use_bias: bool):
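+    # build a float nearest-neighbor Resize + Conv model via the ONNX text parser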
+ assert isinstance(ksize, int), "Assuming square kernels, so kernel_size needs to be an int."
+ pad = (ksize - 1) // 2
+
+ ishp = (1, ich, ifm, ifm)
+ oshp = (1, och, ifm * r, ifm * r)
+ wshp = (och, ich, ksize, ksize)
+ bshp = (och,)
+ rscales = np.array([1.0, 1.0, r, r], dtype=np.float32)
+ weight = np.random.randn(*wshp)
+ bias = np.random.randn(*bshp)
+ ishp_str = str(list(ishp))
+ oshp_str = str(list(oshp))
+ wshp_str = str(list(wshp))
+ bshp_str = str(list(bshp))
+
+ if use_bias:
+ params_str = f"""
+ <
+ float{wshp_str} conv_param,
+ float{bshp_str} bias_param,
+ float roi,
+ float scales
+ >
+ """
+ else:
+ params_str = f"""
+ <
+ float{wshp_str} conv_param,
+ float roi,
+ float scales
+ >
+ """
+
+ if use_bias:
+ conv_str = f"""
+ out0 = Conv<
+ dilations=[1,1],
+ group=1,
+ kernel_shape=[{ksize},{ksize}],
+ strides=[1,1],
+ pads=[{pad},{pad},{pad},{pad}]
+ >(hid0, conv_param, bias_param)
+ """
+ else:
+ conv_str = f"""
+ out0 = Conv<
+ dilations=[1,1],
+ group=1,
+ kernel_shape=[{ksize},{ksize}],
+ strides=[1,1],
+ pads=[{pad},{pad},{pad},{pad}]
+ >(hid0, conv_param)
+ """
+
+ input = f"""
+ <
+ ir_version: 7,
+ opset_import: ["" : 13]
+ >
+ agraph (float{ishp_str} in0) => (float{oshp_str} out0)
+ {params_str}
+ {{
+ hid0 = Resize<
+ mode="nearest"
+ >(in0, roi, scales)
+ {conv_str}
+ }}
+ """
+
+ model = oprs.parse_model(input)
+ model = ModelWrapper(model)
+ model.set_initializer("roi", np.empty(0))
+ model.set_initializer("scales", rscales.astype(np.float32))
+ model.set_initializer("conv_param", weight.astype(np.float32))
+ if use_bias:
+ model.set_initializer("bias_param", bias.astype(np.float32))
+ model = model.transform(InferShapes())
+ check_model(model._model_proto)
+ return model
+
+
+def quant_nn_resize_model(r: int, ifm: int, ich: int, och: int, ksize: int, use_bias: bool, channelwise: bool):
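+    # same Resize + Conv model as above, but with Quant nodes on the conv weight (and bias) initializers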
+ assert isinstance(ksize, int), "Assuming square kernels, so kernel_size needs to be an int."
+ pad = (ksize - 1) // 2
+
+ ishp = (1, ich, ifm, ifm)
+ oshp = (1, och, ifm * r, ifm * r)
+ wshp = (och, ich, ksize, ksize)
+ bshp = (och,)
+ rscales = np.array([1.0, 1.0, r, r], dtype=np.float32)
+ weight = np.random.randn(*wshp)
+ bias = np.random.randn(*bshp)
+ ishp_str = str(list(ishp))
+ oshp_str = str(list(oshp))
+ wshp_str = str(list(wshp))
+ bshp_str = str(list(bshp))
+
+ if channelwise:
+ q_attr_shp = (och, 1, 1, 1)
+ else:
+ q_attr_shp = (1,)
+ attrshp_str = str(list(q_attr_shp))
+ scale = np.random.rand(*q_attr_shp).astype(np.float32)
+    zeropt = np.zeros(q_attr_shp).astype(np.float32)  # NOTE: zero-point values need to be integer-valued
+ bitwidth = np.array(4.0)
+
+ weight: np.ndarray = quant(weight, scale, zeropt, bitwidth, signed=True, narrow=True, rounding_mode="ROUND")
+
+ if use_bias:
+ params_str = f"""
+ <
+ float{wshp_str} conv_param,
+ float{attrshp_str} scale_param,
+ float{attrshp_str} zeropt_param,
+ float{bshp_str} bias_param,
+ float bitwidth_param,
+ float scale_bias,
+ float zeropt_bias,
+ float bitwidth_bias,
+ float roi,
+ float scales
+ >
+ """
+ else:
+ params_str = f"""
+ <
+ float{wshp_str} conv_param,
+ float{attrshp_str} scale_param,
+ float{attrshp_str} zeropt_param,
+ float roi,
+ float scales,
+ float bitwidth_param
+ >
+ """
+
+ if use_bias:
+ scale_bias = np.random.rand(
+ 1,
+ )
+ zeropt_bias = np.array(0.0)
+ bitwidth_bias = np.array(16.0)
+ convs_str = f"""
+ param1 = qonnx.custom_op.general.Quant<
+ signed=1,
+ narrow=1,
+ rounding_mode="ROUND"
+ >(bias_param, scale_bias, zeropt_bias, bitwidth_bias)
+ out0 = Conv<
+ dilations=[1,1],
+ group=1,
+ kernel_shape=[{ksize},{ksize}],
+ strides=[1,1],
+ pads=[{pad},{pad},{pad},{pad}]
+ >(hid0, param0, param1)
+ """
+ else:
+ convs_str = f"""
+ out0 = Conv<
+ dilations=[1,1],
+ group=1,
+ kernel_shape=[{ksize},{ksize}],
+ strides=[1,1],
+ pads=[{pad},{pad},{pad},{pad}]
+ >(hid0, param0)
+ """
+
+ input = f"""
+ <
+ ir_version: 7,
+ opset_import: ["" : 13, "qonnx.custom_op.general" : 1]
+ >
+ agraph (float{ishp_str} in0) => (float{oshp_str} out0)
+ {params_str}
+ {{
+ hid0 = Resize<
+ mode="nearest"
+ >(in0, roi, scales)
+ param0 = qonnx.custom_op.general.Quant<
+ signed=1,
+ narrow=1,
+ rounding_mode="ROUND"
+ >(conv_param, scale_param, zeropt_param, bitwidth_param)
+ {convs_str}
+ }}
+ """
+ model = oprs.parse_model(input)
+ model = ModelWrapper(model)
+ model.set_initializer("roi", np.empty(0))
+ model.set_initializer("scales", rscales.astype(np.float32))
+ model.set_initializer("conv_param", weight.astype(np.float32))
+ if use_bias:
+ model.set_initializer("bias_param", bias.astype(np.float32))
+ model.set_initializer("scale_bias", scale_bias.astype(np.float32))
+ model.set_initializer("zeropt_bias", zeropt_bias.astype(np.float32))
+ model.set_initializer("bitwidth_bias", bitwidth_bias.astype(np.float32))
+ model.set_initializer("scale_param", scale.astype(np.float32))
+ model.set_initializer("zeropt_param", zeropt.astype(np.float32))
+ model.set_initializer("bitwidth_param", bitwidth.astype(np.float32))
+ model = model.transform(InferShapes())
+ check_model(model._model_proto)
+ return model
+
+
+@pytest.mark.parametrize("kernel_size", [3, 5, 7])
+@pytest.mark.parametrize("upscale_factor", [1, 2, 3, 4])
+@pytest.mark.parametrize("bias", [True, False])
+def test_float_resize_conv_to_deconv_layer(kernel_size: int, upscale_factor: int, bias: bool):
+ och = 10 # output channels
+ ich = 3 # input channels
+ ifm = 4 # input feature map size
+ input_shape = [1, ich, ifm, ifm]
+    # Create resize convolution model that upsamples a 4x4 feature map (3 input, 10 output channels)
+ model_1 = float_nn_resize_model(upscale_factor, ifm, ich, och, kernel_size, bias)
+ model_2 = model_1.transform(ResizeConvolutionToDeconvolution())
+    inp_dict = {"in0": np.random.rand(*input_shape).astype(np.float32)}
+ assert oxe.compare_execution(model_1, model_2, inp_dict)
+
+
+@pytest.mark.parametrize("kernel_size", [3, 5, 7])
+@pytest.mark.parametrize("upscale_factor", [1, 2, 3, 4])
+@pytest.mark.parametrize("bias", [True, False])
+@pytest.mark.parametrize("channelwise", [True, False])
+@pytest.mark.parametrize("maintain_bit_width", [True, False])
+def test_quant_resize_conv_to_deconv_layer(
+ kernel_size: int, upscale_factor: int, bias: bool, channelwise: bool, maintain_bit_width: bool
+):
+ och = 10 # output channels
+ ich = 3 # input channels
+ ifm = 4 # input feature map size
+ input_shape = [1, ich, ifm, ifm]
+    # Create resize convolution model that upsamples a 4x4 feature map (3 input, 10 output channels)
+ model_1 = quant_nn_resize_model(upscale_factor, ifm, ich, och, kernel_size, bias, channelwise)
+ model_2 = model_1.transform(ResizeConvolutionToDeconvolution(maintain_bit_width=maintain_bit_width))
+    inp_dict = {"in0": np.random.rand(*input_shape).astype(np.float32)}
+ assert oxe.compare_execution(model_1, model_2, inp_dict)
+
+ if maintain_bit_width:
+ bw1 = model_1.get_initializer("bitwidth_param")
+ bw2 = model_2.get_initializer("bitwidth_param")
+ assert (bw1 == bw2).all()
diff --git a/tests/transformation/test_pruning.py b/tests/transformation/test_pruning.py
index 85f9afc9..b2fdbcd8 100644
--- a/tests/transformation/test_pruning.py
+++ b/tests/transformation/test_pruning.py
@@ -90,7 +90,7 @@ def test_pruning_mnv1():
# do cleanup including folding quantized weights
model = cleanup_model(model, False)
inp, golden = get_golden_in_and_output("MobileNetv1-w4a4")
- cost0 = inference_cost(model, discount_sparsity=False)
+ cost0 = inference_cost(model, discount_sparsity=False)["total_cost"]
assert cost0["op_mac_SCALEDINT<8>_SCALEDINT<8>"] == 10645344.0
assert cost0["mem_w_SCALEDINT<8>"] == 864.0
assert cost0["op_mac_SCALEDINT<4>_SCALEDINT<4>"] == 556357408.0
@@ -105,7 +105,7 @@ def test_pruning_mnv1():
}
model = model.transform(PruneChannels(prune_spec))
- cost1 = inference_cost(model, discount_sparsity=False)
+ cost1 = inference_cost(model, discount_sparsity=False)["total_cost"]
assert cost1["op_mac_SCALEDINT<8>_SCALEDINT<8>"] == 7318674.0
assert cost1["mem_w_SCALEDINT<8>"] == 594.0
assert cost1["op_mac_SCALEDINT<4>_SCALEDINT<4>"] == 546053216.0
diff --git a/tests/transformation/test_qcdq_to_qonnx.py b/tests/transformation/test_qcdq_to_qonnx.py
index 4532530c..44d10524 100644
--- a/tests/transformation/test_qcdq_to_qonnx.py
+++ b/tests/transformation/test_qcdq_to_qonnx.py
@@ -39,7 +39,9 @@
model_details = {
"MobileNetv2-w8a8": {
- "url": ("https://github.com/onnx/models/raw/main/vision/classification/mobilenet/model/mobilenetv2-12-qdq.onnx"),
+ "url": (
+ "https://github.com/onnx/models/raw/main/validated/vision/classification/mobilenet/model/mobilenetv2-12-qdq.onnx"
+ ),
"input_shape": (1, 3, 224, 224),
"input_range": (-1, +1),
"exp_q_nodes": 171,
diff --git a/tests/transformation/test_quantize_graph.py b/tests/transformation/test_quantize_graph.py
new file mode 100644
index 00000000..5278194d
--- /dev/null
+++ b/tests/transformation/test_quantize_graph.py
@@ -0,0 +1,145 @@
+# Copyright (c) 2024 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# * Neither the name of qonnx nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+
+import os
+import random
+import urllib.request
+
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.quantize_graph import QuantizeGraph
+from qonnx.util.cleanup import cleanup
+from qonnx.util.inference_cost import inference_cost
+
+random.seed(42)
+
+download_url = "https://github.com/onnx/models/raw/main/validated/vision/"
+download_url += "classification/resnet/model/resnet18-v1-7.onnx?download="
+
+model_details = {
+ "resnet18-v1-7": {
+ "description": "Resnet18 Opset version 7.",
+ "url": download_url,
+ "test_input": {
+ "name": {
+ "Conv_0": [
+ (("input", 0), (1, 0, 8, 0, 1, "ROUND")),
+ (("input", 1), (1, 0, 8, 0, 1, "ROUND")),
+ (("output", 0), (1, 0, 8, 0, 1, "ROUND")),
+ ],
+ "Conv_1": [(("input", 0), (1, 0, 8, 0, 1, "ROUND"))],
+ "Conv_2": [(("input", 1), (1, 0, 8, 0, 1, "ROUND")), (("output", 0), (1, 0, 8, 0, 1, "ROUND"))],
+ },
+ "op_type": {
+ "Gemm": [
+ (("input", 0), (1, 0, 8, 0, 1, "ROUND")),
+ (("input", 1), (1, 0, 8, 0, 1, "ROUND")),
+ (("input", 2), (1, 0, 8, 0, 1, "ROUND")),
+ (("output", 0), (1, 0, 8, 0, 1, "ROUND")),
+ ]
+ },
+ },
+ },
+}
+
+
+def download_model(test_model, do_cleanup=False, return_modelwrapper=False):
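+    # fetch the test model into /tmp (optionally running cleanup) and return a path or ModelWrapper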
+ qonnx_url = model_details[test_model]["url"]
+ # download test data
+ dl_dir = "/tmp"
+ dl_file = dl_dir + f"/{test_model}.onnx"
+ ret = dl_file
+ if not os.path.isfile(dl_file):
+ urllib.request.urlretrieve(qonnx_url, dl_file)
+ if do_cleanup:
+ out_file = dl_dir + f"/{test_model}_clean.onnx"
+ cleanup(dl_file, out_file=out_file, override_inpsize=1)
+ ret = out_file
+ if return_modelwrapper:
+ ret = ModelWrapper(ret)
+ return ret
+
+
+def to_verify(model, test_details):
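+    # pick a random quantization position from the test spec (by node name or op type) and
+    # check that a Quant node was actually inserted at that node input/output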
+ by = random.choice(list(test_details.keys())) # by "name" or "op_type"
+
+ if by == "name":
+ sample_node_name = random.choice(list(test_details["name"].keys()))
+ sample_node = model.get_node_from_name(sample_node_name)
+ sample_pos = random.choice(test_details["name"][sample_node_name])
+ if by == "op_type":
+ node_type = random.choice(list(test_details["op_type"].keys()))
+ sample_node = random.choice(model.get_nodes_by_op_type(node_type))
+ sample_pos = random.choice(test_details["op_type"][node_type])
+
+ if sample_pos[0][0] == "input":
+ tensor_to_verify = sample_node.input[sample_pos[0][1]]
+ producer_node = model.find_producer(tensor_to_verify)
+ if producer_node.op_type == "Quant":
+ verification = "Success"
+ else:
+ verification = "Failure"
+ if sample_pos[0][0] == "output":
+ tensor_to_verify = sample_node.output[sample_pos[0][1]]
+ consumer_node = model.find_consumer(tensor_to_verify)
+ if consumer_node.op_type == "Quant":
+ verification = "Success"
+ else:
+ verification = "Failure"
+
+ return verification
+
+
+@pytest.mark.parametrize("test_model", model_details.keys())
+def test_quantize_graph(test_model):
+ test_details = model_details[test_model]
+ model = download_model(test_model, do_cleanup=True, return_modelwrapper=True)
+ original_model_inf_cost = inference_cost(model, discount_sparsity=False)["total_cost"]
+ nodes_pos = test_details["test_input"]
+ model = model.transform(QuantizeGraph(nodes_pos))
+ quantnodes_added = len(model.get_nodes_by_op_type("Quant"))
+ assert quantnodes_added == 10 # 10 positions are specified.
+ verification = to_verify(model, nodes_pos)
+ assert verification == "Success"
+ inf_cost = inference_cost(model, discount_sparsity=False)["total_cost"]
+    assert (
+        inf_cost["total_macs"] == original_model_inf_cost["total_macs"]
+    )  # total MACs (1814073344.0) must match the original model
+    assert (
+        inf_cost["total_mem_w_elems"] == original_model_inf_cost["total_mem_w_elems"]
+    )  # total weight memory elements (11678912.0) must match the original model
+    assert (
+        inf_cost["total_mem_o_bits"] == original_model_inf_cost["total_mem_o_bits"]
+    )  # total output memory bits (79510784.0) must match the original model
+    assert (
+        inf_cost["total_mem_o_elems"] == original_model_inf_cost["total_mem_o_elems"]
+    )  # total output memory elements (2484712.0) must match the original model
+ assert inf_cost["total_bops"] == 1566256136192.0
+ assert inf_cost["total_mem_w_bits"] == 360326656.0
+ assert inf_cost["op_mac_INT8_INT8"] == 118525952.0
diff --git a/tests/transformation/test_renaming.py b/tests/transformation/test_renaming.py
index a318a2dd..2a4f765a 100644
--- a/tests/transformation/test_renaming.py
+++ b/tests/transformation/test_renaming.py
@@ -77,7 +77,7 @@ def test_renaming():
def test_rename_multi_io_tinyyolov3():
- download_url = "https://github.com/onnx/models/raw/main/vision/object_detection_segmentation"
+ download_url = "https://github.com/onnx/models/raw/main/validated/vision/object_detection_segmentation"
download_url += "/tiny-yolov3/model/tiny-yolov3-11.onnx"
export_onnx_path = download_url.split("/")[-1]
ureq.urlretrieve(download_url, export_onnx_path)
diff --git a/tests/transformation/test_sort_commutative_inputs_initializer_last.py b/tests/transformation/test_sort_commutative_inputs_initializer_last.py
new file mode 100644
index 00000000..1cd1eb72
--- /dev/null
+++ b/tests/transformation/test_sort_commutative_inputs_initializer_last.py
@@ -0,0 +1,91 @@
+# Set pytest parameters
+import pytest
+
+# Numpy for handling simulation of tensor operations
+import numpy as np
+
+# Helper for creating ONNX nodes
+from onnx import TensorProto
+from onnx import helper as oh
+
+# QONNX wrapper of ONNX model graphs
+from qonnx.core.modelwrapper import ModelWrapper
+
+# Execute QONNX model graphs
+from qonnx.core.onnx_exec import execute_onnx
+
+# Graph transformation to be tested: Sorts the input list of commutative
+# operations to have all dynamic inputs first followed by all initializer inputs
+from qonnx.transformation.general import SortCommutativeInputsInitializerLast
+
+# QONNX utility for creating models from ONNX graphs
+from qonnx.util.basic import qonnx_make_model
+
+
+# Specify how many inputs the test should cover
+@pytest.mark.parametrize("num_inputs", [4, 5, 6])
+# Specify which inputs should be turned into initializers
+@pytest.mark.parametrize(
+ # fmt: off
+ "initializers", [[], [0], [1], [0, 1], [0, 3], [0, 1, 2, 3]]
+ # fmt: on
+)
+# Tests the SortCommutativeInputsInitializerLast transformation
+def test_sort_commutative_inputs_initializer_last(num_inputs, initializers):
+ # Generate the input tensor names
+ inputs = [f"in{i}" for i in range(num_inputs)]
+ # We will use the Sum ONNX operation to test this behavior, as it allows for
+ # arbitrary many inputs
+ node = oh.make_node(
+ # fmt: off
+ op_type="Sum", inputs=inputs, outputs=["out"], name="Sum"
+ # fmt: on
+ )
+ # Create value infos for all input and the output tensor
+ inputs = [
+ # fmt: off
+ oh.make_tensor_value_info(i, TensorProto.FLOAT, (16,)) for i in inputs
+ # fmt: on
+ ]
+ out = oh.make_tensor_value_info("out", TensorProto.FLOAT, (16,))
+ # Make a graph comprising the Sum node and value infos for all inputs and
+ # the output
+ graph = oh.make_graph([node], inputs=inputs, outputs=[out], name="Sum")
+ # Wrap the graph in an QONNX model wrapper
+ model = ModelWrapper(qonnx_make_model(graph, producer_name="qonnx-tests"))
+ # Prepare the execution context
+ context = {f"in{i}": np.random.rand(16) for i in range(num_inputs)}
+ # Make sure all inputs are of type float32
+ context = {key: value.astype(np.float32) for key, value in context.items()}
+ # Turn selected inputs into initializers
+ for i in initializers:
+ model.set_initializer(f"in{i}", context[f"in{i}"])
+
+ # Execute the ONNX model before transforming
+ out_expected = execute_onnx(model, context)["out"]
+ # Apply the transformation to be tested
+ # Note: No cleanup, as the tested transformation is part of the cleanup, and
+ # we want to test this in isolation
+ model = model.transform(
+ # fmt: off
+ SortCommutativeInputsInitializerLast(), cleanup=False
+ # fmt: on
+ )
+ # Execute the ONNX model after transforming
+ out_produced = execute_onnx(model, context)["out"]
+
+ # Start with no initializer input seen so far
+ seen_initializer = False
+ # Verify that no "dynamic" input follows an initializer input
+ for i in model.graph.node[0].input:
+ # Keep track of when an initializer has been seen
+ if model.get_initializer(i) is not None:
+ seen_initializer = True
+ # If there has already been an initializer, this input must be an
+ # initializer as well
+ assert (
+ not seen_initializer or model.get_initializer(i) is not None
+ ), "Non-initializer input following initializer after sorting"
+
+ # Outputs before and after must match
+ assert np.allclose(out_produced, out_expected)
diff --git a/tests/transformation/test_subpixel_to_deconv.py b/tests/transformation/test_subpixel_to_deconv.py
index 17a68d19..b033a476 100644
--- a/tests/transformation/test_subpixel_to_deconv.py
+++ b/tests/transformation/test_subpixel_to_deconv.py
@@ -47,7 +47,7 @@
def test_subpixel_to_deconv_float_espcn():
- raw_m = get_data("qonnx.data", "onnx/bsd300x3-espcn/float_model.onnx")
+ raw_m = get_data("qonnx.data", "onnx/bsd300x3-espcn/subpixel/float_model.onnx")
model = ModelWrapper(raw_m)
model = model.transform(InferShapes())
iname = model.graph.input[0].name
@@ -67,7 +67,7 @@ def test_subpixel_to_deconv_float_espcn():
def test_subpixel_to_deconv_quant_espcn():
# get raw quantized model with reference input
raw_i = get_data("qonnx.data", "onnx/bsd300x3-espcn/test_data/input_0.pb")
- raw_m = get_data("qonnx.data", "onnx/bsd300x3-espcn/quant_model.onnx")
+ raw_m = get_data("qonnx.data", "onnx/bsd300x3-espcn/subpixel/quant_model.onnx")
# create model from the onnx file and infer the shapes
model = ModelWrapper(raw_m)
model = model.transform(InferShapes())