From 49a5a342a220ac708e1827084a7efcd74b382950 Mon Sep 17 00:00:00 2001
From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com>
Date: Thu, 21 Sep 2023 19:56:08 +0530
Subject: [PATCH 01/20] Create add_op.py

---
 src/qonnx/transformation/operators/add_op.py | 519 +++++++++++++++++++
 1 file changed, 519 insertions(+)
 create mode 100644 src/qonnx/transformation/operators/add_op.py

diff --git a/src/qonnx/transformation/operators/add_op.py b/src/qonnx/transformation/operators/add_op.py
new file mode 100644
index 00000000..90792d1f
--- /dev/null
+++ b/src/qonnx/transformation/operators/add_op.py
@@ -0,0 +1,519 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#########################################################################
+
+import onnx
+from .helper import helper
+import numpy as np
+
+class QLinearAdd:
+
+    def __init__(self, node, aecg_zendnn_opt, remove_relu):
+
+        add_node = node
+
+        if len(add_node.inputs[1].inputs) == 0:
+            # if the Add node has only 1 input node and the other input is a constant tensor,
+            # we cannot change it to a QLinearAdd node, hence it is kept as is
+            x_name = add_node.inputs[0].name
+            y_name = add_node.outputs[0].name
+
+            const_val = add_node.inputs[1].values
+
+            const_name = add_node.name + "_const_add_tensor"
+            const_tensor = helper.create_initializer_tensor(name=const_name,
+                                                            tensor_array=const_val,
+                                                            data_type=onnx.TensorProto.FLOAT)
+
+            new_add_node = onnx.helper.make_node(name = add_node.name,
+                                                 op_type = "Add",
+                                                 inputs = [x_name, const_name],
+                                                 outputs = [y_name])
+            self.node = new_add_node
+
+            if helper.is_child_present(add_node, 0, 0) and add_node.o().op == "Relu":
+                relu_node = add_node.o()
+                relu_node1 = onnx.helper.make_node(name = relu_node.name, op_type = "Relu", inputs = [add_node.outputs[0].name], outputs = [relu_node.outputs[0].name])
+                self.relu_node = relu_node1
+
+            intializer_list = []
+            intializer_list.append(const_tensor)
+            self.intializer_list = intializer_list
+
+        else:
+            input_node1 = add_node.inputs[0].inputs[0]
+            input_node2 = add_node.inputs[1].inputs[0]
+            output_node = add_node.o()
+
+            is_relu_present = False
+            if output_node.op == "Relu":
+                is_relu_present = True
+                relu_node = output_node
+                # relu_node gets updated in later conditions, so relu_node_name and
+                # relu_node_output_tensor are kept to make it simple to keep track of them
+                relu_node_name = relu_node.name
+                relu_node_output_tensor = relu_node.outputs[0].name
+                if relu_node.o().op == "QuantizeLinear":
+                    output_node = relu_node.o()
+                else:
+                    print("*********************** ERROR output of Relu node ", relu_node.name, " is not QuantizeLinear ***********************")
+            elif not (output_node.op == "QuantizeLinear"):
+                print("*********************** ERROR output of Add node ", add_node.name, " is not QuantizeLinear ***********************")
+
+            # in order to get scale and zp for the 2 inputs to the Add node, we need 2 DQL nodes.
+            if not (input_node1.op == "DequantizeLinear" and input_node2.op == "DequantizeLinear"):
+
+                """
+                case observed in Resnet50v1 (each Conv also takes its own
+                weight/bias DQL pair):
+
+                    Add1 -> Relu -> QL -> DQL2 -> Conv -> QL -> DQL -> Conv -> QL -> DQL -> Conv -> QL -> DQL1 -> Add
+                    Relu ---------------------------------------------------------------------------------------> Add
+
+                here Add doesn't have 1 of the DQL nodes, so we take DQL2 as the other DQL node.
+
+                in case both inputs are missing the DQL node: this case hasn't been encountered yet,
+                so it is flagged for now; if needed it will be handled later depending on the case
+                """
+                if not (input_node1.op == "DequantizeLinear") and not (input_node2.op == "DequantizeLinear"):
+                    print("***************************** ERROR No input of Add node is DequantizeLinear ***********************************")
+                elif not (input_node1.op == "DequantizeLinear"):
+                    # if input_node1 is not DQL
+                    if input_node1.op == "Relu":
+                        relu_node = input_node1
+                        if relu_node.i().op == "Add" and relu_node.o().op == "QuantizeLinear":
+                            if (relu_node.o()).o().op == "DequantizeLinear":
+                                input_node1 = (relu_node.o()).o()
+                                # in the example case shown, input_node1 is now DQL2
+                    elif input_node1.op == "MaxPool":
+                        # when resnet strides are implemented there will be a maxpool node between the Relu and Add nodes shown.
+                        maxpool_node = input_node1
+                        relu_node = maxpool_node.i()
+                        if relu_node.i().op == "Add" and (relu_node.o().op == "QuantizeLinear" or relu_node.outputs[0].outputs[1].op == "QuantizeLinear"):
+                            if (relu_node.o()).o().op == "DequantizeLinear":
+                                input_node1 = (relu_node.o()).o()
+                                # input_node1 is now DQL2
+                            elif (relu_node.outputs[0].outputs[1]).o().op == "DequantizeLinear":
+                                input_node2 = (relu_node.outputs[0].outputs[1]).o()
+                                # input_node2 is now DQL2
+                    elif input_node1.op == "Add":
+
+                        """
+                        this case is observed in mobilenetv2-12-qdq.onnx (each Conv
+                        also takes its own weight/bias DQL pair):
+
+                            Add2 -> QL1 -> DQL1 -> Conv -> QL -> DQL -> Conv -> QL -> DQL -> Conv -> QL -> DQL -> Add1
+                            Add2 -------------------------------------------------------------------------------> Add1
+
+                        Add2 = parent_add_node
+                        QL1 = parent_add_node_ql_node
+                        input_node1 = DQL1
+                        """
+                        parent_add_node = input_node1
+                        parent_add_node_ql_node = parent_add_node.o()
+                        input_node1 = parent_add_node_ql_node.o()
+                elif not (input_node2.op == "DequantizeLinear"):
+                    # if input_node2 is not DQL
+                    if input_node2.op == "Relu":
+                        relu_node = input_node2
+                        if relu_node.i().op == "Add" and relu_node.o().op == "QuantizeLinear":
+                            if (relu_node.o()).o().op == "DequantizeLinear":
+                                input_node2 = (relu_node.o()).o()
+                                # input_node2 is now the DQL node from which we need to take scale and zp
+
+                    elif input_node2.op == "MaxPool":
+                        maxpool_node = input_node2
+                        if maxpool_node.i().op == "Relu":
+                            relu_node = maxpool_node.i()
+                        elif maxpool_node.i().op == "DequantizeLinear":
+                            if maxpool_node.i().i().op == "QuantizeLinear":
+                                if maxpool_node.i().i().i().op == "Relu":
+                                    relu_node = maxpool_node.i().i().i()
+                        if relu_node.i().op == "Add" and (relu_node.o().op == "QuantizeLinear" or (len(relu_node.outputs[0].outputs) > 1 and relu_node.outputs[0].outputs[1].op == "QuantizeLinear")):
+                            if (relu_node.o()).o().op == "DequantizeLinear":
+                                input_node2 = (relu_node.o()).o()
+                            elif len(relu_node.outputs[0].outputs) > 1 and (relu_node.outputs[0].outputs[1]).o().op == "DequantizeLinear":
+                                input_node2 = (relu_node.outputs[0].outputs[1]).o()
+                                # input_node2 is now the DQL node from which we need to take scale and zp
+
+            if input_node1.op == "DequantizeLinear" and input_node2.op == "DequantizeLinear" and output_node.op == "QuantizeLinear":
+                # now we have input_node1 = input_node2 = DQL and the output as a QL node
+                if add_node.inputs[0].inputs[0].op == "MaxPool":
+                    # this is the strides case; if MaxPool is a parent of the Add node, maxpool = node1
+                    node1 = add_node.i()
+                elif add_node.inputs[0].inputs[0].op == "Add":
+                    # this is for the mobilenet case, so Add2 = node1
+                    node1 = add_node.i()
+                else:
+                    """
+                    if the above 2 cases do not apply, assume the following case
+                    from the Resnet50v1 model (each Conv also takes its own
+                    weight/bias DQL pair):
+
+                        Conv -> QL1 -> DQL -> Add
+                        Conv -> QL2 -> DQL -> Add
+
+                    now node1 is QL1/QL2
+                    """
+                    node1 = add_node.inputs[0].inputs[0].i()
+
+                if add_node.inputs[1].inputs[0].op == "MaxPool":
+                    # same as above but for the other input, node2 = maxpool node
+                    node2 = add_node.inputs[1].inputs[0]
+                else:
+                    # same as the general case discussed above, node2 = QL1/QL2
+                    node2 = input_node2.i()
+
+                if node1.op == "Add":
+                    # this is the mobilenet case explained above; node1 will be converted to a QLinearAdd node and it will act as input to the current Add node
+                    # here a_name = QL1's output tensor name (please refer to the mobilenet case above)
+                    a_name = node1.o().outputs[0].name
+                else:
+                    # referring to the general case taken above from the resnet50v1 model, a_name = QL1/QL2's output tensor name
+                    a_name = node1.outputs[0].name
+
+                a_scale_name = add_node.name + "_A_SCALE"
+                a_scale_value = input_node1.inputs[1].values
+                a_scale_tensor = helper.create_initializer_tensor(name=a_scale_name,
+                                                                  tensor_array=a_scale_value,
+                                                                  data_type=onnx.TensorProto.FLOAT)
+
+                a_zp_name = add_node.name + "_A_ZP"
+                a_zp_value = input_node1.inputs[2].values
+
+                if aecg_zendnn_opt:
+                    a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name,
+                                                                   tensor_array=a_zp_value,
+                                                                   data_type=onnx.TensorProto.UINT8)
+                else:
+                    if node1.i().op == "QuantizeLinear" and node1.i().i().op == "Relu":
+                        a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name,
+                                                                       tensor_array=a_zp_value,
+                                                                       data_type=onnx.TensorProto.UINT8)
+                    else:
+                        if input_node1.inputs[2].dtype == np.int8:
+                            a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name,
+                                                                           tensor_array=a_zp_value,
+                                                                           data_type=onnx.TensorProto.INT8)
+                        elif input_node1.inputs[2].dtype == np.uint8:
+                            a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name,
+                                                                           tensor_array=a_zp_value,
+                                                                           data_type=onnx.TensorProto.UINT8)
+
+                # TODO: only 1 condition is handled here: the Add node's 1st parent is DQL<--QL and the 2nd parent can be Relu. The vice-versa and other cases have not been encountered yet and are thus not handled.
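+                # Note (assumption, for context): the com.microsoft QLinearAdd
+                # contract is taken here to be the usual quantized add, i.e. per
+                # element
+                #     A_fp = (A - a_zp) * a_scale
+                #     B_fp = (B - b_zp) * b_scale
+                #     Y    = saturate(round((A_fp + B_fp) / y_scale) + y_zp)
+                # so the code below only needs to recover a (scale, zero point)
+                # pair for each input from the surrounding QL/DQL nodes, plus one
+                # pair for the output.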
+                if helper.is_parent_exist(node2, 0, 0):
+                    if remove_relu:
+                        # b_name = the QL's output tensor
+                        b_name = node2.outputs[0].name
+                    else:
+                        # check for Relu, and that the input of the Add node is s8; any 1 input can be checked, thus we check node1
+                        if node2.i().op == "Relu" and node1.inputs[2].values.dtype == np.int8:
+                            """
+                            this case is observed in resnet50v1.5 (each Conv also
+                            takes its own weight/bias DQL pair):
+
+                                Add -> Relu1 -> QL1 -> DQL1 -> Conv4 -> Relu -> QL -> DQL -> Conv3 -> Relu -> QL -> DQL -> Conv2 -> QL -> DQL -> Add1
+                                QL1 -------------------------------------------------------------------------------------------------------------> Add1
+
+                            in this case node2 is QL1
+                            node2_relu_node = Relu1
+                            thus b_name = Relu1's output, as the top Add node above is converted as follows -
+
+                                QLinearAdd
+                                    |
+                                    V
+                                  Relu1
+
+                            thus relu1's output is set to b_name
+                            """
+                            node2_relu_node = node2.i()
+                            if node2_relu_node.i().op == "Conv" or node2_relu_node.i().op == "Add":
+                                b_name = node2_relu_node.outputs[0].name
+                            else:
+                                b_name = node2.outputs[0].name
+                        else:
+                            b_name = node2.outputs[0].name
+                else:
+                    print("************* ERROR ****************** Please check parent of Add Node's parent, ", node2.name)
+
+                b_scale_name = add_node.name + "_B_SCALE"
+                b_scale_value = input_node2.inputs[1].values
+                b_scale_tensor = helper.create_initializer_tensor(name=b_scale_name,
+                                                                  tensor_array=b_scale_value,
+                                                                  data_type=onnx.TensorProto.FLOAT)
+
+                b_zp_name = add_node.name + "_B_ZP"
+                b_zp_value = input_node2.inputs[2].values
+
+                if aecg_zendnn_opt:
+                    b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name,
+                                                                   tensor_array=b_zp_value,
+                                                                   data_type=onnx.TensorProto.UINT8)
+                else:
+                    if node2.i().op == "QuantizeLinear" and node2.i().i().op == "Relu":
+                        b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name,
+                                                                       tensor_array=b_zp_value,
+                                                                       data_type=onnx.TensorProto.UINT8)
+                    else:
+                        if input_node2.inputs[2].dtype == np.int8:
+                            b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name,
+                                                                           tensor_array=b_zp_value,
+                                                                           data_type=onnx.TensorProto.INT8)
+                        elif input_node2.inputs[2].dtype == np.uint8:
+                            b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name,
+                                                                           tensor_array=b_zp_value,
+                                                                           data_type=onnx.TensorProto.UINT8)
+
+                y_scale_name = add_node.name + "_Y_SCALE"
+                y_scale_value = output_node.inputs[1].values
+                y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name,
+                                                                  tensor_array=y_scale_value,
+                                                                  data_type=onnx.TensorProto.FLOAT)
+
+                y_zp_name = add_node.name + "_Y_ZP"
+                y_zp_value = output_node.inputs[2].values
+
+                if aecg_zendnn_opt:
+                    y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name,
+                                                                   tensor_array=y_zp_value,
+                                                                   data_type=onnx.TensorProto.UINT8)
+                    y_name = output_node.outputs[0].name
+                else:
+                    if output_node.inputs[2].dtype == np.int8:
+                        y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name,
+                                                                       tensor_array=y_zp_value,
+                                                                       data_type=onnx.TensorProto.INT8)
+                    elif output_node.inputs[2].dtype == np.uint8:
+                        y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name,
+                                                                       tensor_array=y_zp_value,
+                                                                       data_type=onnx.TensorProto.UINT8)
+
+                    if is_relu_present and not remove_relu and node1.inputs[2].values.dtype == np.int8:
+                        y_name = add_node.outputs[0].name
else: + y_name = output_node.outputs[0].name + + kwargs = {} + kwargs["domain"] = 'com.microsoft' + + + new_add_node = onnx.helper.make_node(name = add_node.name, + op_type = "QLinearAdd", + inputs = [a_name, a_scale_name, a_zp_name, b_name, b_scale_name, b_zp_name, y_scale_name, y_zp_name], + outputs = [y_name], + **kwargs) + + self.node = new_add_node + + if is_relu_present: + relu_node = onnx.helper.make_node(name = relu_node_name, op_type = "Relu", inputs = [add_node.outputs[0].name], outputs = [relu_node_output_tensor]) + self.relu_node = relu_node + + intializer_list = [] + intializer_list.append(a_scale_tensor) + intializer_list.append(a_zp_tensor) + intializer_list.append(b_scale_tensor) + intializer_list.append(b_zp_tensor) + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list + + def get_relu_node(self): + return self.relu_node From 91ba2ee41fab4e71fa56e782a7b1b2a4f7eb7f0a Mon Sep 17 00:00:00 2001 From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com> Date: Thu, 21 Sep 2023 19:57:59 +0530 Subject: [PATCH 02/20] Qop operators --- .../operators/averagepool_op.py | 48 ++ src/qonnx/transformation/operators/cast_op.py | 44 ++ src/qonnx/transformation/operators/clip_op.py | 61 +++ .../transformation/operators/concat_op.py | 129 +++++ .../operators/dequantizelinear_op.py | 107 ++++ .../transformation/operators/flatten_op.py | 53 ++ .../transformation/operators/gather_op.py | 112 +++++ src/qonnx/transformation/operators/gemm_op.py | 51 ++ .../operators/gemm_op_optimized.py | 98 ++++ .../operators/globalAveragePool_op.py | 143 ++++++ .../transformation/operators/greater_op.py | 57 +++ src/qonnx/transformation/operators/helper.py | 65 +++ .../transformation/operators/identity_op.py | 56 +++ src/qonnx/transformation/operators/less_op.py | 57 +++ src/qonnx/transformation/operators/lrn_op.py | 48 ++ .../transformation/operators/matmul_op.py | 157 ++++++ .../operators/matmul_retained_op.py | 154 ++++++ .../transformation/operators/maxpool_op.py | 125 +++++ .../operators/qlinearconv_op.py | 470 ++++++++++++++++++ .../operators/quantizelinear_op.py | 78 +++ src/qonnx/transformation/operators/relu_op.py | 44 ++ .../transformation/operators/reshape_op.py | 65 +++ .../transformation/operators/resize_op.py | 57 +++ .../transformation/operators/shape_op.py | 44 ++ .../transformation/operators/slice_op.py | 77 +++ .../transformation/operators/softmax_op.py | 45 ++ .../transformation/operators/squeeze_op.py | 57 +++ .../transformation/operators/transpose_op.py | 45 ++ .../transformation/operators/unsqueeze_op.py | 62 +++ 29 files changed, 2609 insertions(+) create mode 100644 src/qonnx/transformation/operators/averagepool_op.py create mode 100644 src/qonnx/transformation/operators/cast_op.py create mode 100644 src/qonnx/transformation/operators/clip_op.py create mode 100644 src/qonnx/transformation/operators/concat_op.py create mode 100644 src/qonnx/transformation/operators/dequantizelinear_op.py create mode 100644 src/qonnx/transformation/operators/flatten_op.py create mode 100644 src/qonnx/transformation/operators/gather_op.py create mode 100644 src/qonnx/transformation/operators/gemm_op.py create mode 100644 src/qonnx/transformation/operators/gemm_op_optimized.py create mode 100644 src/qonnx/transformation/operators/globalAveragePool_op.py create mode 100644 src/qonnx/transformation/operators/greater_op.py create 
mode 100644 src/qonnx/transformation/operators/helper.py create mode 100644 src/qonnx/transformation/operators/identity_op.py create mode 100644 src/qonnx/transformation/operators/less_op.py create mode 100644 src/qonnx/transformation/operators/lrn_op.py create mode 100644 src/qonnx/transformation/operators/matmul_op.py create mode 100644 src/qonnx/transformation/operators/matmul_retained_op.py create mode 100644 src/qonnx/transformation/operators/maxpool_op.py create mode 100644 src/qonnx/transformation/operators/qlinearconv_op.py create mode 100644 src/qonnx/transformation/operators/quantizelinear_op.py create mode 100644 src/qonnx/transformation/operators/relu_op.py create mode 100644 src/qonnx/transformation/operators/reshape_op.py create mode 100644 src/qonnx/transformation/operators/resize_op.py create mode 100644 src/qonnx/transformation/operators/shape_op.py create mode 100644 src/qonnx/transformation/operators/slice_op.py create mode 100644 src/qonnx/transformation/operators/softmax_op.py create mode 100644 src/qonnx/transformation/operators/squeeze_op.py create mode 100644 src/qonnx/transformation/operators/transpose_op.py create mode 100644 src/qonnx/transformation/operators/unsqueeze_op.py diff --git a/src/qonnx/transformation/operators/averagepool_op.py b/src/qonnx/transformation/operators/averagepool_op.py new file mode 100644 index 00000000..db385be5 --- /dev/null +++ b/src/qonnx/transformation/operators/averagepool_op.py @@ -0,0 +1,48 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +######################################################################### + +import onnx + +class AveragePool: + + def __init__(self, node): + + average_pool_node = node + a_name = average_pool_node.inputs[0].name + + y_name = average_pool_node.outputs[0].name + + new_average_pool_node = onnx.helper.make_node(name = average_pool_node.name, op_type = "AveragePool", + inputs = [a_name], + outputs = [y_name], + ceil_mode = average_pool_node.attrs["ceil_mode"], + kernel_shape = average_pool_node.attrs["kernel_shape"], + pads = average_pool_node.attrs["pads"], + strides = average_pool_node.attrs["strides"]) + + self.node = new_average_pool_node + + def get_node(self): + return self.node \ No newline at end of file diff --git a/src/qonnx/transformation/operators/cast_op.py b/src/qonnx/transformation/operators/cast_op.py new file mode 100644 index 00000000..578329d9 --- /dev/null +++ b/src/qonnx/transformation/operators/cast_op.py @@ -0,0 +1,44 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class Cast: + + def __init__(self, node): + + cast_node = node + + x_name = cast_node.inputs[0].name + y_name = cast_node.outputs[0].name + + new_cast_node = onnx.helper.make_node(name = cast_node.name, op_type = "Cast", + inputs = [x_name], + outputs = [y_name], + to = cast_node.attrs["to"]) + self.node = new_cast_node + + def get_node(self): + return self.node diff --git a/src/qonnx/transformation/operators/clip_op.py b/src/qonnx/transformation/operators/clip_op.py new file mode 100644 index 00000000..f672bfde --- /dev/null +++ b/src/qonnx/transformation/operators/clip_op.py @@ -0,0 +1,61 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper + +class Clip: + + def __init__(self, node): + + clip_node = node + + x_name = clip_node.inputs[0].name + + x2_name = clip_node.inputs[1].name + x2_value = clip_node.inputs[1].values + x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.INT8) + + x3_name = clip_node.inputs[2].name + x3_value = clip_node.inputs[2].values + x3_tensor = helper.create_initializer_tensor(x3_name,x3_value,onnx.TensorProto.INT8) + + new_clip_node = onnx.helper.make_node(name = clip_node.name, op_type = "Clip", + inputs= [x_name, x2_name, x3_name], + outputs = [clip_node.outputs[0].name]) + + self.node = new_clip_node + + intializer_list = [] + intializer_list.append(x2_tensor) + intializer_list.append(x3_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list + diff --git a/src/qonnx/transformation/operators/concat_op.py b/src/qonnx/transformation/operators/concat_op.py new file mode 100644 index 00000000..4f5e5f6e --- /dev/null +++ b/src/qonnx/transformation/operators/concat_op.py @@ -0,0 +1,129 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +######################################################################### + +import onnx +from .helper import helper + +class Concat: + + def __init__(self, node, is_all_concat_input_dql): + + concat_node = node + + number_of_inputs = len(concat_node.inputs) + + zp_value_list = [] + zp_name_list = [] + scale_values_list = [] + scale_name_list = [] + input_tensor_names = [] + + intializer_list = [] + input_names = [] + + for i in range(number_of_inputs): + if is_all_concat_input_dql: + parent_dql_node = concat_node.inputs[i].inputs[0] + scale_values_list.append(parent_dql_node.inputs[1].values) + scale_name_list.append(parent_dql_node.inputs[1].name) + zp_value_list.append(parent_dql_node.inputs[2].values) + zp_name_list.append(parent_dql_node.inputs[2].name) + input_tensor_names.append(parent_dql_node.inputs[0].name) + else: + input_tensor_names.append(concat_node.inputs[i].name) + if len(concat_node.inputs[i].inputs) == 0: + c_input = helper.create_initializer_tensor(name=concat_node.inputs[i].name, + tensor_array=concat_node.inputs[i].values, + data_type=onnx.TensorProto.INT64) + intializer_list.append(c_input) + self.intializer_list = intializer_list + + if is_all_concat_input_dql: + for i in range(number_of_inputs): + scale_tesnor = helper.create_initializer_tensor(name=scale_name_list[i], + tensor_array=scale_values_list[i], + data_type=onnx.TensorProto.FLOAT) + zp_tensor = helper.create_initializer_tensor(name=zp_name_list[i], + tensor_array=zp_value_list[i], + data_type=onnx.TensorProto.UINT8) + intializer_list.append(scale_tesnor) + intializer_list.append(zp_tensor) + + if helper.is_child_present(concat_node, 0, 0) and concat_node.o().op == "DequantizeLinear" and is_all_concat_input_dql: + y_ql_node = concat_node.o() + y_name = y_ql_node.outputs[0].name + else: + y_name = concat_node.outputs[0].name + + if helper.is_child_present(concat_node, 0, 0) and concat_node.o().op == "DequantizeLinear" and is_all_concat_input_dql: + y_scale_name = y_ql_node.inputs[1].name + y_scale_value = y_ql_node.inputs[1].values + y_zp_name = y_ql_node.inputs[2].name + y_zp_value = y_ql_node.inputs[2].values + + y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, + tensor_array=y_scale_value, + data_type=onnx.TensorProto.FLOAT) + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.UINT8) + + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + self.intializer_list = intializer_list + + input_names.append(y_scale_tensor.name) + input_names.append(y_zp_tensor.name) + + for i in range(number_of_inputs): + input_names.append(input_tensor_names[i]) + if len(scale_name_list)>0 and len(zp_name_list)>0: + input_names.append(scale_name_list[i]) + input_names.append(zp_name_list[i]) + + kwargs = {} + kwargs["domain"] = 'com.microsoft' + + if is_all_concat_input_dql: + new_concat_node = onnx.helper.make_node(name = concat_node.name, + op_type = "QLinearConcat", + inputs = input_names, + outputs = [y_name], + axis = concat_node.attrs["axis"], + **kwargs) + else: + new_concat_node = onnx.helper.make_node(name = concat_node.name, + op_type = "Concat", + inputs = input_names, + outputs = [y_name], + axis = concat_node.attrs["axis"]) + + self.node = new_concat_node + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/dequantizelinear_op.py 
b/src/qonnx/transformation/operators/dequantizelinear_op.py new file mode 100644 index 00000000..d9f5b6b2 --- /dev/null +++ b/src/qonnx/transformation/operators/dequantizelinear_op.py @@ -0,0 +1,107 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class DequantizeLinear: + + def __init__(self, node, aecg_zendnn_opt): + + dql_node = node + + x_name = dql_node.inputs[0].name + + if helper.is_parent_exist(dql_node, 0, 0): + if dql_node.i().op == "QuantizeLinear": + ql_node = dql_node.i() + if helper.is_parent_exist(ql_node,0, 0): + if ql_node.i().op == "Relu": + relu_node = ql_node.i() + x_name = relu_node.outputs[0].name + else: + print("*************** WARNING *********************** Please check parent of QL node", ql_node.name, " ignore if pattern is correct") + else: + print("*************** WARNING *********************** Please check parent of DQL node", dql_node.name, " ignore if pattern is correct") + self.initializers = [] + + if len(dql_node.inputs[0].inputs) == 0: + if dql_node.inputs[0].dtype == np.uint8: + input_tensor = helper.create_initializer_tensor(name= dql_node.inputs[0].name, + tensor_array=dql_node.inputs[0].values, + data_type=onnx.TensorProto.UINT8) + elif dql_node.inputs[0].dtype == np.int8: + input_tensor = helper.create_initializer_tensor(name= dql_node.inputs[0].name, + tensor_array=dql_node.inputs[0].values, + data_type=onnx.TensorProto.INT8) + elif dql_node.inputs[0].dtype == np.int32: + input_tensor = helper.create_initializer_tensor(name= dql_node.inputs[0].name, + tensor_array=dql_node.inputs[0].values, + data_type=onnx.TensorProto.INT32) + self.initializers.append(input_tensor) + + x_scale_name = dql_node.inputs[1].name + x_scale_value = dql_node.inputs[1].values + x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name,tensor_array=x_scale_value,data_type=onnx.TensorProto.FLOAT) + + x_zp_name = dql_node.inputs[2].name + x_zp_value = dql_node.inputs[2].values + + if aecg_zendnn_opt: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + else: + if dql_node.inputs[2].dtype == np.uint8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + 
tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + if dql_node.inputs[2].dtype == np.int32: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT32) + elif dql_node.inputs[2].dtype == np.int8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + + y_name = dql_node.outputs[0].name + + dequantizelinear_node = onnx.helper.make_node(name = dql_node.name, + op_type = "DequantizeLinear", + inputs = [x_name, x_scale_name, x_zp_name], + outputs = [y_name]) + + self.node = dequantizelinear_node + + self.initializers.append(x_scale_tensor) + self.initializers.append(x_zp_tensor) + + def get_node(self): + return self.node + + def get_intializers(self): + return self.initializers \ No newline at end of file diff --git a/src/qonnx/transformation/operators/flatten_op.py b/src/qonnx/transformation/operators/flatten_op.py new file mode 100644 index 00000000..62831558 --- /dev/null +++ b/src/qonnx/transformation/operators/flatten_op.py @@ -0,0 +1,53 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class Flatten: + + def __init__(self, node): + + flatten_node = node + x_name = flatten_node.inputs[0].name + y_name = flatten_node.outputs[0].name + + if flatten_node.i().op == "DequantizeLinear": + node1 = flatten_node.i() + x_name = node1.inputs[0].name + + if flatten_node.o().op == "QuantizeLinear": + node2 = flatten_node.o() + y_name = node2.outputs[0].name + + + new_flatten_node = onnx.helper.make_node(name = flatten_node.name, op_type = "Flatten", + inputs = [x_name], + outputs = [y_name]) + + + self.node = new_flatten_node + + def get_node(self): + return self.node \ No newline at end of file diff --git a/src/qonnx/transformation/operators/gather_op.py b/src/qonnx/transformation/operators/gather_op.py new file mode 100644 index 00000000..5fd01faa --- /dev/null +++ b/src/qonnx/transformation/operators/gather_op.py @@ -0,0 +1,112 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class Gather: + + def __init__(self, node): + + gather_node = node + # -------------------------------- + # For QCDQ / QDQ model, this case: + # QuantizeLinear + # | (0) + # Gather ---------- (1) Input + # | + # -------------------------------- + gather_parent_node = node + quantized_data_tensor = node + if helper.is_parent_exist(gather_node, 0, 0): + gather_parent_node = node.i(0) + if len(gather_parent_node.inputs) > 1 and helper.is_constant_tensor(gather_parent_node.inputs[1]): + quantized_data_tensor = gather_parent_node.inputs[1].values + + if helper.is_constant_tensor(gather_parent_node.inputs[0]): + if gather_parent_node.op == "QuantizeLinear": + X_DQL_node = gather_parent_node + dequantized_data_tensor = X_DQL_node.inputs[0] + data_scale_tensor = X_DQL_node.inputs[1] + data_zero_point_tensor = X_DQL_node.inputs[2] + + data_scale_tensor = data_scale_tensor.values * np.ones(dequantized_data_tensor.shape) + a = dequantized_data_tensor.values / data_scale_tensor + b = data_zero_point_tensor.values * np.ones(dequantized_data_tensor.shape) + quantized_data_tensor = a + b + quantized_data_tensor = quantized_data_tensor.astype(np.int8) + + else: + if gather_parent_node.op == "QuantizeLinear": + X_QL_node = gather_parent_node + quantized_data_tensor = X_QL_node.inputs[1].values + + data_tensor = helper.create_initializer_tensor(name=gather_node.inputs[0].name, + tensor_array=quantized_data_tensor, + data_type=onnx.TensorProto.INT8) + + if helper.is_constant_tensor(gather_parent_node.inputs[0]): + data_tensor = helper.create_initializer_tensor(name=gather_node.inputs[0].name, + tensor_array=quantized_data_tensor, + data_type=onnx.TensorProto.INT8) + if helper.is_constant_tensor(gather_node.inputs[1]): + if gather_node.inputs[1].dtype == "int64": + indices_tensor = helper.create_initializer_tensor(name=gather_node.inputs[1].name, + tensor_array=gather_node.inputs[1].values, + data_type=onnx.TensorProto.INT64) + else: + print("ERROR check data type in Gather node ", gather_node.name) + + new_gather_node = onnx.helper.make_node(name = gather_node.name, op_type = "Gather", + inputs= [data_tensor.name, gather_node.inputs[1].name], + outputs = [gather_node.outputs[0].name], + axis = 0) + if helper.is_constant_tensor(gather_parent_node.inputs[0]): + new_gather_node = onnx.helper.make_node(name = 
gather_node.name, op_type = "Gather", + inputs= [data_tensor.name, gather_node.inputs[1].name], + outputs = [gather_node.outputs[0].name], + axis = 0) + elif helper.is_constant_tensor(gather_node.inputs[1]): + new_gather_node = onnx.helper.make_node(name = gather_node.name, op_type = "Gather", + inputs= [gather_node.inputs[0].name,indices_tensor.name], + outputs = [gather_node.outputs[0].name], + axis = gather_node.attrs['axis']) + + self.node = new_gather_node + + intializer_list = [] + if helper.is_constant_tensor(gather_parent_node.inputs[0]): + intializer_list.append(data_tensor) + elif helper.is_constant_tensor(gather_node.inputs[1]): + intializer_list.append(indices_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list + diff --git a/src/qonnx/transformation/operators/gemm_op.py b/src/qonnx/transformation/operators/gemm_op.py new file mode 100644 index 00000000..30a9a904 --- /dev/null +++ b/src/qonnx/transformation/operators/gemm_op.py @@ -0,0 +1,51 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class Gemm: + + def __init__(self, node): + + gemm_node = node + + x1 = gemm_node.inputs[0] + x2 = gemm_node.inputs[1] + x3 = gemm_node.inputs[2] + y = gemm_node.outputs[0] + + new_gemm_node = onnx.helper.make_node(name = gemm_node.name, op_type = "Gemm", + inputs= [x1.name, x2.name, x3.name], + outputs = [y.name], + alpha = gemm_node.attrs["alpha"], + beta = gemm_node.attrs["beta"], + transB = gemm_node.attrs["transB"]) + + self.node = new_gemm_node + + def get_node(self): + return self.node \ No newline at end of file diff --git a/src/qonnx/transformation/operators/gemm_op_optimized.py b/src/qonnx/transformation/operators/gemm_op_optimized.py new file mode 100644 index 00000000..aff0526b --- /dev/null +++ b/src/qonnx/transformation/operators/gemm_op_optimized.py @@ -0,0 +1,98 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#########################################################################
+
+import onnx
+from .helper import helper
+import numpy as np
+
+class Gemm_optimized:
+
+    def __init__(self, node):
+
+        gemm_node = node
+
+        x1 = gemm_node.inputs[0]
+        x2 = gemm_node.inputs[1]
+        x3 = gemm_node.inputs[2]
+        y = gemm_node.outputs[0]
+
+        # the bias comes in through a DequantizeLinear parent; recover the
+        # fp32 bias with the affine dequantization (q - zero_point) * scale
+        bias_node = gemm_node.i(2)
+        bias_tensor = bias_node.inputs[0]
+        bias_scale_tensor = bias_node.inputs[1]
+        bias_zero_point = bias_node.inputs[2]
+        bias_scale_tensor = bias_scale_tensor.values * np.ones(bias_tensor.shape)
+        a = bias_tensor.values - bias_zero_point.values * np.ones(bias_tensor.shape)
+        fp32_bias_tensor = a * bias_scale_tensor
+        fp32_bias_tensor = fp32_bias_tensor.astype(np.float32)
+
+        # the weight comes in through QuantizeLinear (optionally followed by
+        # Clip) and DequantizeLinear; quantize as q = round(x / scale) + zp,
+        # clip to the s8 range, then dequantize through the DQL's scale/zp
+        # as fp32 = (q - zp) * scale
+        weight_node = gemm_node.i(1).i()
+        if gemm_node.i(1).i().op == "Clip":
+            weight_node = gemm_node.i(1).i().i()
+        weight_tensor = weight_node.inputs[0]
+        weight_scale_tensor = weight_node.inputs[1]
+        weight_zero_point = weight_node.inputs[2]
+        weight_scale_tensor = weight_scale_tensor.values * np.ones(weight_tensor.shape)
+        a = np.round(weight_tensor.values / weight_scale_tensor)
+        b = weight_zero_point.values * np.ones(weight_tensor.shape)
+        int8_weight = a + b
+        int8_weight = np.clip(int8_weight, -127, 127)
+        dq_weight_scale_tensor = gemm_node.i(1).inputs[1]
+        dq_weight_zero_point = gemm_node.i(1).inputs[2]
+        fp32_weight = (int8_weight - dq_weight_zero_point.values * np.ones(int8_weight.shape)) * (dq_weight_scale_tensor.values * np.ones(int8_weight.shape))
+
+        bias_name = x1.name + ".1"
+        weight_name = x1.name + ".2"
+        bias_tensor_1 = helper.create_initializer_tensor(name=bias_name,
+                                                         tensor_array=fp32_bias_tensor,
+                                                         data_type=onnx.TensorProto.FLOAT)
+        weight_tensor_1 = helper.create_initializer_tensor(name=weight_name,
+                                                           tensor_array=fp32_weight,
+                                                           data_type=onnx.TensorProto.FLOAT)
+
+        new_gemm_node = onnx.helper.make_node(name = gemm_node.name, op_type = "Gemm",
+                                              inputs= [x1.name, weight_name, bias_name],
+                                              outputs = [y.name],
+                                              alpha = gemm_node.attrs["alpha"],
+                                              beta = gemm_node.attrs["beta"],
+                                              transB = gemm_node.attrs["transB"])
+
+        node.i(1).i(0).inputs.clear()
+        node.i(1).i(0).outputs.clear()
+        node.i(1).inputs.clear()
+        node.i(1).outputs.clear()
+
+        self.node = new_gemm_node
+        intializer_list = []
+        intializer_list.append(weight_tensor_1)
+        intializer_list.append(bias_tensor_1)
+        self.intializer_list = intializer_list
+
+    def get_node(self):
+        return self.node
+
+    def get_intializers(self):
+        return
self.intializer_list diff --git a/src/qonnx/transformation/operators/globalAveragePool_op.py b/src/qonnx/transformation/operators/globalAveragePool_op.py new file mode 100644 index 00000000..17f8cec6 --- /dev/null +++ b/src/qonnx/transformation/operators/globalAveragePool_op.py @@ -0,0 +1,143 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class GlobalAveragePool: + + def __init__(self, node, aecg_zendnn_opt, remove_relu): + + golbal_average_pool_node = node + x_name = golbal_average_pool_node.inputs[0].name + y_name = golbal_average_pool_node.outputs[0].name + + if helper.is_parent_exist(golbal_average_pool_node, 0, 0) and golbal_average_pool_node.i().op == "DequantizeLinear": + if helper.is_parent_exist(golbal_average_pool_node, 0, 0): + parent_dql_node = golbal_average_pool_node.i() + else: + print("************* ERROR ****************** Please check 1st parent of GlobalAveragePool, ", golbal_average_pool_node.name, " parent DNE") + + x_scale_name = node.name + "x_scale" + x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, + tensor_array=parent_dql_node.inputs[1].values, + data_type=onnx.TensorProto.FLOAT) + x_zp_name = node.name + "x_zp" + + is_input_s8 = True + + if helper.is_parent_exist(parent_dql_node, 0, 0): + if aecg_zendnn_opt: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=parent_dql_node.inputs[2].values, + data_type=onnx.TensorProto.UINT8) + else: + second_parent = parent_dql_node.i() + if second_parent.op == "Relu": + if helper.is_parent_exist(second_parent, 0, 0) and second_parent.i().op == "QuantizeLinear": + third_parent = second_parent.i() + if third_parent.inputs[2].values.dtype == np.int8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=third_parent.inputs[2].values, + data_type=onnx.TensorProto.INT8) + is_input_s8 = True + else: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=third_parent.inputs[2].values, + data_type=onnx.TensorProto.UINT8) + is_input_s8 = False + else: + if parent_dql_node.i().inputs[2].values.dtype == np.int8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=parent_dql_node.inputs[2].values, + 
data_type=onnx.TensorProto.INT8) + is_input_s8 = True + else: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=parent_dql_node.inputs[2].values, + data_type=onnx.TensorProto.UINT8) + is_input_s8 = False + else: + print("************* ERROR ****************** Please check 2nd parent of GlobalAveragePool, ", golbal_average_pool_node.name, " 1st parent of ", parent_dql_node, " parent DNE") + + if parent_dql_node.i().i().op == "Relu" and parent_dql_node.i().i().i().i().inputs[2].values.dtype == np.int8: + if remove_relu: + x_name = parent_dql_node.inputs[0].name + else: + third_parent_relu = parent_dql_node.i().i() + if third_parent_relu.i().op == "Conv" or third_parent_relu.i().op == "Add": + x_name = third_parent_relu.outputs[0].name + else: + x_name = (third_parent_relu.o()).outputs[0].name + else: + x_name = parent_dql_node.inputs[0].name + + if helper.is_child_present(node, 0, 0) and golbal_average_pool_node.o().op == "QuantizeLinear": + child_ql_node = golbal_average_pool_node.o() + + y_scale_name = node.name + "y_scale" + y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, + tensor_array=child_ql_node.inputs[1].values, + data_type=onnx.TensorProto.FLOAT) + y_zp_name = node.name + "y_zp" + + if aecg_zendnn_opt: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=child_ql_node.inputs[2].values, + data_type=onnx.TensorProto.UINT8) + else: + if is_input_s8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=child_ql_node.inputs[2].values, + data_type=onnx.TensorProto.INT8) + else: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=child_ql_node.inputs[2].values, + data_type=onnx.TensorProto.UINT8) + + y_name = child_ql_node.outputs[0].name + + kwargs = {} + kwargs["domain"] = 'com.microsoft' + new_average_pool_node = onnx.helper.make_node(name = golbal_average_pool_node.name, op_type = "QLinearGlobalAveragePool", + inputs = [x_name, x_scale_name, x_zp_name, y_scale_name, y_zp_name], + outputs = [y_name], + channels_last = 0,**kwargs) + + intializer_list = [] + intializer_list.append(x_scale_tensor) + intializer_list.append(x_zp_tensor) + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + self.intializer_list = intializer_list + + self.node = new_average_pool_node + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/greater_op.py b/src/qonnx/transformation/operators/greater_op.py new file mode 100644 index 00000000..fc54c6e5 --- /dev/null +++ b/src/qonnx/transformation/operators/greater_op.py @@ -0,0 +1,57 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper + +class Greater: + + def __init__(self, node): + + greater_node = node + x1_name = greater_node.inputs[0].name + + x2_name = greater_node.inputs[1].name + x2_value = greater_node.inputs[1].values + x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.FLOAT) + + y_name = greater_node.outputs[0].name + + new_greater_node = onnx.helper.make_node(name = greater_node.name, + op_type = "Greater", + inputs = [x1_name, x2_name], + outputs = [y_name]) + + self.node = new_greater_node + + intializer_list = [] + intializer_list.append(x2_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/helper.py b/src/qonnx/transformation/operators/helper.py new file mode 100644 index 00000000..c070a6a4 --- /dev/null +++ b/src/qonnx/transformation/operators/helper.py @@ -0,0 +1,65 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+#
+#########################################################################
+
+import onnx
+import numpy as np
+
+class helper:
+
+    def __init__(self) -> None:
+        pass
+
+    @staticmethod
+    def create_initializer_tensor(name: str, tensor_array: np.ndarray, data_type: int = onnx.TensorProto.FLOAT) -> onnx.TensorProto:
+        initializer_tensor = onnx.helper.make_tensor(name=name,
+                                                     data_type=data_type,
+                                                     dims=tensor_array.shape,
+                                                     vals=tensor_array.flatten().tolist())
+        return initializer_tensor
+
+    # to check node.i() exists pass tensor_idx=0, node_idx=0
+    # to check node.inputs[1].inputs[0] exists pass tensor_idx=1, node_idx=0
+    @staticmethod
+    def is_parent_exist(node, tensor_idx, node_idx):
+        if len(node.inputs) > tensor_idx and len(node.inputs[tensor_idx].inputs) > node_idx:
+            return True
+        return False
+
+    @staticmethod
+    def is_child_present(node, tensor_idx, node_idx):
+        if len(node.outputs) > tensor_idx and len(node.outputs[tensor_idx].outputs) > node_idx:
+            return True
+        return False
+
+    @staticmethod
+    def is_attr_exist(node, attr_name):
+        try:
+            node.attrs[attr_name]
+            return True
+        except (KeyError, AttributeError):
+            return False
+
+    @staticmethod
+    def is_constant_tensor(tensor):
+        try:
+            tensor.values
+            return True
+        except AttributeError:
+            return False
\ No newline at end of file
diff --git a/src/qonnx/transformation/operators/identity_op.py b/src/qonnx/transformation/operators/identity_op.py
new file mode 100644
index 00000000..e9019659
--- /dev/null
+++ b/src/qonnx/transformation/operators/identity_op.py
@@ -0,0 +1,56 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
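+#
+# NOTE (editorial): the Identity wrapper below assumes its input is a constant
+# tensor (it reads inputs[0].values) and re-emits that constant as a FLOAT
+# initializer; Identity nodes fed by graph inputs are not handled here.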
+# +######################################################################### + +import onnx +from .helper import helper + +class Identity: + + def __init__(self, node): + + identity_node = node + + x1_name = identity_node.inputs[0].name + x1_value = identity_node.inputs[0].values + x1_tensor = helper.create_initializer_tensor(x1_name,x1_value,onnx.TensorProto.FLOAT) + + y_name = identity_node.outputs[0].name + + new_identity_node = onnx.helper.make_node(name = identity_node.name, + op_type = "Identity", + inputs = [x1_name], + outputs = [y_name]) + + self.node = new_identity_node + + intializer_list = [] + intializer_list.append(x1_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/less_op.py b/src/qonnx/transformation/operators/less_op.py new file mode 100644 index 00000000..9d54216f --- /dev/null +++ b/src/qonnx/transformation/operators/less_op.py @@ -0,0 +1,57 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper + +class Less: + + def __init__(self, node): + + less_node = node + x1_name = less_node.inputs[0].name + + x2_name = less_node.inputs[1].name + x2_value = less_node.inputs[1].values + x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.FLOAT) + + y_name = less_node.outputs[0].name + + new_less_node = onnx.helper.make_node(name = less_node.name, + op_type = "Less", + inputs = [x1_name, x2_name], + outputs = [y_name]) + + self.node = new_less_node + + intializer_list = [] + intializer_list.append(x2_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/lrn_op.py b/src/qonnx/transformation/operators/lrn_op.py new file mode 100644 index 00000000..f8dcbf22 --- /dev/null +++ b/src/qonnx/transformation/operators/lrn_op.py @@ -0,0 +1,48 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
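+#
+# NOTE (editorial, illustrative): LRN normalises across channels as
+#     y[n,c,h,w] = x[n,c,h,w] / (bias + (alpha / size) * sum_i(x[n,i,h,w]^2)) ** beta
+# with i running over a window of `size` channels centred on c; the wrapper
+# below simply clones the node and forwards those four attributes unchanged.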
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class LRN: + + def __init__(self, node): + + lrn_node = node + + x_name = lrn_node.inputs[0].name + y_name = lrn_node.outputs[0].name + + new_lrn_node = onnx.helper.make_node(name = lrn_node.name, op_type = "LRN", + inputs = [x_name], + outputs = [y_name], + alpha = lrn_node.attrs["alpha"], + beta = lrn_node.attrs["beta"], + bias = lrn_node.attrs["bias"], + size = lrn_node.attrs["size"]) + + self.node = new_lrn_node + + def get_node(self): + return self.node diff --git a/src/qonnx/transformation/operators/matmul_op.py b/src/qonnx/transformation/operators/matmul_op.py new file mode 100644 index 00000000..1cb1842d --- /dev/null +++ b/src/qonnx/transformation/operators/matmul_op.py @@ -0,0 +1,157 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class MatMul: + + def __init__(self, node): + matlmul_node = node + + if helper.is_parent_exist(matlmul_node, 0, 0): + x_DQL_node = matlmul_node.i() + else: + print("**************** ERROR ******************* Matmul node ", matlmul_node.name, " input(0,0) DNE") + + if helper.is_parent_exist(matlmul_node, 1, 0): + w_DQL_node = matlmul_node.inputs[1].inputs[0] + else: + print("************* ERROR ************************ Please check the Matmul node ", matlmul_node.name, " the input(1,0) DNE") + + if helper.is_parent_exist(x_DQL_node, 0, 0): + x_QL_node = x_DQL_node.i() + else: + print("**************** ERROR ******************* ", x_DQL_node.name, " input(0,0) DNE Please check") + + x_scale_tensor = x_DQL_node.inputs[1] + x_scale = x_scale_tensor.values + x_zp_tensor = x_DQL_node.inputs[2] + + w_scale_tensor = w_DQL_node.inputs[1] + w_scale = w_scale_tensor.values + w_zp_tensor = w_DQL_node.inputs[2] + + if helper.is_child_present(matlmul_node, 0, 0): + if (matlmul_node.o().op == "QuantizeLinear"): + y_QL_node = matlmul_node.o() + y_scale_tensor = y_QL_node.inputs[1] + y_scale = y_scale_tensor.values + y_zp_tensor = y_QL_node.inputs[2] + else: + print("*********************** ERROR output of Matmul node ", matlmul_node.name, " is not QuantizeLinear ***********************") + else: + print(matlmul_node.name, " output(0,0) DNE") + + quantized_weight_tensor_original = w_DQL_node.inputs[0].values + new_shape = quantized_weight_tensor_original.shape + (1,1) + a1 = np.reshape(quantized_weight_tensor_original, new_shape) + quantized_weight_tensor = np.transpose(a1, (1,0,2,3)) + + if x_QL_node.i().op == "DequantizeLinear" and x_QL_node.i().i().op == "QuantizeLinear": + x_name = x_QL_node.i().i().outputs[0].name + else: + x_name = x_QL_node.outputs[0].name + + y_name = matlmul_node.o().outputs[0].name + + x_scale_name = matlmul_node.name + "_X_SCALE" + x_scale_value = x_scale + x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, + tensor_array=x_scale_value, + data_type=onnx.TensorProto.FLOAT) + + x_zp_name = matlmul_node.name + "_X_ZERO_POINT" + x_zp_value = x_zp_tensor.values + + if (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.int8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + elif (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.uint8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + + w_name = matlmul_node.inputs[1].name + w_value = quantized_weight_tensor + w_tensor = helper.create_initializer_tensor(name=w_name, + tensor_array=w_value, + data_type=onnx.TensorProto.INT8) + + w_scale_name = matlmul_node.name + "_W_SCALE" + w_scale_value = w_scale + w_scale_tensor = helper.create_initializer_tensor(name=w_scale_name, + tensor_array=w_scale_value, + data_type=onnx.TensorProto.FLOAT) + + w_zp_name = matlmul_node.name + "_W_ZERO_POINT" + w_zp_value = w_zp_tensor.values + w_zp_tensor = helper.create_initializer_tensor(name=w_zp_name, + tensor_array=w_zp_value, + data_type=onnx.TensorProto.INT8) + + y_scale_name = matlmul_node.name + "_Y_SCALE" + y_scale_value = y_scale + y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, + tensor_array=y_scale_value, + data_type=onnx.TensorProto.FLOAT) + + y_zp_name = 
matlmul_node.name + "_Y_ZERO_POINT" + y_zp_value = y_zp_tensor.values + + if y_zp_tensor.dtype == np.int8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.INT8) + elif y_zp_tensor.dtype == np.uint8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.UINT8) + + qlinearconv_node = onnx.helper.make_node(name = matlmul_node.name, op_type = "QLinearConv", + inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name], + outputs = [y_name], dilations = [1,1], group = 1, + kernel_shape = [1,1], pads = [0,0,0,0], strides = [1,1]) + + + self.node = qlinearconv_node + + intializer_list = [] + intializer_list.append(x_scale_tensor) + intializer_list.append(x_zp_tensor) + intializer_list.append(w_tensor) + intializer_list.append(w_scale_tensor) + intializer_list.append(w_zp_tensor) + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/matmul_retained_op.py b/src/qonnx/transformation/operators/matmul_retained_op.py new file mode 100644 index 00000000..ba410bc9 --- /dev/null +++ b/src/qonnx/transformation/operators/matmul_retained_op.py @@ -0,0 +1,154 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
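+#
+# NOTE (editorial, illustrative): QLinearMatMul evaluates the product in the
+# integer domain; numerically it is equivalent to dequantize -> matmul ->
+# requantize, i.e.
+#     y_q ~= saturate(round((((x_q - x_zp) * x_scale) @ ((w_q - w_zp) * w_scale)) / y_scale) + y_zp)
+# which is why only the eight (data, scale, zero_point) tensors collected
+# below are needed to describe the op.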
+# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class MatMul_Retained: + + def __init__(self, node): + matlmul_node = node + + if helper.is_parent_exist(matlmul_node, 0, 0): + x_DQL_node = matlmul_node.i() + else: + print("**************** ERROR ******************* Matmul node ", matlmul_node.name, " input(0,0) DNE") + + if helper.is_parent_exist(matlmul_node, 1, 0): + w_DQL_node = matlmul_node.inputs[1].inputs[0] + else: + print("************* ERROR ************************ Please check the Matmul node ", matlmul_node.name, " the input(1,0) DNE") + + if helper.is_parent_exist(x_DQL_node, 0, 0): + x_QL_node = x_DQL_node.i() + else: + print("**************** ERROR ******************* ", x_DQL_node.name, " input(0,0) DNE Please check") + + x_scale_tensor = x_DQL_node.inputs[1] + x_scale = x_scale_tensor.values + x_zp_tensor = x_DQL_node.inputs[2] + + w_scale_tensor = w_DQL_node.inputs[1] + w_scale = w_scale_tensor.values + w_zp_tensor = w_DQL_node.inputs[2] + + if helper.is_child_present(matlmul_node, 0, 0): + if (matlmul_node.o().op == "QuantizeLinear"): + y_QL_node = matlmul_node.o() + y_scale_tensor = y_QL_node.inputs[1] + y_scale = y_scale_tensor.values + y_zp_tensor = y_QL_node.inputs[2] + else: + print("*********************** ERROR output of Matmul node ", matlmul_node.name, " is not QuantizeLinear ***********************") + else: + print(matlmul_node.name, " output(0,0) DNE") + + if x_QL_node.op == "QuantizeLinear" or x_QL_node.op == "MaxPool": + x_name = x_QL_node.outputs[0].name + else: + print("please check x_QL_node of Matmul node ", matlmul_node.name) + + y_name = y_QL_node.outputs[0].name + + x_scale_name = matlmul_node.name + "_X_SCALE" + x_scale_value = x_scale + x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, + tensor_array=x_scale_value, + data_type=onnx.TensorProto.FLOAT) + + x_zp_name = matlmul_node.name + "_X_ZERO_POINT" + x_zp_value = x_zp_tensor.values + + if (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.int8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + elif (x_QL_node.op == "MaxPool"): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + elif (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.uint8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + + w_name = matlmul_node.inputs[1].name + w_value = w_DQL_node.inputs[0].values + w_tensor = helper.create_initializer_tensor(name=w_name, + tensor_array=w_value, + data_type=onnx.TensorProto.INT8) + + w_scale_name = matlmul_node.name + "_W_SCALE" + w_scale_value = w_scale + w_scale_tensor = helper.create_initializer_tensor(name=w_scale_name, + tensor_array=w_scale_value, + data_type=onnx.TensorProto.FLOAT) + + w_zp_name = matlmul_node.name + "_W_ZERO_POINT" + w_zp_value = w_zp_tensor.values + w_zp_tensor = helper.create_initializer_tensor(name=w_zp_name, + tensor_array=w_zp_value, + data_type=onnx.TensorProto.INT8) + + y_scale_name = matlmul_node.name + "_Y_SCALE" + y_scale_value = y_scale + y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, + tensor_array=y_scale_value, + data_type=onnx.TensorProto.FLOAT) + + y_zp_name = matlmul_node.name + "_Y_ZERO_POINT" + y_zp_value = y_zp_tensor.values + + if 
y_zp_tensor.dtype == np.int8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.INT8) + elif y_zp_tensor.dtype == np.uint8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.UINT8) + + qlinear_matmul = onnx.helper.make_node(name = matlmul_node.name, op_type = "QLinearMatMul", + inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name], + outputs = [y_name]) + + self.node = qlinear_matmul + + intializer_list = [] + intializer_list.append(x_scale_tensor) + intializer_list.append(x_zp_tensor) + intializer_list.append(w_tensor) + intializer_list.append(w_scale_tensor) + intializer_list.append(w_zp_tensor) + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/maxpool_op.py b/src/qonnx/transformation/operators/maxpool_op.py new file mode 100644 index 00000000..345393f8 --- /dev/null +++ b/src/qonnx/transformation/operators/maxpool_op.py @@ -0,0 +1,125 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
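+#
+# NOTE (editorial): MaxPool is monotonic, so it commutes with the affine
+# quantization map and can operate directly on the integer tensor. The
+# wrapper below therefore only rewires the input/output names around the
+# neighbouring QuantizeLinear/DequantizeLinear/Relu nodes and fills in the
+# ONNX default value for any attribute the source node omits.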
+# +######################################################################### + +import onnx +from .helper import helper + +class MaxPool: + + def __init__(self, node, maxpool_count, remove_relu): + + maxpool_node = node + x_name = maxpool_node.inputs[0].name + y_name = maxpool_node.outputs[0].name + + if helper.is_child_present(maxpool_node, 0, 0) and maxpool_node.o().op == "QuantizeLinear": + if helper.is_parent_exist(maxpool_node, 0, 0) and maxpool_node.i().op == "DequantizeLinear": + q_node = maxpool_node.o() + y_name = q_node.outputs[0].name + + if helper.is_parent_exist(maxpool_node, 0, 0): + found_relu = False + if maxpool_node.i().op == "Relu": + relu_node = maxpool_node.i() + found_relu = True + elif maxpool_node.i().op == "DequantizeLinear": + if maxpool_node.i().i().i().op == "Relu": + relu_node = maxpool_node.i().i().i() + found_relu = True + elif maxpool_node.i().i().i().op == "Concat": + x_name = maxpool_node.i().i().outputs[0].name + if maxpool_node.o().op == "QuantizeLinear": + y_name = maxpool_node.o().outputs[0].name + elif maxpool_node.i().i().op == "MaxPool": + x_name = maxpool_node.i().i().outputs[0].name + + if found_relu: + if helper.is_child_present(relu_node, 0, 0) and relu_node.outputs[0].outputs[0].op == "MaxPool": + ql_node = relu_node.outputs[0].outputs[0] + x_name = ql_node.outputs[0].name + elif helper.is_child_present(relu_node, 0, 1) and relu_node.outputs[0].outputs[1].op == "MaxPool": + ql_node = relu_node.outputs[0].outputs[0] + x_name = ql_node.outputs[0].name + elif helper.is_child_present(relu_node, 0, 0) and relu_node.o().o().outputs[0].outputs[0].op == "MaxPool": + x_name = relu_node.outputs[0].name + elif helper.is_child_present(relu_node, 0, 0) and relu_node.o().o().outputs[0].outputs[1].op == "MaxPool": + x_name = relu_node.outputs[0].name + + + if maxpool_node.i().op == "QuantizeLinear": + x_ql_node = maxpool_node.i() + if remove_relu: + x_name = x_ql_node.outputs[0].name + else: + if helper.is_parent_exist(x_ql_node, 0, 0) and x_ql_node.i().op == "Relu" and x_ql_node.i().i().op == "Conv": + relu_node = x_ql_node.i() + x_name = relu_node.outputs[0].name + + if helper.is_attr_exist(maxpool_node, 'auto_pad'): + auto_pad_attr = maxpool_node.attrs["auto_pad"] + else: + auto_pad_attr = "NOTSET" + + if helper.is_attr_exist(maxpool_node, 'ceil_mode'): + ceil_mode_attr = maxpool_node.attrs["ceil_mode"] + else: + ceil_mode_attr = 0 + + if helper.is_attr_exist(maxpool_node, 'dilations'): + dilations_attr = maxpool_node.attrs["dilations"] + else: + dilations_attr =[1,1] + + if helper.is_attr_exist(maxpool_node, 'pads'): + pads_attr = maxpool_node.attrs["pads"] + else: + pads_attr = [0,0,0,0] + + if helper.is_attr_exist(maxpool_node, 'storage_order'): + storage_order_attr = maxpool_node.attrs["storage_order"] + else: + storage_order_attr = 0 + + if helper.is_attr_exist(maxpool_node, 'strides'): + strides_attr = maxpool_node.attrs["strides"] + else: + strides_attr = [1,1] + + new_mapool_node = onnx.helper.make_node(name = maxpool_node.name, + op_type = "MaxPool", + inputs = [x_name], + outputs = [y_name], + auto_pad = auto_pad_attr, + ceil_mode = ceil_mode_attr, + dilations = dilations_attr, + pads = pads_attr, + storage_order = storage_order_attr, + strides = strides_attr, + kernel_shape = maxpool_node.attrs["kernel_shape"]) + + self.node = new_mapool_node + + def get_node(self): + return self.node \ No newline at end of file diff --git a/src/qonnx/transformation/operators/qlinearconv_op.py b/src/qonnx/transformation/operators/qlinearconv_op.py new file mode 
100644 index 00000000..b3556678 --- /dev/null +++ b/src/qonnx/transformation/operators/qlinearconv_op.py @@ -0,0 +1,470 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +import numpy as np +from .helper import helper + +class QLinearConv: + + def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count): + x_DQL_node = node.i() + + conv_node = node + + has_bias = True if len(conv_node.inputs) == 3 else False + + w_DQL_node = conv_node.inputs[1].inputs[0] + QCDQ_model_detected=False + clip_max = np.iinfo(np.int8).min + clip_min = np.iinfo(np.int8).max + if (helper.is_constant_tensor(w_DQL_node.i())==False and w_DQL_node.i().op == "Clip"): + QCDQ_model_detected=True + clip_min = w_DQL_node.i().inputs[1].values + clip_max = w_DQL_node.i().inputs[2].values + + # b_DQL_node = (3) + # ------------------------------------------------------------------------ + # (1) (2) DequantizeLinear (1) (2) + # \ | / (3) for bias OR \ / + # \ | / \ / + # Conv (QDQ model) Conv (3 - FP32 bias embedded) (QCDQ model) + # | | + # ------------------------------------------------------------------------ + b_DQL_node = conv_node.inputs[2] # For QDQ + b_DQL_tensor = conv_node.inputs[2] # For QCDQ + if has_bias and QCDQ_model_detected==False: + b_DQL_node = conv_node.inputs[2].inputs[0] + is_fp32_bias_embedded = False + if QCDQ_model_detected: + if helper.is_constant_tensor(b_DQL_tensor) and b_DQL_tensor.dtype == "float32": + is_fp32_bias_embedded = True + b_QL_tensor = b_DQL_tensor + if is_fp32_bias_embedded: + if not helper.is_parent_exist(b_DQL_tensor, 0, 0): + b_QL_tensor = b_DQL_tensor + + is_weight_tensor_quantized = False + if len(w_DQL_node.inputs[0].inputs) == 0: + is_weight_tensor_quantized = True + is_bias_tensor_quantized = False + if QCDQ_model_detected and has_bias and not is_fp32_bias_embedded and not helper.is_parent_exist(b_DQL_tensor, 0, 0) and b_DQL_tensor.dtype == "int32": + is_bias_tensor_quantized = True + elif QCDQ_model_detected==False and has_bias and len(b_DQL_node.inputs[0].inputs) == 0: + is_bias_tensor_quantized = True + + if not is_weight_tensor_quantized: + w_QL_node = w_DQL_node.i() + + if QCDQ_model_detected==False and has_bias and (not is_bias_tensor_quantized): + b_QL_node = b_DQL_node.i() + + x_scale_tensor = x_DQL_node.inputs[1] 
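+        # NOTE (editorial): the (scale, zero_point) pairs gathered here come
+        # from the DequantizeLinear parents of the Conv inputs (tensor inputs
+        # 1 and 2 of each DQL node) and, further below, from the
+        # QuantizeLinear child for the output; a float bias is requantized to
+        # int32 with bias_scale = x_scale * w_scale, i.e.
+        #     b_q = round(b / (x_scale * w_scale))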
+ x_scale = x_scale_tensor.values + x_zp_tensor = x_DQL_node.inputs[2] + + w_scale_tensor = w_DQL_node.inputs[1] + w_scale = w_scale_tensor.values + w_zp_tensor = w_DQL_node.inputs[2] + + is_relu_present = False + if conv_node.o().op == "Relu": + relu_node = conv_node.o() + is_relu_present = True + if relu_node.o().op == "QuantizeLinear": + y_QL_node = relu_node.o() + y_scale_tensor = y_QL_node.inputs[1] + y_scale = y_scale_tensor.values + y_zp_tensor = y_QL_node.inputs[2] + else: + print("*********************** ERROR output of Relu node ", relu_node.name, " is not QuantizeLinear ***********************") + elif (conv_node.o().op == "QuantizeLinear"): + y_QL_node = conv_node.o() + y_scale_tensor = y_QL_node.inputs[1] + y_scale = y_scale_tensor.values + y_zp_tensor = y_QL_node.inputs[2] + else: + print("*********************** ERROR output of Conv node ", conv_node.name, " is not QuantizeLinear ***********************") + + S8_MIN = np.iinfo(np.int8).min + S8_MAX = np.iinfo(np.int8).max + if clip_min != np.iinfo(np.int8).max and clip_max != np.iinfo(np.int8).min: + S8_MIN = clip_min + S8_MAX = clip_max + U8_MIN = np.iinfo(np.uint8).min + U8_MAX = np.iinfo(np.uint8).max + S32_MIN = np.iinfo(np.int32).min + S32_MAX = np.iinfo(np.int32).max + + if (QCDQ_model_detected and helper.is_parent_exist(w_DQL_node, 0, 0) and helper.is_parent_exist(w_DQL_node.i(0), 0, 0) and w_DQL_node.i(0).i(0).op == "QuantizeLinear"): + w_QL_node = w_DQL_node.i(0).i(0) + + if QCDQ_model_detected==False and has_bias and (not is_bias_tensor_quantized) and helper.is_parent_exist(b_DQL_node, 0, 0): + b_QL_node = b_DQL_node.i() + + # -------------------------------------------------------------------------- + # QuantizeLinear (w_QL_node set to this in first if condition) + # | + # Clip + # | + # DequantizeLinear (for weight) + # (0) / (1) + # | / + # Conv + # -------------------------------------------------------------------------- + if QCDQ_model_detected and helper.is_parent_exist(w_DQL_node, 0, 0) and helper.is_parent_exist(w_DQL_node.i(0), 0, 0): + w_QL_node = w_DQL_node.i().i() + quantized_weight_tensor = w_QL_node.inputs[0] + #if is_weight_tensor_quantized and QCDQ_model_detected: + # quantized_weight_tensor = w_DQL_node.inputs[1].values + if is_weight_tensor_quantized and not QCDQ_model_detected: + quantized_weight_tensor = w_DQL_node.inputs[0].values + elif helper.is_constant_tensor(w_QL_node): + quantized_weight_tensor = w_QL_node.values + quantized_weight_tensor = np.clip(quantized_weight_tensor, S8_MIN, S8_MAX) + quantized_weight_tensor = np.round(quantized_weight_tensor) + quantized_weight_tensor = quantized_weight_tensor.astype(np.int8) + elif not helper.is_constant_tensor(w_QL_node): + weight_tensor = w_QL_node.inputs[0] + weight_scale_tensor = w_QL_node.inputs[1] + weight_zp_tensor = w_QL_node.inputs[2] + + weight_scaled_tensor = weight_scale_tensor.values * np.ones(weight_tensor.shape) + if QCDQ_model_detected: + weight_scaled_tensor = np.ones(weight_tensor.shape) * weight_scale_tensor.values[:, np.newaxis, np.newaxis, np.newaxis] + b = weight_tensor.values / weight_scaled_tensor + c = weight_zp_tensor.values * np.ones(weight_tensor.shape) + if QCDQ_model_detected: + c = weight_zp_tensor.values[:, np.newaxis, np.newaxis, np.newaxis] * np.ones(weight_tensor.shape) + quantized_weight_tensor = b + c + if weight_zp_tensor.dtype == "int8": + quantized_weight_tensor = np.clip(quantized_weight_tensor, S8_MIN, S8_MAX) + elif weight_zp_tensor.dtype == "uint8": + quantized_weight_tensor = 
np.clip(quantized_weight_tensor, U8_MIN, U8_MAX) + quantized_weight_tensor = np.round(quantized_weight_tensor) + quantized_weight_tensor = quantized_weight_tensor.astype(np.int8) + if QCDQ_model_detected: + clip_node = w_DQL_node.i() + clip_node.inputs.clear() + clip_node.outputs.clear() + + if has_bias and is_bias_tensor_quantized: + quantized_bias_tensor = b_DQL_node.inputs[0].values + elif is_fp32_bias_embedded and has_bias: + bias_tensor = b_QL_tensor + bias_scale_tensor1 = w_QL_node.inputs[1] + bias_zp_tensor = w_QL_node.inputs[2] + + # satutration after QL node + a = x_scale * bias_scale_tensor1.values + b = bias_tensor.values / a + # Zero point is set to 0 for quantizing bias + d = b + d = np.round(d) + quantized_bias_tensor = d + quantized_bias_tensor = np.clip(quantized_bias_tensor, S32_MIN, S32_MAX) + quantized_bias_tensor = np.round(quantized_bias_tensor) + quantized_bias_tensor = quantized_bias_tensor.astype(np.int32) + else: + bias_tensor = b_QL_node.inputs[0] + bias_scale_tensor1 = b_QL_node.inputs[1] + bias_zp_tensor = b_QL_node.inputs[2] + + # satutration after QL node + a = bias_scale_tensor1.values * np.ones(bias_tensor.shape) + b = bias_tensor.values / a + c = bias_zp_tensor.values * np.ones(bias_tensor.shape) + d = b + c + if bias_zp_tensor.dtype == "int8": + d = np.clip(d, S8_MIN, S8_MAX) + elif bias_zp_tensor.dtype == "uint8": + d = np.clip(d, U8_MIN, U8_MAX) + d = np.round(d) + + # now again dequantize it + e = d * a + f = e - c + # f is now fp32 tensor + + bias_scale = x_scale * w_scale + bias_scale_tensor = bias_scale * np.ones(bias_tensor.shape) + quantized_bias_tensor = (f / bias_scale_tensor) + quantized_bias_tensor = np.clip(quantized_bias_tensor, S32_MIN, S32_MAX) + quantized_bias_tensor = np.round(quantized_bias_tensor) + quantized_bias_tensor = quantized_bias_tensor.astype(np.int32) + + x_QL_node = x_DQL_node.i() + is_x_QL_maxpool = False + is_X_QL_transpose = True if x_QL_node.op == "Transpose" else False + maxpool_input_s8 = False # True means s8 False means u8 + if x_QL_node.op == "MaxPool": + is_x_QL_maxpool = True + + if helper.is_parent_exist(x_QL_node, 0, 0): + if x_QL_node.i().op == "Relu": + if remove_relu: + # if this flag is enabled, then relu will not be added thus x_name will be x_QL's output tensor name + x_name = x_QL_node.outputs[0].name + else: + if (x_QL_node.i().i().op == "Conv") or (x_QL_node.i().i().op == "Add" and x_QL_node.i().i().i().inputs[2].values.dtype == np.int8): + + """ + these are 2 condtions + one in resnet50v1 + + DQL DQL + | | + | | + V | + Add<------------- + | + | + V + Relu------------------------------ + | + | + QL (x_QL_node) + | + | + DQL DQL DQL + | | | + | | | + Conv<------------ + + if Add input is s8 + x_relu_node = Relu + relu will be maintained due to s8 data type thus + x_name = relu's output + + other case is in Resnet50v1.5 + + Conv + | + | + Relu + | + | + QL + | + | + DQL DQL DQL + | | | + | | | + Conv<------------ + + we maintain relu node here thus x_name = relu's output + + """ + x_relu_node = x_QL_node.i() + x_name = x_relu_node.outputs[0].name + else: + x_name = x_QL_node.outputs[0].name + elif x_QL_node.op == "MaxPool": + """ + this is resnet50v1 case + + QL + | + | + V + Maxpool + | + | + V + DQL DQL DQL + | | | + | | | + V | | + Conv<------------ + + """ + x_name = x_QL_node.outputs[0].name + if x_QL_node.i().op == "QuantizeLinear": + if (x_QL_node.i()).inputs[2].dtype == np.int8: + maxpool_input_s8 = True + elif (x_QL_node.i()).inputs[2].dtype == np.uint8: + maxpool_input_s8 = False + else: + 
x_name = x_QL_node.outputs[0].name + else: + x_name = x_QL_node.outputs[0].name + + if is_relu_present and not(remove_relu): + y_name = conv_node.outputs[0].name + else: + y_name = y_QL_node.outputs[0].name + + x_scale_name = conv_node.name + "_X_SCALE" + x_scale_value = x_scale + x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, + tensor_array=x_scale_value, + data_type=onnx.TensorProto.FLOAT) + + x_zp_name = conv_node.name + "_X_ZERO_POINT" + x_zp_value = x_zp_tensor.values + + if aecg_zendnn_opt and conv_count > 0: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + else: + if is_x_QL_maxpool: + if maxpool_input_s8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + else: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + elif is_X_QL_transpose: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + else: + if (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.int8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + elif (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.uint8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + elif x_QL_node.op == "Relu" or x_QL_node.op == "Clip": + if (x_QL_node.i().op == "QuantizeLinear" and x_QL_node.i().inputs[2].dtype == np.int8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + elif (x_QL_node.i().op == "QuantizeLinear" and x_QL_node.i().inputs[2].dtype == np.uint8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + else: + print("ERROR Please check x_zp_tensor of ", conv_node.name) + + w_name = conv_node.inputs[1].name + w_value = quantized_weight_tensor + w_tensor = helper.create_initializer_tensor(name=w_name, + tensor_array=w_value, + data_type=onnx.TensorProto.INT8) + + w_scale_name = conv_node.name + "_W_SCALE" + w_scale_value = w_scale + w_scale_tensor = helper.create_initializer_tensor(name=w_scale_name, + tensor_array=w_scale_value, + data_type=onnx.TensorProto.FLOAT) + + w_zp_name = conv_node.name + "_W_ZERO_POINT" + w_zp_value = w_zp_tensor.values + w_zp_tensor = helper.create_initializer_tensor(name=w_zp_name, + tensor_array=w_zp_value, + data_type=onnx.TensorProto.INT8) + + y_scale_name = conv_node.name + "_Y_SCALE" + y_scale_value = y_scale + y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, + tensor_array=y_scale_value, + data_type=onnx.TensorProto.FLOAT) + + y_zp_name = conv_node.name + "_Y_ZERO_POINT" + y_zp_value = y_zp_tensor.values + + if aecg_zendnn_opt: + # if this opt is enabled then y_zp has be to set to u8 type + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.UINT8) + else: + if y_zp_tensor.dtype == np.int8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.INT8) + elif y_zp_tensor.dtype == np.uint8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, 
+ data_type=onnx.TensorProto.UINT8) + + if has_bias: + b_name = conv_node.inputs[2].name + b_value = quantized_bias_tensor + b_tensor = helper.create_initializer_tensor(name=b_name, + tensor_array=b_value, + data_type=onnx.TensorProto.INT32) + + if helper.is_attr_exist(conv_node, 'auto_pad'): + auto_pad_attr = conv_node.attrs["auto_pad"] + else: + auto_pad_attr = "NOTSET" + + if helper.is_attr_exist(conv_node, 'dilations'): + dilations_attr = conv_node.attrs["dilations"] + else: + dilations_attr = 1 + + if helper.is_attr_exist(conv_node, 'group'): + group_attr = conv_node.attrs["group"] + else: + group_attr = 1 + + if helper.is_attr_exist(conv_node, 'pads'): + pads_attr = conv_node.attrs["pads"] + else: + pads_attr = [0,0,0,0] + + if helper.is_attr_exist(conv_node, 'strides'): + strides_attr = conv_node.attrs["strides"] + else: + strides_attr = 1 + + qlinearconv_node = onnx.helper.make_node(name = conv_node.name, op_type = "QLinearConv", + inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name, b_name], + outputs = [y_name], auto_pad = auto_pad_attr, group = group_attr, dilations = dilations_attr, + kernel_shape = conv_node.attrs["kernel_shape"], pads = pads_attr, strides = strides_attr) + + if is_relu_present: + relu_node = onnx.helper.make_node(name = relu_node.name, op_type = "Relu", inputs = [conv_node.outputs[0].name], outputs = [relu_node.outputs[0].name]) + self.relu_node = relu_node + + self.node = qlinearconv_node + + intializer_list = [] + intializer_list.append(x_scale_tensor) + intializer_list.append(x_zp_tensor) + intializer_list.append(w_tensor) + intializer_list.append(w_scale_tensor) + intializer_list.append(w_zp_tensor) + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + intializer_list.append(b_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list + + def get_relu_node(self): + return self.relu_node diff --git a/src/qonnx/transformation/operators/quantizelinear_op.py b/src/qonnx/transformation/operators/quantizelinear_op.py new file mode 100644 index 00000000..d35b21b0 --- /dev/null +++ b/src/qonnx/transformation/operators/quantizelinear_op.py @@ -0,0 +1,78 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
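+#
+# NOTE (editorial, illustrative): QuantizeLinear maps fp32 to int8/uint8 as
+#     q = saturate(round(x / y_scale) + y_zero_point)
+# e.g. with y_scale = 1/128 and y_zero_point = 0 (int8), x = [-1.0, 0.5, 1.0]
+# maps to q = [-128, 64, 127] (1.0 rounds to 128 and saturates to 127).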
+# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class QuantizeLinear: + + def __init__(self, node): + ql_node = node + + x_name = ql_node.inputs[0].name + flag = False + if helper.is_child_present(node, 0, 0) and node.o().op == "DequantizeLinear": + if helper.is_child_present(node.o(), 0, 0) and node.o().o().op == "Conv": + if helper.is_child_present(node.o().o(), 0, 0) and node.o().o().o().op == "Reshape": + flag = True + x_tensor = helper.create_initializer_tensor(name = x_name,tensor_array = ql_node.inputs[0].values, data_type = onnx.TensorProto.FLOAT) + elif helper.is_child_present(node.o(), 0, 0) and node.o().o().op == "Gemm": + flag = True + x_tensor = helper.create_initializer_tensor(name = x_name,tensor_array = ql_node.inputs[0].values, data_type = onnx.TensorProto.FLOAT) + + y_scale_name = ql_node.inputs[1].name + y_scale_value = ql_node.inputs[1].values + y_scale_tensor = helper.create_initializer_tensor(name = y_scale_name,tensor_array = y_scale_value, data_type = onnx.TensorProto.FLOAT) + + y_zp_name = ql_node.inputs[2].name + y_zp_value = ql_node.inputs[2].values + if ql_node.inputs[2].dtype == np.int8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type = onnx.TensorProto.INT8) + elif ql_node.inputs[2].dtype == np.uint8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type = onnx.TensorProto.UINT8) + + y_name = ql_node.outputs[0].name + + quantizelinear_node = onnx.helper.make_node(name = ql_node.name, op_type = "QuantizeLinear", inputs = [x_name, y_scale_name, y_zp_name], outputs = [y_name]) + + self.node = quantizelinear_node + + intializer_list = [] + if flag: + intializer_list.append(x_tensor) + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/relu_op.py b/src/qonnx/transformation/operators/relu_op.py new file mode 100644 index 00000000..58cc23cd --- /dev/null +++ b/src/qonnx/transformation/operators/relu_op.py @@ -0,0 +1,44 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+#
+#########################################################################
+
+import onnx
+
+class Relu:
+
+    def __init__(self, node):
+
+        relu_node = node
+
+        x_name = relu_node.inputs[0].name
+        y_name = relu_node.outputs[0].name
+
+        new_relu_node = onnx.helper.make_node(name = relu_node.name, op_type = "Relu",
+                                              inputs = [x_name],
+                                              outputs = [y_name])
+
+        self.node = new_relu_node
+
+    def get_node(self):
+        return self.node
diff --git a/src/qonnx/transformation/operators/reshape_op.py b/src/qonnx/transformation/operators/reshape_op.py
new file mode 100644
index 00000000..424cd38f
--- /dev/null
+++ b/src/qonnx/transformation/operators/reshape_op.py
@@ -0,0 +1,65 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#########################################################################
+
+import onnx
+from .helper import helper
+
+class Reshape:
+
+    def __init__(self, node):
+
+        reshape_node = node
+
+        x_name = reshape_node.inputs[0].name
+
+        x2_name = reshape_node.inputs[1].name
+        if helper.is_constant_tensor(reshape_node.inputs[1]):
+            x2_value = reshape_node.inputs[1].values
+            x2_tensor = helper.create_initializer_tensor(x2_name, x2_value, onnx.TensorProto.INT64)
+
+        y_name = reshape_node.outputs[0].name
+
+        # forward allowzero only when the source node carries it (opset >= 14)
+        if helper.is_attr_exist(reshape_node, 'allowzero'):
+            new_reshape_node = onnx.helper.make_node(name = reshape_node.name, op_type = "Reshape",
+                                                     inputs = [x_name, x2_name],
+                                                     outputs = [y_name],
+                                                     allowzero = reshape_node.attrs["allowzero"])
+        else:
+            new_reshape_node = onnx.helper.make_node(name = reshape_node.name, op_type = "Reshape",
+                                                     inputs = [x_name, x2_name],
+                                                     outputs = [y_name])
+
+        self.node = new_reshape_node
+
+        intializer_list = []
+        if helper.is_constant_tensor(reshape_node.inputs[1]):
+            intializer_list.append(x2_tensor)
+        self.intializer_list = intializer_list
+
+    def get_node(self):
+        return self.node
+
+    def get_intializers(self):
+        return self.intializer_list
\ No newline at end of file
diff --git a/src/qonnx/transformation/operators/resize_op.py b/src/qonnx/transformation/operators/resize_op.py
new file mode 100644
index 00000000..be571f1b
--- /dev/null
+++ b/src/qonnx/transformation/operators/resize_op.py
@@ -0,0 +1,57 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class Resize: + + def __init__(self, node): + + resize_node = node + + x1_name = resize_node.inputs[0].name + x2_name = resize_node.inputs[1].name + x3_name = resize_node.inputs[2].name + x4_name = resize_node.inputs[3].name + + y_name = resize_node.outputs[0].name + + # Resize has 4 inputs, x, roi, scales, sizes. With later 3 as optional. + # In the model (retinanet) there are 2 inputs X and sizes thus 2nd input is obtained at 3rd index. + # 1st and 2nd index i.e x2_name and x3_name come out to be empty + print("WARNING check inputs of resize node") + + new_resize_node = onnx.helper.make_node(name = resize_node.name, op_type = "Resize", + inputs = [x1_name, x2_name, x3_name, x4_name], + outputs = [y_name], + coordinate_transformation_mode = resize_node.attrs["coordinate_transformation_mode"], + cubic_coeff_a = resize_node.attrs["cubic_coeff_a"], + mode = resize_node.attrs["mode"], + nearest_mode = resize_node.attrs["nearest_mode"]) + + self.node = new_resize_node + + def get_node(self): + return self.node diff --git a/src/qonnx/transformation/operators/shape_op.py b/src/qonnx/transformation/operators/shape_op.py new file mode 100644 index 00000000..aadc1179 --- /dev/null +++ b/src/qonnx/transformation/operators/shape_op.py @@ -0,0 +1,44 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class Shape: + + def __init__(self, node): + + shape_node = node + + x_name = shape_node.inputs[0].name + y_name = shape_node.outputs[0].name + + new_shape_node = onnx.helper.make_node(name = shape_node.name, op_type = "Shape", + inputs = [x_name], + outputs = [y_name]) + + self.node = new_shape_node + + def get_node(self): + return self.node diff --git a/src/qonnx/transformation/operators/slice_op.py b/src/qonnx/transformation/operators/slice_op.py new file mode 100644 index 00000000..ae06e86b --- /dev/null +++ b/src/qonnx/transformation/operators/slice_op.py @@ -0,0 +1,77 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
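+#
+# NOTE (editorial, illustrative): Slice(starts, ends, axes) follows numpy
+# semantics, e.g. starts=[1], ends=[3], axes=[0] selects data[1:3] along
+# axis 0; the optional 5th input (steps) is left out below, which defaults
+# every step to 1.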
+#
+#########################################################################
+
+import onnx
+from .helper import helper
+
+class Slice:
+
+    def __init__(self, node):
+
+        slice_node = node
+        x1_name = slice_node.inputs[0].name
+
+        x2_name = slice_node.inputs[1].name
+        x2_value = slice_node.inputs[1].values
+        x2_tensor = helper.create_initializer_tensor(x2_name, x2_value, onnx.TensorProto.INT64)
+
+        x3_name = slice_node.inputs[2].name
+        x3_value = slice_node.inputs[2].values
+        x3_tensor = helper.create_initializer_tensor(x3_name, x3_value, onnx.TensorProto.INT64)
+
+        x4_name = slice_node.inputs[3].name
+        x4_value = slice_node.inputs[3].values
+        x4_tensor = helper.create_initializer_tensor(x4_name, x4_value, onnx.TensorProto.INT64)
+
+        # the optional 5th input (steps) is not propagated; steps default to 1
+        # x5_name = slice_node.inputs[4].name
+        # x5_value = slice_node.inputs[4].values
+        # x5_tensor = helper.create_initializer_tensor(x5_name, x5_value, onnx.TensorProto.INT64)
+
+        y_name = slice_node.outputs[0].name
+
+        # new_slice_node = onnx.helper.make_node(name = slice_node.name,
+        #                                        op_type = "Slice",
+        #                                        inputs = [x1_name, x2_name, x3_name, x4_name, x5_name],
+        #                                        outputs = [y_name])
+
+        new_slice_node = onnx.helper.make_node(name = slice_node.name,
+                                               op_type = "Slice",
+                                               inputs = [x1_name, x2_name, x3_name, x4_name],
+                                               outputs = [y_name])
+
+        self.node = new_slice_node
+
+        intializer_list = []
+        intializer_list.append(x2_tensor)
+        intializer_list.append(x3_tensor)
+        intializer_list.append(x4_tensor)
+        # intializer_list.append(x5_tensor)
+        self.intializer_list = intializer_list
+
+    def get_node(self):
+        return self.node
+
+    def get_intializers(self):
+        return self.intializer_list
\ No newline at end of file
diff --git a/src/qonnx/transformation/operators/softmax_op.py b/src/qonnx/transformation/operators/softmax_op.py
new file mode 100644
index 00000000..4e7f9786
--- /dev/null
+++ b/src/qonnx/transformation/operators/softmax_op.py
@@ -0,0 +1,45 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
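+#
+# NOTE (editorial, illustrative): Softmax rescales the target axis into a
+# probability distribution, y = exp(x) / sum(exp(x), axis). A minimal numpy
+# sketch of the default last-axis case (hypothetical values), runnable on
+# its own:
+if __name__ == "__main__":
+    import numpy as np
+    x = np.array([[1.0, 2.0, 3.0]], dtype=np.float32)
+    e = np.exp(x - x.max(axis=-1, keepdims=True))  # subtract the max for numerical stability
+    print(e / e.sum(axis=-1, keepdims=True))       # each row sums to 1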
+# +######################################################################### + +import onnx + +class Softmax: + + def __init__(self, node): + + softmax_node = node + + x_name = softmax_node.inputs[0].name + y_name = softmax_node.outputs[0].name + + new_sftmx_node = onnx.helper.make_node(name = softmax_node.name, op_type = "Softmax", + inputs = [x_name], + outputs = [y_name], + axis = softmax_node.attrs["axis"]) + + self.node = new_sftmx_node + + def get_node(self): + return self.node \ No newline at end of file diff --git a/src/qonnx/transformation/operators/squeeze_op.py b/src/qonnx/transformation/operators/squeeze_op.py new file mode 100644 index 00000000..bdfbae0d --- /dev/null +++ b/src/qonnx/transformation/operators/squeeze_op.py @@ -0,0 +1,57 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper + +class Squeeze: + + def __init__(self, node): + + squeeze_node = node + x1_name = squeeze_node.inputs[0].name + + x2_name = squeeze_node.inputs[1].name + x2_value = squeeze_node.inputs[1].values + x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.INT64) + + y_name = squeeze_node.outputs[0].name + + new_squeeze_node = onnx.helper.make_node(name = squeeze_node.name, + op_type = "Squeeze", + inputs = [x1_name, x2_name], + outputs = [y_name]) + + self.node = new_squeeze_node + + intializer_list = [] + intializer_list.append(x2_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/transpose_op.py b/src/qonnx/transformation/operators/transpose_op.py new file mode 100644 index 00000000..4607a600 --- /dev/null +++ b/src/qonnx/transformation/operators/transpose_op.py @@ -0,0 +1,45 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class Transpose: + + def __init__(self, node): + + transpose_node = node + + x_name = transpose_node.inputs[0].name + y_name = transpose_node.outputs[0].name + + new_transpose_node = onnx.helper.make_node(name = transpose_node.name, op_type = "Transpose", + inputs = [x_name], + outputs = [y_name], + perm = transpose_node.attrs["perm"]) + + self.node = new_transpose_node + + def get_node(self): + return self.node diff --git a/src/qonnx/transformation/operators/unsqueeze_op.py b/src/qonnx/transformation/operators/unsqueeze_op.py new file mode 100644 index 00000000..b59d8d52 --- /dev/null +++ b/src/qonnx/transformation/operators/unsqueeze_op.py @@ -0,0 +1,62 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+#
+#########################################################################
+
+import onnx
+from .helper import helper
+
+class Unsqueeze:
+
+    def __init__(self, node):
+
+        unsq_node = node
+
+        x1_name = unsq_node.inputs[0].name
+        y_name = unsq_node.outputs[0].name
+
+        # Keep the axes input by name so that a non-constant axes tensor does not
+        # leave axes_tensor undefined when the new node is created below.
+        axes_name = unsq_node.inputs[1].name
+        axes_tensor = None
+        if helper.is_constant_tensor(unsq_node.inputs[1]):
+            if unsq_node.inputs[1].dtype == "int64":
+                axes_tensor = helper.create_initializer_tensor(name=unsq_node.inputs[1].name,
+                                                               tensor_array=unsq_node.inputs[1].values,
+                                                               data_type=onnx.TensorProto.INT64)
+            else:
+                print("ERROR please check the axes data type for Unsqueeze node ", unsq_node.name)
+
+        new_unsq_node = onnx.helper.make_node(name = unsq_node.name, op_type = "Unsqueeze",
+                                              inputs = [x1_name, axes_name],
+                                              outputs = [y_name])
+
+        intializer_list = []
+        if axes_tensor is not None:
+            intializer_list.append(axes_tensor)
+        self.intializer_list = intializer_list
+
+        self.node = new_unsq_node
+
+    def get_node(self):
+        return self.node
+
+    def get_intializers(self):
+        return self.intializer_list
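All of these operator wrappers follow the same small contract: take an onnx-graphsurgeon node, re-emit it as a plain onnx NodeProto via get_node(), and expose any initializer tensors via get_intializers(). As a rough illustration of that contract, here is a minimal sketch that feeds one hand-built graphsurgeon node through the Unsqueeze wrapper above; the tensor names and shapes are invented, and it assumes helper.is_constant_tensor simply recognizes gs.Constant inputs:

    import numpy as np
    import onnx
    import onnx_graphsurgeon as gs

    from qonnx.transformation.operators.unsqueeze_op import Unsqueeze

    # Hand-build a single Unsqueeze node with a constant int64 axes input.
    x = gs.Variable(name="x", dtype=np.float32, shape=[3, 224, 224])
    axes = gs.Constant(name="axes", values=np.array([0], dtype=np.int64))
    y = gs.Variable(name="y", dtype=np.float32, shape=[1, 3, 224, 224])
    node = gs.Node(op="Unsqueeze", name="unsq0", inputs=[x, axes], outputs=[y])

    # The wrapper hands back a NodeProto plus the axes initializer, which is
    # exactly what onnx.helper.make_graph expects.
    wrapper = Unsqueeze(node)
    graph_def = onnx.helper.make_graph(
        nodes=[wrapper.get_node()],
        name="unsqueeze_demo",
        inputs=[onnx.helper.make_tensor_value_info("x", onnx.TensorProto.FLOAT, [3, 224, 224])],
        outputs=[onnx.helper.make_tensor_value_info("y", onnx.TensorProto.FLOAT, [1, 3, 224, 224])],
        initializer=wrapper.get_intializers(),
    )
    onnx.checker.check_model(onnx.helper.make_model(graph_def))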
From ee2bf1136822507b75be05fac8fd5938e1bf3c22 Mon Sep 17 00:00:00 2001
From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com>
Date: Thu, 21 Sep 2023 19:58:57 +0530
Subject: [PATCH 03/20] QCDQ to QOp

---
 src/qonnx/transformation/qcdq_to_qop.py | 1260 +++++++++++++++++++++++
 1 file changed, 1260 insertions(+)
 create mode 100644 src/qonnx/transformation/qcdq_to_qop.py

diff --git a/src/qonnx/transformation/qcdq_to_qop.py b/src/qonnx/transformation/qcdq_to_qop.py
new file mode 100644
index 00000000..133ee82f
--- /dev/null
+++ b/src/qonnx/transformation/qcdq_to_qop.py
@@ -0,0 +1,1260 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#########################################################################
+
+import onnx_graphsurgeon as gs
+import numpy as np
+import onnx
+import os
+from onnx import TensorProto
+import sys
+
+from qonnx.transformation.operators.qlinearconv_op import *
+from qonnx.transformation.operators.quantizelinear_op import *
+from qonnx.transformation.operators.dequantizelinear_op import *
+from qonnx.transformation.operators.maxpool_op import *
+from qonnx.transformation.operators.add_op import *
+from qonnx.transformation.operators.averagepool_op import *
+from qonnx.transformation.operators.squeeze_op import *
+from qonnx.transformation.operators.globalAveragePool_op import *
+from qonnx.transformation.operators.flatten_op import *
+from qonnx.transformation.operators.matmul_op import *
+from qonnx.transformation.operators.lrn_op import *
+from qonnx.transformation.operators.concat_op import *
+from qonnx.transformation.operators.softmax_op import *
+from qonnx.transformation.operators.matmul_retained_op import *
+from qonnx.transformation.operators.cast_op import *
+from qonnx.transformation.operators.gather_op import *
+from qonnx.transformation.operators.gemm_op import *
+from qonnx.transformation.operators.gemm_op_optimized import *
+from qonnx.transformation.operators.greater_op import *
+from qonnx.transformation.operators.less_op import *
+from qonnx.transformation.operators.slice_op import *
+from qonnx.transformation.operators.transpose_op import *
+from qonnx.transformation.operators.relu_op import *
+from qonnx.transformation.operators.reshape_op import *
+from qonnx.transformation.operators.identity_op import *
+from qonnx.transformation.operators.shape_op import *
+from qonnx.transformation.operators.resize_op import *
+from qonnx.transformation.operators.unsqueeze_op import *
+from qonnx.transformation.operators.clip_op import *
+
+# Stand-in for command-line options: feature toggles for the optional graph
+# rewrites performed by QLinearConvert below.
+class CustomEnv():
+    imp_strides_opt=False
+    save_opt_qdq=False
+    change_avgpool=False
+    aecg_zendnn_opt=True
+    remove_relu=False
+    retain_matmul=False
+    is_ryzenai_model=False
+    is_retinanet=False
+
+    def __init__(self):
+        pass
+
+def QLinearConvert(model_file):
+    args = CustomEnv()
+    if os.path.isfile(model_file):
+        onnx_model_name = os.path.basename(model_file)
+        model_path = model_file
+        graph = gs.import_onnx(onnx.load(model_path))
+
+        graph.fold_constants()
+
+        aecg_zendnn_opt = args.aecg_zendnn_opt
+        retain_matmul = args.retain_matmul
+
+        def is_parent_conv(index, add_node):
+            if len(add_node.inputs[index].inputs)==1 and add_node.inputs[index].inputs[0].op == "DequantizeLinear":
+                dql_node = add_node.inputs[index].inputs[0]
+                if len(dql_node.inputs)>0 and len(dql_node.inputs[0].inputs)==1 and dql_node.i().op == "QuantizeLinear":
+                    ql_node = dql_node.i()
+                    if len(ql_node.inputs)>0 and len(ql_node.inputs[0].inputs)==1 and ql_node.i().op == "Conv":
+                        return True
+            return False
+
+        def is_relu_input_s8_or_fp32(node):
+            if node.op == "Add" and (len(node.inputs[1].inputs)==0):
+                return True
+            elif helper.is_parent_exist(node, 0, 0) and node.i().op == "DequantizeLinear":
+                if helper.is_parent_exist(node.i(), 0, 0):
+                    add_node_ql_parent = node.i().i()
+                    if add_node_ql_parent.inputs[2].values.dtype == np.int8:
+                        return True
+                else:
+                    print("Please check Add node, ", node.name)
+            elif helper.is_parent_exist(node, 1, 0) and node.inputs[1].inputs[0].op == "DequantizeLinear":
+                if helper.is_parent_exist(node.inputs[1].inputs[0], 1, 0):
+                    add_node_ql_parent = (node.inputs[1].inputs[0]).i()
+                    if add_node_ql_parent.inputs[2].values.dtype == np.int8:
+                        return True
+                else:
+                    print("Please check Add node, ", node.name)
+            return False
+
+        def is_any_output_tensor_graph_output(node):
+            for i in range(len(graph.outputs)):
+                output_tensor_name = graph.outputs[i].name
+                if node.outputs[0].name == output_tensor_name:
+                    return True
+            return False
+
+        # Return the name of the node 6/7/8 levels below, depending on whether
+        # Relu nodes are present on the path.
+        def get_child_name(node):
+            if helper.is_child_present(node, 0, 0):
+                c1 = node.o()
+                if c1.op == "Relu": # c1 is Relu
+                    if helper.is_child_present(c1, 0, 0):
+                        c1 = c1.o() # c1 is QL node
+                # c1 is QL now
+                if helper.is_child_present(c1, 0, 0):
+                    c2 = c1.o() # c2 is DQL
+                    if helper.is_child_present(c2, 0, 0):
+                        c3 = c2.o() # c3 is Conv
+
+                        if helper.is_child_present(c3, 0, 0):
+                            c4 = c3.o()
+
+                            if c4.op == "Relu":
+                                if helper.is_child_present(c4, 0, 0):
+                                    c4 = c4.o()
+                            # c4 is QL now
+
+                            if helper.is_child_present(c4, 0, 0):
+                                c5 = c4.o() # c5 is DQL
+                                if helper.is_child_present(c5, 0, 0):
+                                    c6 = c5.o() # c6 is Conv
+
+                                    return c6.name
+
+            print("************************* ERROR ************************* get_child_name() returned empty string")
+            return ""
+
+        def get_child_conv(node):
+            if helper.is_child_present(node, 0, 0):
+                c1 = node.o()
+                if c1.op == "Relu" and helper.is_child_present(c1, 0, 0):
+                    c1 = c1.o()
+                if helper.is_child_present(c1, 0, 0):
+                    c2 = c1.o()
+                    if helper.is_child_present(c2, 0, 0):
+                        c3 = c2.o()
+                        return c3
+
+        supported_op = ["Conv", "QuantizeLinear", "DequantizeLinear", "MaxPool", "Add", "AveragePool", "Squeeze", "GlobalAveragePool", "Flatten", "MatMul", "LRN", "Concat", "Softmax", "Cast", "Gather", "Gemm", "Greater", "Less", "Slice", "Transpose", "Relu", "Reshape", "Shape", "Resize", "Unsqueeze", "Clip"]
+
+        for node in graph.nodes:
+            if node.op not in supported_op:
+                print(node.op, " op is currently not supported in the converter. 
Exiting model converter") + sys.exit() + + + maxpool_count = 0 + ctr = 0 + cast_count = 0 + clip_num = 0 + for node in graph.nodes: + + # Resnet strides optimization for Resnet50v1 + + """ + |--------->Relu2 + | | + V | + QL | + | | + | | + V | + DQL DQL DQL | + | | | | + | | | | + | | V | + ---------------------->Conv7 | + | | + | | + V | + QL | + | | + | | + V | + DQL DQL DQL | + | | | | + | | | | + | | V | + --------------------->Conv6 | + | | + | | + V | + QL | + | | + | | + V | + DQL DQL DQL | + | | | | + | | | | + | | V | + --------------------->Conv5 | + | | + | | + V | + QL | + | | + | | + V | + DQL | + | | + | V + |--------->Add2 + | + | + V + Relu1 + | + | + V + QL + | + | + V + ------------------------------------DQL1 DQL DQL + | | | | + | | | | + | V | | + | Conv4<-------------------- + | | + | | + | V + | QL + | | + | | + | V + | DQL DQL DQL + | | | | + | | | | + | V | | + | Conv3<-------------------- + | | + | | + | V + | QL + | | + | | + | V + | DQL DQL DQL DQL DQL + | | | | | | + | | | | | | + V | | V | | + Conv1<-------------------- Conv2<-------------------- + | | + | | + V V + QL QL + | | + | | + V V + DQL DQL + | | + | | + V V + ------------------------------------->Add1 + + + Add1 = add_node + Relu1 = relu_node + DQL1 = relu_dql_node + + Conv1 = conv_node1 + Conv2 = conv_node2 + make sure conv_node1 has strides [2,2] and conv_node2 has strides [1,1] and conv_node1 will be the shortcut path + + Conv4 and Conv1 are child1_node and child2_node (not necessary conv4 is child1_node and conv1 is child2_node) + but we are sure conv4's 6th child is conv2 thus get_child_name() gives 6th child name of child1_node and child2_name and check if the 6th child name = conv2's name when it is found + make child1_node = conv_node1 that is Conv1 = conv_node1 and child1_node and conv4 = child2_node + conv1 and conv4 should have strides = [2,2] + + Add2 = upper_add_node + Conv5 = upper_conv_node it should have strides = [1,1] + Relu2 = upper_relu_node + + Now add Maxpool between Relu2 and Add2 + """ + if args.imp_strides_opt and node.op == "Add" and len(node.inputs)==2: + add_node = node + if is_parent_conv(0, add_node) and is_parent_conv(1, add_node): + conv_node1 = add_node.inputs[0].inputs[0].i().i() + conv_node2 = add_node.inputs[1].inputs[0].i().i() + + strides1 = conv_node1.attrs["strides"] + strides2 = conv_node2.attrs["strides"] + + if (strides1==[1,1] and strides2==[2,2]) or (strides1==[2,2] and strides2==[1,1]): + if strides1==[1,1] and strides2==[2,2]: + temp_node = conv_node1 + conv_node1 = conv_node2 + conv_node2 = temp_node + # conv_node1 has stride [2,2] + relu_node = conv_node1.i().i().i() + # due to retinanet cases discussed below, instead of taking dql_node at relu_node.o().o() we take QL node at relu_node.o(), please check below case for more clarity + relu_ql_node = relu_node.o() + + if (len(relu_ql_node.outputs[0].outputs)==2): + + child1_node = relu_ql_node.outputs[0].outputs[0].o() + child2_node = relu_ql_node.outputs[0].outputs[1].o() + + if child1_node.op == "Conv" and child2_node.op == "Conv": + + if (child1_node.name == conv_node1.name and get_child_name(child2_node) == conv_node2.name) or (child2_node.name == conv_node1.name and get_child_name(child1_node) == conv_node2.name): + + if not(child1_node.name == conv_node1.name): + tem = child1_node + child1_node = child2_node + child2_node = tem + + if child1_node.attrs["strides"] == [2,2] and child2_node.attrs["strides"] == [2,2]: + + upper_add_node = relu_node.i() + + if upper_add_node.inputs[0].inputs[0].op == 
"Relu": + + upper_conv_node = upper_add_node.inputs[1].inputs[0].i().i() + upper_relu_node = upper_add_node.inputs[0].inputs[0] + + elif upper_add_node.inputs[1].inputs[0].op == "Relu": + + upper_conv_node = upper_add_node.inputs[0].inputs[0].i().i() + upper_relu_node = upper_add_node.inputs[1].inputs[0] + + else: + continue + if not (upper_conv_node.attrs["strides"] == [1,1]): + continue + else: + #all conditions satisfied + child1_node.attrs["strides"] = [1,1] + child2_node.attrs["strides"] = [1,1] + upper_conv_node.attrs["strides"] = [2,2] + + #now add maxpool between upper_relu and upper add + maxpool_attrs = { + "strides":[2,2], + "kernel_shape":[1,1] + } + maxpool_output = gs.Variable(name = "maxpool_output_" + child1_node.name, dtype = np.uint8) + + if len(upper_relu_node.outputs[0].outputs) == 1: + maxpool_node = gs.Node(op="MaxPool", name = "maxpool_" + child1_node.name, attrs=maxpool_attrs, inputs = [upper_relu_node.o().o().outputs[0]], outputs = [maxpool_output]) + else: + maxpool_node = gs.Node(op="MaxPool", name = "maxpool_" + child1_node.name, attrs=maxpool_attrs, inputs = [upper_relu_node.outputs[0]], outputs = [maxpool_output]) + + # conv_x_dql_node = child1_node.i() + list2 = [upper_add_node.inputs[0], upper_add_node.inputs[1]] + + if upper_relu_node.outputs[0].name == upper_add_node.inputs[0].name: + list2 = [upper_add_node.inputs[1]] + upper_add_node.inputs.clear() + upper_add_node.inputs = [maxpool_output, list2[0]] + elif upper_relu_node.outputs[0].name == upper_add_node.inputs[1].name: + list2 = [upper_add_node.inputs[0]] + upper_add_node.inputs.clear() + upper_add_node.inputs = [list2[0], maxpool_output] + else: + if upper_relu_node.o().o().outputs[0].name == upper_add_node.inputs[0].name: + list2 = [upper_add_node.inputs[1]] + upper_add_node.inputs.clear() + upper_add_node.inputs = [maxpool_output, list2[0]] + elif upper_relu_node.o().o().outputs[0].name == upper_add_node.inputs[1].name: + list2 = [upper_add_node.inputs[0]] + upper_add_node.inputs.clear() + upper_add_node.inputs = [list2[0], maxpool_output] + else: + print("ERROR in strides optimization") + graph.nodes.append(maxpool_node) + + if node.op == "Gemm": + gemm_node = node + if helper.is_child_present(gemm_node, 0, 0) and gemm_node.o().op == "Softmax": + continue + gemm_input_node = gemm_node.i() + if gemm_input_node.op == "DequantizeLinear": + + if gemm_node.inputs[1].inputs[0].op == "DequantizeLinear": + w_dql_node = gemm_node.inputs[1].inputs[0] + is_weight_quantized = True if len(w_dql_node.inputs[0].inputs) == 0 else False + if is_weight_quantized: + wt_tensor = w_dql_node.inputs[0] + else: + w_ql_node = w_dql_node.i() + wt_tensor = w_ql_node.inputs[0] + org = wt_tensor.values + new_shape = org.shape + (1,1) + new = np.reshape(org, new_shape) + if is_weight_quantized: + w_dql_node.inputs[0] = gs.Constant(name=w_dql_node.inputs[0].name, values = new.astype(np.int8)) + else: + w_ql_node.inputs[0] = gs.Constant(name=w_ql_node.inputs[0].name, values = new.astype(np.float32)) + + gemm_node.op = "Conv" + new_attrs = { + "dilations":[1,1], + "group":1, + "kernel_shape":[1,1], + "pads":[0,0,0,0], + "strides":[1,1] + } + gemm_node.attrs = new_attrs + elif gemm_input_node.op == "Flatten": + flatten_node = gemm_input_node + flatten_dql_node = flatten_node.i() + flatten_dql_node.outputs = flatten_node.outputs + flatten_node.outputs.clear() + gemm_ql_node = node.o().o() + + w_dql_node = gemm_node.inputs[1].inputs[0] + is_weight_quantized = True if len(w_dql_node.inputs[0].inputs) == 0 else False + wt_tensor = 
w_dql_node.inputs[0]
+                    if not is_weight_quantized:
+                        w_ql_node = w_dql_node.i()
+                        wt_tensor = w_ql_node.inputs[0]
+                        if w_ql_node.op == "Clip":
+                            w_ql_node = w_dql_node.i(0).i(0)
+                            wt_tensor = w_ql_node.inputs[0]
+                    org = wt_tensor.values
+                    new_shape = org.shape + (1,1)
+                    new = np.reshape(org, new_shape)
+                    if is_weight_quantized:
+                        w_dql_node.inputs[0] = gs.Constant(name=w_dql_node.inputs[0].name, values = new.astype(np.int8))
+                    else:
+                        w_ql_node.inputs[0] = gs.Constant(name=w_ql_node.inputs[0].name, values = new.astype(np.float32))
+
+                    gemm_node.op = "Conv"
+                    new_attrs = {
+                        "dilations":[1,1],
+                        "group":1,
+                        "kernel_shape":[1,1],
+                        "pads":[0,0,0,0],
+                        "strides":[1,1]
+                    }
+                    gemm_node.attrs = new_attrs
+
+                    squeeze_dim = [2, 3]
+                    Y1 = gs.Variable(name="sq_output_" + node.name, dtype=np.uint8)
+                    parent_node = gemm_ql_node if node.o().op == "Relu" else node
+
+                    X1 = parent_node.outputs[0]
+                    X2 = gs.Constant(name="axes" + node.name, values=(np.array(squeeze_dim)).astype(np.int64))
+
+                    squeeze_node = gs.Node(op="Squeeze", name="squeeze_node_" + node.name, inputs=[X1, X2], outputs=[Y1])
+
+                    gemm_ql_node.o().inputs[0] = squeeze_node.outputs[0]
+
+                    graph.nodes.append(squeeze_node)
+
+            if node.op == "Reshape":
+                reshape_node = node
+                reshape_child_node = reshape_node.o()
+
+                if reshape_child_node.op == "Gemm":
+
+                    """
+                    Removing a pattern seen in the Resnet50v1.5 model
+
+                    DQL--------------
+                     |              |
+                     |              V
+                     |            Shape
+                     |              |
+                     |              V
+                     |            Gather
+                     |              |
+                     |              V
+                     |           Unsqueeze
+                     |              |
+                     |              V
+                     |            Concat
+                     |              |
+                     |              V
+                    Reshape<--------
+                     |
+                     |      DQL    DQL
+                     |       |      |
+                     V       |      |
+                    Gemm<------------
+                     |
+                     V
+                     QL
+
+                    Connect the DQL directly to the Gemm and change the Gemm to a Conv node.
+                    """
+                    gemm_node = reshape_child_node
+                    DQL_node = reshape_node.i()
+
+                    DQL_node.outputs = reshape_node.outputs
+                    reshape_node.outputs.clear()
+
+                    gemm_DQL_node = gemm_node.inputs[1].inputs[0]
+                    gemm_QL_node = gemm_DQL_node.i()
+
+                    w_tensor = gemm_QL_node.inputs[0]
+                    original = w_tensor.values
+                    new_shape = original.shape + (1,1)
+                    new = np.reshape(original, new_shape)
+                    gemm_QL_node.inputs[0] = gs.Constant(name= gemm_QL_node.inputs[0].name , values=new.astype(np.float32))
+
+                    new_attrs = {
+                        "dilations":[1,1],
+                        "group":1,
+                        "kernel_shape":[1,1],
+                        "pads":[0,0,0,0],
+                        "strides":[1,1]
+                    }
+                    gemm_node.attrs = new_attrs
+                    gemm_node.op = "Conv"
+
+                elif reshape_child_node.op == "QuantizeLinear":
+                    reshape_parent_node = reshape_node.i()
+                    if reshape_parent_node.op == "DequantizeLinear":
+                        if len(reshape_parent_node.inputs[0].inputs) == 1 and len(reshape_child_node.outputs[0].outputs) == 1: # 1 parent and 1 child
+
+                            """
+                            Node1-------->DQL------>Reshape----->QL---------->Node2
+
+                            is changed to
+
+                            Node1------>Node2
+                            """
+                            pp = reshape_parent_node.i()
+                            cc = reshape_child_node.o()
+                            pp.outputs = reshape_child_node.outputs
+                            reshape_child_node.outputs.clear()
+                        else:
+                            # if there is any other connection to the QL or DQL node, remove only the Reshape node and leave the QL and DQL as is
+                            reshape_parent_node.outputs = reshape_node.outputs
+                            reshape_node.outputs.clear()
+
+                elif reshape_child_node.op == "Transpose":
+                    if helper.is_parent_exist(reshape_node, 0, 0) and reshape_node.i().op == "DequantizeLinear":
+                        if helper.is_parent_exist(reshape_node.i(), 0, 0) and reshape_node.i().i().op == "QuantizeLinear":
+                            new_shape = reshape_node.inputs[1].values
+                            p1 = reshape_node.i()
+                            p2 = p1.i()
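+                            # Fold the Reshape into the constant weight feeding the
+                            # QuantizeLinear: reshape the initializer in place and splice
+                            # the Reshape node out of the graph. A non-constant QL input
+                            # is left untouched.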
+                            if helper.is_constant_tensor(p2.inputs[0]):
+                                p2.inputs[0].values = np.reshape(p2.inputs[0].values, new_shape)
+                                p1.outputs = reshape_node.outputs
+                                reshape_node.outputs.clear()
+
+                elif reshape_child_node.op == "Add":
+                    if reshape_child_node.i().op == "Conv":
+                        conv_node = reshape_child_node.i()
+                        conv_node.inputs = [conv_node.inputs[0], conv_node.inputs[1], reshape_node.inputs[0]]
+                        reshape_node.inputs.clear()
+
+                        conv_node.outputs = reshape_child_node.outputs
+                        reshape_child_node.outputs.clear()
+                elif reshape_child_node.op == "Conv":
+                    reshape_node.i().outputs = reshape_node.outputs
+                    reshape_node.outputs.clear()
+                elif reshape_node.i().i().op == "Conv":
+                    reshape_node.i().outputs = reshape_node.outputs
+                    reshape_node.outputs.clear()
+
+            if node.op == "Clip":
+                clip_num = clip_num + 1
+                if helper.is_parent_exist(node, 0, 0) and (node.i().op == "Conv" or node.i().op == "Add"):
+                    if helper.is_child_present(node, 0, 0) and node.o().op == "QuantizeLinear":
+                        clip_node = node
+                        clip_max = clip_node.inputs[2].values
+
+                        p1 = clip_node.i()
+                        c1 = clip_node.o()
+
+                        # requantize the clip range into the int8 domain of the following QL node
+                        scale = c1.inputs[1].values
+                        new_clip_max_tensor = gs.Constant(name=clip_node.inputs[2].name+"_"+str(clip_num), values=(np.asarray(clip_max/scale)).astype(np.int8))
+                        new_clip_min_tensor = gs.Constant(name=clip_node.inputs[1].name+"_"+str(clip_num), values=clip_node.inputs[1].values.astype(np.int8))
+                        clip_node.inputs[2] = new_clip_max_tensor
+                        clip_node.inputs[1] = new_clip_min_tensor
+
+                        # p1---->Clip------>c1----->c2
+                        # becomes
+                        # p1---->c1----->Clip----->c2
+                        # p1 = Conv/Add, c1 = QL, c2 = anything
+                        if helper.is_child_present(c1, 0, 0):
+                            c2 = c1.o()
+                            c1.inputs = [p1.outputs[0], c1.inputs[1], c1.inputs[2]]
+                            clip_node.inputs = [c1.outputs[0], clip_node.inputs[1], clip_node.inputs[2]]
+                            c2.inputs = [clip_node.outputs[0], c2.inputs[1], c2.inputs[2]]
+                        else:
+                            # p1---->Clip------>c1---->graph.outputs
+                            # becomes
+                            # p1---->c1----->Clip---->graph.outputs
+                            c1.inputs = [p1.outputs[0], c1.inputs[1], c1.inputs[2]]
+                            clip_node.inputs = [c1.outputs[0], clip_node.inputs[1], clip_node.inputs[2]]
+
+                            clip_node.outputs[0].dtype = "int8"
+                            graph.outputs[0] = clip_node.outputs[0]
+
+            if node.op == "Transpose":
+                transpose_node = node
+                if helper.is_parent_exist(transpose_node, 0, 0) and transpose_node.i().op == "DequantizeLinear":
+                    if helper.is_parent_exist(transpose_node.i(), 0, 0) and transpose_node.i().i().op == "QuantizeLinear":
+                        td = transpose_node.i()
+                        tq = td.i()
+
+                        if helper.is_constant_tensor(tq.inputs[0]):
+                            tq.inputs[0].values = np.transpose(tq.inputs[0].values, (3,2,0,1))
+                        else:
+                            tq.inputs[0].shape = [None, 3, 224, 224]
+                        td.outputs = transpose_node.outputs
+                        transpose_node.outputs.clear()
+
+            if node.op == "Squeeze":
+                if helper.is_parent_exist(node,0,0) and node.i().op == "GlobalAveragePool":
+                    squeeze_node = node
+                    p1 = squeeze_node.i()
+
+                    if helper.is_child_present(squeeze_node, 0, 0) and squeeze_node.o().op == "Mul":
+                        mul_node = squeeze_node.o()
+
+                        if helper.is_child_present(mul_node, 0, 0) and mul_node.o().op == "QuantizeLinear":
+                            ql_node = mul_node.o()
+
+                            if helper.is_child_present(ql_node, 0, 0) and ql_node.o().op == "DequantizeLinear":
+                                dql_node = ql_node.o()
+
+                                # GlobalAveragePool ---> Squeeze ---> Mul ---> QL ---> DQL
+                                # becomes
+                                # GlobalAveragePool ---> QL ---> DQL
+                                ql_node.inputs[0] = p1.outputs[0]
+                                mul_node.outputs.clear()
+
+            if node.op == "Mul":
+                # Remove the Mul node
+                mul_node = node
+                if helper.is_parent_exist(mul_node, 0, 0) and helper.is_child_present(mul_node, 0, 0):
+                    average_pool_node = mul_node.i()
+                    average_pool_node.outputs = mul_node.outputs
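+                    # Re-pointing the producer's outputs past the Mul and then clearing
+                    # the Mul's own outputs leaves it dangling, so the later
+                    # graph.cleanup() removes it.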
+                    mul_node.outputs.clear()
+
+            if node.op == "Pad":
+                # Remove the Pad node and push its padding into the following Conv
+                pad_node = node
+                if len(pad_node.inputs) == 2:
+                    nl,cl,hl,wl,nr,cr,hr,wr = pad_node.inputs[1].values
+
+                    if helper.is_child_present(pad_node, 0 ,0) and pad_node.o().op == "Conv":
+                        conv_child_node = pad_node.o()
+                        conv_child_node.attrs['pads'] = hl,wl,hr,wr
+
+                    DQL_node = pad_node.i()
+                    DQL_node.outputs = pad_node.outputs
+                    pad_node.outputs.clear()
+
+            # TODO: Add a condition: change AveragePool to GlobalAveragePool only when the input size equals AveragePool's kernel shape; for the time being this is gated behind a flag
+            if node.op == "AveragePool" and args.change_avgpool:
+                # Change the AveragePool node to a GlobalAveragePool node.
+                node.op = "GlobalAveragePool"
+
+            if node.op == "Flatten":
+                flatten_node = node
+                if helper.is_parent_exist(flatten_node, 0, 0) and flatten_node.i().op == "DequantizeLinear":
+                    dql_node = flatten_node.i()
+                    if helper.is_child_present(flatten_node, 0, 0) and flatten_node.o().op == "QuantizeLinear":
+                        ql_node = flatten_node.o()
+                        # Don't remove this Flatten node in the VGG model: its input is a MaxPool (producing a 4d tensor) and its child is a MatMul expecting a 2d tensor.
+                        # That child MatMul is retained (not converted to Conv) because of the Add node after it, whose 2nd input is a 2d tensor; thus two 2d tensors get added.
+                        if helper.is_child_present(dql_node, 0, 0) and dql_node.i().op == "MaxPool":
+                            continue
+                        # node1--->DQL--->Flatten---->QL----->node2
+                        # becomes
+                        # node1 ----> node2
+                        node1 = dql_node.i()
+                        node1.outputs = ql_node.outputs
+                        ql_node.outputs.clear()
+
+                if helper.is_parent_exist(flatten_node, 0, 0) and flatten_node.i().op == "Relu":
+                    relu_node1 = flatten_node.i()
+                    if helper.is_child_present(flatten_node, 0, 0) and flatten_node.o().op == "QuantizeLinear":
+                        relu_node1.outputs = flatten_node.outputs
+                        flatten_node.outputs.clear()
+
+            if node.op == "Relu":
+                relu_node = node
+                if helper.is_parent_exist(relu_node, 0, 0) and relu_node.i().op == "DequantizeLinear":
+                    dql_node = relu_node.i()
+                    if helper.is_child_present(relu_node, 0, 0) and relu_node.o().op == "QuantizeLinear":
+                        ql_node = relu_node.o()
+                        node1 = dql_node.i()
+                        node2 = ql_node.o()
+                        if node1.op == "QuantizeLinear" and (not (node1.i()).op == "Add"):
+                            # if node1 produces a u8 output the Relu can also be removed, but if it creates an s8 output the Relu should be retained
+                            if node1.inputs[2].values.dtype == np.uint8:
+                                # node1--->DQL--->Relu---->QL----->node2
+                                # becomes
+                                # node1 ----> node2
+                                for i in range(len(node2.inputs)):
+                                    if node2.inputs[i].name == ql_node.outputs[0].name:
+                                        node2.inputs[i] = node1.outputs[0]
+                                ql_node.outputs.clear()
+                            else:
+                                # node1--->DQL--->Relu---->QL----->node2
+                                # becomes
+                                # node1 ----> Relu ---> node2
+
+                                # Relu has a single input
+                                relu_node.inputs = node1.outputs
+                                relu_node.outputs = ql_node.outputs
+                                ql_node.outputs.clear()
+
+                        if node1.op == "QuantizeLinear" and (node1.i()).op == "Add":
+
+                            # Add -----> node1 ------> DQL -----> Relu ------> QL -------> node2
+                            # becomes
+                            # Add ----> node1 ------> Relu ------> node2
+                            node1.outputs = dql_node.outputs
+                            dql_node.outputs.clear()
+
+                            relu_node.outputs = ql_node.outputs
+                            ql_node.outputs.clear()
+
+            if node.op == "MaxPool":
+                if helper.is_parent_exist(node, 0, 0) and helper.is_child_present(node, 0, 0):
+                    parent_node = node.i()
+                    child_node = node.o()
+                    if len(parent_node.outputs[0].outputs) == 1 and parent_node.op == "DequantizeLinear" and child_node.op == "QuantizeLinear":
+                        # DQL--->MaxPool--->QL is collapsed so MaxPool runs directly on the quantized tensor
+                        dql_node = parent_node
+                        dql_parent = dql_node.i()
+                        
dql_parent.outputs = dql_node.outputs + dql_node.outputs.clear() + + ql_node = child_node + node.outputs = ql_node.outputs + ql_node.outputs.clear() + elif len(parent_node.outputs[0].outputs) == 1 and parent_node.op == "DequantizeLinear" and child_node.op == "Conv": + dql_node = parent_node + dql_parent = dql_node.i() + node.inputs[0] = dql_parent.outputs[0] + + conv_node1 = child_node + dql_node.inputs[0] = node.outputs[0] + conv_node1.inputs[0] = dql_node.outputs[0] + + # add Squeeze as input to last DequantizeLinear node + if (not args.is_retinanet) and node.op == "DequantizeLinear" and ((len(node.outputs[0].outputs) == 0) or (len(node.outputs[0].outputs)==1 and (node.o().op == "Add" or node.o().op == "Softmax") and len(node.o().outputs[0].outputs)==0)): + + # no need to add Squeeze node if DQL is already getting 2d tensor + # TODO: add a check if input is 2d then don't add Squeeze node + # retain_matmul condition is sufficient to ensure Matmul will be present (not converted to conv) and it will give 2d tensor + if (retain_matmul): + continue + + squeeze_dim = [2, 3] + + Y1 = gs.Variable(name="sq_output" + node.name, dtype=np.int8) + parent_node = node.i() + + X1 = parent_node.outputs[0] + X2 = gs.Constant(name="axes" + node.name, values=(np.array(squeeze_dim)).astype(np.int64)) + + squeeze_node = gs.Node(op="Squeeze", name="squeeze_node" + node.name, inputs=[X1, X2], outputs=[Y1]) + + node.inputs[0] = squeeze_node.outputs[0] + graph.nodes.append(squeeze_node) + + # Retinanet case + if node.op == "DequantizeLinear": + if helper.is_parent_exist(node, 0, 0): + dql_parent = node.i() + if len(node.outputs) > 0 and len(node.outputs[0].outputs) > 1: + for i in range(len(node.outputs[0].outputs)): + # node.outputs[0].outputs[0].op is used instead of node.outputs[0].outputs[i].op because in each pass 1 child is removed + if node.outputs[0].outputs[0].op == "Shape" or node.outputs[0].outputs[0].op == "Add" or is_any_output_tensor_graph_output(node) or node.outputs[0].outputs[0].op == "Conv" or node.outputs[0].outputs[0].op == "Relu" or node.outputs[0].outputs[0].op == "Resize": + child_node = node.outputs[0].outputs[0] + s = gs.Constant(name=node.inputs[1].name + "_" + str(i), values=(node.inputs[1].values).astype(np.float32)) + zp = gs.Constant(name=node.inputs[2].name + "_" + str(i), values=(node.inputs[2].values).astype(node.inputs[2].dtype)) + y = gs.Variable(name=node.outputs[0].name + "_" + str(i), dtype=node.inputs[2].dtype) + new_dql_node = gs.Node(op = "DequantizeLinear", name = node.name + "_" + str(i), inputs = [node.i().outputs[0], s, zp], outputs = [y]) + + for j in range(len(child_node.inputs)): + if child_node.inputs[j].name == node.outputs[0].name: + child_node.inputs[j] = new_dql_node.outputs[0] + graph.nodes.append(new_dql_node) + + # QL QL-------DQL-------Conv + # | | \ + # | | \ + # DQL---------conv gets converted to DQL \ + # | | DQL + # | | + # Conv Conv + # this extra DQL needs to be removed, when later we do graph.cleanup() this node gets removed but before cleanup if any case needs QL childs it will reflect 3 childs + + for i in range(len(dql_parent.outputs[0].outputs)): + child_node = dql_parent.outputs[0].outputs[i] + if not helper.is_child_present(child_node, 0, 0) and not is_any_output_tensor_graph_output(child_node): + child_node.inputs.clear() + break + + if node.op == "Gather" and node.o().op == "Transpose": + gather_node = node + transpose_node = gather_node.o() + gather_dql_node = gather_node.i() + gather_ql_node = gather_dql_node.i() + if gather_ql_node.op == 
"Clip" and gather_dql_node.i().i().op == "QuantizeLinear": + gather_ql_node = gather_dql_node.i().i() + transpose_conv_node = transpose_node.o() + # QL QL + # | | + # | Clip + # | | + # DQL DQL + # | | + # | | + # ---------Gather OR ---------Gather + # | | + # | | + # Transpose Transpose + # | | + # | | + # Conv Conv + + # is changed to + + + # QL + # | + # | + # ------------Gather + # | + # | + # Transpose + # | + # | + # DQL + # | + # | + # Conv + gather_dql_node_inputs = gather_dql_node.inputs + gather_node.inputs[0] = gather_ql_node.outputs[0] + + gather_dql_node_inputs[0] = transpose_node.outputs[0] + transpose_conv_node.inputs[0] = gather_dql_node.outputs[0] + gather_dql_node.inputs = gather_dql_node_inputs + + + if node.op == "Conv": + ctr = ctr + 1 + conv_node = node + if len(conv_node.outputs[0].outputs) == 4: + if not (conv_node.outputs[0].outputs[0].op == "Shape" and conv_node.outputs[0].outputs[1].op == "Shape" and conv_node.outputs[0].outputs[2].op == "Shape" and conv_node.outputs[0].outputs[3].op == "Reshape"): + continue + shape_node1 = conv_node.outputs[0].outputs[0] + shape_node2 = conv_node.outputs[0].outputs[1] + shape_node3 = conv_node.outputs[0].outputs[2] + reshape_node = conv_node.outputs[0].outputs[3] + if helper.is_child_present(reshape_node, 0, 0) and reshape_node.o().op == "Transpose": + if helper.is_child_present(reshape_node.o(), 0, 0) and reshape_node.o().o().op == "Reshape": + if helper.is_child_present(reshape_node.o().o(), 0, 0) and reshape_node.o().o().o().op == "Concat": + if helper.is_child_present(reshape_node.o().o().o(), 0, 0) and reshape_node.o().o().o().o().op == "QuantizeLinear": + ret_ql_node = reshape_node.o().o().o().o() + # this is retinaNet pattern at the end + # Conv--->Reshape--->Transpose--->Reshape--->Concat--->QL---->node2 + # will be made as Conv--->QL---->Rehsape---->Transpose---->Reshape---->Concat---->QL----->node2 + # later QL at end will also be removed + s_ql = gs.Constant(name=ret_ql_node.inputs[1].name + "_" + str(ctr), values=(ret_ql_node.inputs[1].values).astype(np.float32)) + zp_ql = gs.Constant(name=ret_ql_node.inputs[2].name + "_" + str(ctr), values=(ret_ql_node.inputs[2].values).astype(np.int8)) + y_ql = gs.Variable(name=ret_ql_node.outputs[0].name + "_" + str(ctr), dtype=np.int8) + new_ql_node = gs.Node(op = "QuantizeLinear", name = ret_ql_node.name + "_" + str(ctr), inputs = [conv_node.outputs[0], s_ql, zp_ql], outputs = [y_ql]) + reshape_node.inputs[0] = new_ql_node.outputs[0] + shape_node1.inputs[0] = new_ql_node.outputs[0] + shape_node2.inputs[0] = new_ql_node.outputs[0] + shape_node3.inputs[0] = new_ql_node.outputs[0] + graph.nodes.append(new_ql_node) + + if node.op == "QuantizeLinear": + if helper.is_parent_exist(node, 0, 0) and node.i().op == "Concat": + if helper.is_child_present(node, 0, 0) and node.o().op == "DequantizeLinear": + # remove the QL node as mentioned in the above condition. 
(part of the RetinaNet model)
+                        # Concat------>QL------->DQL is changed to
+                        # Concat------>DQL
+                        node.i().outputs = node.outputs
+                        node.outputs.clear()
+
+
+            if node.op == "Unsqueeze":
+                unsqueeze_node = node
+                if helper.is_parent_exist(unsqueeze_node, 0, 0) and unsqueeze_node.i().op == "Gather":
+                    if helper.is_parent_exist(unsqueeze_node.i(), 0, 0) and unsqueeze_node.i().i().op == "Shape":
+                        if helper.is_parent_exist(unsqueeze_node.i().i(), 0, 0) and unsqueeze_node.i().i().i().op == "QuantizeLinear":
+                            if helper.is_child_present(unsqueeze_node, 0, 0) and unsqueeze_node.o().op == "Concat":
+
+                                # QL-------> Shape------> Gather ------>Unsqueeze------> Concat is changed to
+                                # QL-------> Shape------> Gather ------>Unsqueeze------> Cast----->Concat
+
+                                concat_node = unsqueeze_node.o()
+                                cast_count += 1
+                                cast_node_name = node.name + "_" + str(cast_count)
+                                cast_output_tensor = gs.Variable(name=cast_node_name + "_output", dtype=np.int64)
+                                new_cast_node = gs.Node(op = "Cast", name = cast_node_name, attrs = {"to":getattr(TensorProto, "INT64")}, inputs = [unsqueeze_node.outputs[0]], outputs = [cast_output_tensor])
+
+                                for i in range(len(concat_node.inputs)):
+                                    if concat_node.inputs[i].name == node.outputs[0].name:
+                                        concat_node.inputs[i] = new_cast_node.outputs[0]
+                                        break
+                                graph.nodes.append(new_cast_node)
+
+        graph.cleanup()
+
+        if args.save_opt_qdq:
+            onnx.save(gs.export_onnx(graph), "optimized_qdq_" + onnx_model_name)
+            print("Optimized QDQ model has been saved")
+
+        node_list = []
+        initializer_list = []
+        node_count = 0
+        maxpool_count = 0
+        conv_count = 0
+
+        def is_all_concat_input_dql(node):
+            for i in range(len(node.inputs)):
+                if helper.is_parent_exist(node, i, 0) and node.inputs[i].inputs[0].op != "DequantizeLinear":
+                    return False
+            return True
+
+        def concat_input_not_constant(node):
+            for i in range(len(node.inputs)):
+                if len(node.inputs[i].inputs) == 0:
+                    return True
+            return False
+
+
+        def all_dql_conditions_satisfy(node):
+            has_output_tensor = len(node.outputs) > 0
+            has_no_child = has_output_tensor and len(node.outputs[0].outputs)==0
+            has_child = helper.is_child_present(node, 0, 0)
+            child_is_add_node = False
+            child_has_no_child = False
+            child_is_averagepool_node = False
+            child_add_node_has_no_2nd_input = False
+            # pre-initialize the remaining flags so a missing child cannot leave them undefined
+            child_is_softmax_node = False
+            child_is_lrn_node = False
+            child_is_gemm_node = False
+            child_is_relu_node = False
+            child_is_shape_node = False
+            child_is_slice_node = False
+            child_is_resize_node = False
+            child_is_reshape_node = False
+
+            if has_child:
+                child_is_add_node = node.o().op == "Add"
+                child_is_softmax_node = node.o().op == "Softmax"
+                child_has_no_child = len(node.o().outputs[0].outputs)==0
+                child_is_averagepool_node = node.o().op == "AveragePool"
+                child_is_lrn_node = node.o().op == "LRN"
+                child_is_gemm_node = node.o().op == "Gemm"
+                child_is_relu_node = node.o().op == "Relu"
+                child_is_shape_node = node.o().op == "Shape"
+                child_is_slice_node = node.o().op == "Slice"
+                child_is_resize_node = node.o().op == "Resize"
+                child_is_reshape_node = node.o().op == "Reshape"
+
+                if child_is_add_node:
+                    child_add_node = node.o()
+                    if len(child_add_node.inputs[1].inputs) == 0:
+                        child_add_node_has_no_2nd_input = True
+
+            if not has_output_tensor:
+                return False
+
+            if has_output_tensor and is_any_output_tensor_graph_output(node):
+                return True
+
+            if has_no_child:
+                return True
+
+            if child_is_add_node and child_add_node_has_no_2nd_input:
+                return True
+
+            if child_is_softmax_node and child_has_no_child:
+                return True
+
+            if child_is_averagepool_node:
+                return True
+
+            if child_is_lrn_node:
+                return True
+
+            if child_is_gemm_node:
+                return True
+
+            if child_is_relu_node:
+                return True
+
+            if child_is_shape_node or child_is_slice_node or child_is_resize_node:
+                return True
+
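+            # A DQL with multiple consumers is also kept when one of its other
+            # consumers is a Shape or Resize node, which must see the dequantized
+            # (float) tensor.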
+            if helper.is_child_present(node, 0, 1):
+                c2 = node.outputs[0].outputs[1]
+                if c2.op == "Shape" or c2.op == "Resize":
+                    return True
+
+            if helper.is_child_present(node, 0, 2):
+                c2 = node.outputs[0].outputs[2]
+                if c2.op == "Shape" or c2.op == "Resize":
+                    return True
+
+            if child_is_reshape_node:
+                if helper.is_child_present(node.o(), 0, 0) and node.o().o().op == "Softmax":
+                    return True
+
+            return False
+
+        def all_ql_conditions_satisfy(count, node):
+            if args.is_ryzenai_model and count == 2:
+                return True
+            if helper.is_child_present(node, 0, 0):
+                if node.o().op == "Gather":
+                    return False
+                if helper.is_child_present(node.o(), 0, 0) and node.o().o().op == "Gemm" and len(node.inputs[0].inputs) == 0:
+                    return True
+            if count == 0:
+                if args.is_ryzenai_model and helper.is_child_present(node, 0, 0) and node.o().op == "DequantizeLinear":
+                    if helper.is_child_present(node.o(), 0,0) and node.o().o().op == "Conv":
+                        if helper.is_child_present(node.o().o(), 0, 0) and node.o().o().o().op == "QuantizeLinear":
+                            return False
+                return True
+            has_parent = helper.is_parent_exist(node, 0, 0)
+
+            if has_parent:
+                is_parent_averagepool = node.i().op == "AveragePool"
+                is_parent_lrn_node = node.i().op == "LRN"
+                is_parent_maxpool_node = node.i().op == "MaxPool"
+                is_parent_relu_node = node.i().op == "Relu"
+                is_parent_resize_node = node.i().op == "Resize"
+                is_parent_concat = node.i().op == "Concat"
+
+                if is_parent_averagepool or is_parent_lrn_node:
+                    return True
+
+                if is_parent_maxpool_node:
+                    # (Non DQL)--->MaxPool----->QL (keep this QL)
+                    if not (node.i().i().op == "DequantizeLinear"):
+                        return True
+                if is_parent_relu_node:
+                    parent_relu_node = node.i()
+                    if parent_relu_node.i().op == "Add":
+                        parent_add_node = parent_relu_node.i()
+                        if len(parent_add_node.inputs[1].inputs)==0:
+                            return True
+
+                if is_parent_resize_node:
+                    return True
+                if is_parent_concat:
+                    return True
+
+            if helper.is_child_present(node, 0, 0):
+                if helper.is_parent_exist(node, 0, 0):
+                    if node.i().op == "Relu":
+                        return False
+
+            return False
+
+        for node in graph.nodes:
+
+            if node.op == "Conv":
+                QLinearConv_node = QLinearConv(node, aecg_zendnn_opt, args.remove_relu, conv_count)
+                node_list.append(QLinearConv_node.get_node())
+                initializer_list.append(QLinearConv_node.get_intializers())
+                conv_count = conv_count + 1
+            elif node.op == "QuantizeLinear" and all_ql_conditions_satisfy(node_count, node):
+                QuantizeLinear_node = QuantizeLinear(node)
+                node_list.append(QuantizeLinear_node.get_node())
+                initializer_list.append(QuantizeLinear_node.get_intializers())
+            elif node.op == "DequantizeLinear" and all_dql_conditions_satisfy(node):
+                DequantizeLinear_node = DequantizeLinear(node, aecg_zendnn_opt)
+                node_list.append(DequantizeLinear_node.get_node())
+                initializer_list.append(DequantizeLinear_node.get_intializers())
+            elif node.op == "MaxPool":
+                maxpool_node = MaxPool(node, maxpool_count, args.remove_relu)
+                node_list.append(maxpool_node.get_node())
+                maxpool_count = maxpool_count + 1
+            elif node.op == "Add":
+                add_node = QLinearAdd(node, aecg_zendnn_opt, args.remove_relu)
+                node_list.append(add_node.get_node())
+                initializer_list.append(add_node.get_intializers())
+            elif node.op == "AveragePool":
+                average_pool_node = AveragePool(node)
+                node_list.append(average_pool_node.get_node())
+            elif node.op == "Squeeze":
+                squeeze_node = Squeeze(node)
+                node_list.append(squeeze_node.get_node())
+                initializer_list.append(squeeze_node.get_intializers())
+            elif node.op == "GlobalAveragePool":
+                global_average_pool_node = GlobalAveragePool(node, 
aecg_zendnn_opt, args.remove_relu) + node_list.append(global_average_pool_node.get_node()) + initializer_list.append(global_average_pool_node.get_intializers()) + elif node.op == "Flatten": + flatten_node = Flatten(node) + node_list.append(flatten_node.get_node()) + elif node.op == "MatMul": + if retain_matmul: + matmul_node = MatMul_Retained(node) + node_list.append(matmul_node.get_node()) + initializer_list.append(matmul_node.get_intializers()) + else: + matmul_node = MatMul(node) + node_list.append(matmul_node.get_node()) + initializer_list.append(matmul_node.get_intializers()) + elif node.op == "LRN": + lrn_node = LRN(node) + node_list.append(lrn_node.get_node()) + elif node.op == "Concat": + concat_node = Concat(node, is_all_concat_input_dql(node)) + node_list.append(concat_node.get_node()) + if (is_all_concat_input_dql(node) or concat_input_not_constant(node)): + initializer_list.append(concat_node.get_intializers()) + elif node.op == "Softmax": + softmax_node = Softmax(node) + node_list.append(softmax_node.get_node()) + elif node.op == "Cast": + cast_node = Cast(node) + node_list.append(cast_node.get_node()) + elif node.op == "Gather": + gather_node = Gather(node) + node_list.append(gather_node.get_node()) + initializer_list.append(gather_node.get_intializers()) + elif node.op == "Gemm": + # If weights and bias are dequantized, embed it in Gemm + if node.i(0).op == "DequantizeLinear" and node.i(1).op == "DequantizeLinear" and node.i(2).op == "DequantizeLinear": + dql_node1 = node.i(1).name + dql_node2 = node.i(2).name + ql_node1 = node.i(1).i(0).name + dql_list = [dql_node1, dql_node2, ql_node1] + dql_found = [] + gemm_node = Gemm_optimized(node) + for node_current in node_list: + if node_current.name in dql_list: + dql_found.append(node_current) + for node_dql in dql_found: + node_list.remove(node_dql) + node_list.append(gemm_node.get_node()) + initializer_list.append(gemm_node.get_intializers()) + else: + gemm_node = Gemm(node) + node_list.append(gemm_node.get_node()) + elif node.op == "Greater": + greater_node = Greater(node) + node_list.append(greater_node.get_node()) + initializer_list.append(greater_node.get_intializers()) + elif node.op == "Less": + less_node = Less(node) + node_list.append(less_node.get_node()) + initializer_list.append(less_node.get_intializers()) + elif node.op == "Slice": + slice_node = Slice(node) + node_list.append(slice_node.get_node()) + initializer_list.append(slice_node.get_intializers()) + elif node.op == "Transpose": + transpose_node = Transpose(node) + node_list.append(transpose_node.get_node()) + elif node.op == "Relu": + if not args.remove_relu: + relu_node = Relu(node) + node_list.append(relu_node.get_node()) + elif node.op == "Reshape": + reshape_node = Reshape(node) + node_list.append(reshape_node.get_node()) + initializer_list.append(reshape_node.get_intializers()) + elif node.op == "Shape": + shape_node = Shape(node) + node_list.append(shape_node.get_node()) + elif node.op == "Resize": + resize_node = Resize(node) + node_list.append(resize_node.get_node()) + elif node.op == "Unsqueeze": + unsq_node = Unsqueeze(node) + node_list.append(unsq_node.get_node()) + initializer_list.append(unsq_node.get_intializers()) + elif node.op == "Clip": + found = False + for node_current in node_list: + if node_current.name == node.i(0).name: + found = True + if found == False: + continue + clip_node = Clip(node) + node_list.append(clip_node.get_node()) + initializer_list.append(clip_node.get_intializers()) + + if node.op in supported_op: + node_count = 
node_count + 1
+
+        new_list = []
+        for list1 in initializer_list:
+            for i in list1:
+                new_list.append(i)
+
+        graph_input_shape = graph.inputs[0].shape
+        graph_input_shape[0] = None
+
+        # only float32/int8/int64 graph inputs are handled here
+        if graph.inputs[0].dtype == "float32":
+            graph_input_tensor_dtype = onnx.TensorProto.FLOAT
+        elif graph.inputs[0].dtype == "int8":
+            graph_input_tensor_dtype = onnx.TensorProto.INT8
+        elif graph.inputs[0].dtype == "int64":
+            graph_input_tensor_dtype = onnx.TensorProto.INT64
+        X = onnx.helper.make_tensor_value_info(graph.inputs[0].name,
+                                               graph_input_tensor_dtype,
+                                               graph_input_shape)
+        graph_output_tensor_list = []
+        for i in range(len(graph.outputs)):
+            if graph.outputs[i].dtype == "float32":
+                graph_output_tensor_dtype = onnx.TensorProto.FLOAT
+            elif graph.outputs[i].dtype == "int8":
+                graph_output_tensor_dtype = onnx.TensorProto.INT8
+            elif graph.outputs[i].dtype == "bool":
+                graph_output_tensor_dtype = onnx.TensorProto.BOOL
+
+            graph_output_shape = graph.outputs[i].shape
+
+            Y = onnx.helper.make_tensor_value_info(graph.outputs[i].name,
+                                                   graph_output_tensor_dtype,
+                                                   graph_output_shape)
+            graph_output_tensor_list.append(Y)
+
+        graph_def = onnx.helper.make_graph(nodes=node_list, name=graph.name,
+                                           inputs=[X],
+                                           outputs=graph_output_tensor_list,
+                                           initializer=new_list)
+
+        model_def = onnx.helper.make_model(graph_def, producer_name="onnx-example")
+        model_def.opset_import[0].version = 16
+
+        new_model_name = "q_operator_" + onnx_model_name
+
+        onnx.save(model_def, new_model_name)
From ea14785ef123b41e37e695ed7711f481b44b9dd4 Mon Sep 17 00:00:00 2001
From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com>
Date: Thu, 21 Sep 2023 20:00:17 +0530
Subject: [PATCH 04/20] Added new option qop in convert.py

---
 src/qonnx/util/convert.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/qonnx/util/convert.py b/src/qonnx/util/convert.py
index 5e7d2495..ee4bdc5c 100644
--- a/src/qonnx/util/convert.py
+++ b/src/qonnx/util/convert.py
@@ -31,16 +31,20 @@
 from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.transformation.qcdq_to_qonnx import QCDQToQuant
 from qonnx.transformation.qonnx_to_qcdq import QuantToQCDQ
+from qonnx.transformation.operators import *
+from qonnx.transformation.qcdq_to_qop import *
 
 CONVERT_MODE_QCDQ = "qcdq"
 CONVERT_MODE_QUANT = "quant"
+CONVERT_MODE_QOP = "qop"
 
-convert_modes = {CONVERT_MODE_QCDQ, CONVERT_MODE_QUANT}
+convert_modes = {CONVERT_MODE_QCDQ, CONVERT_MODE_QUANT, CONVERT_MODE_QOP}
 
 convert_mode_options = clize.parameters.mapped(
     [
         (CONVERT_MODE_QCDQ, [CONVERT_MODE_QCDQ], "Convert from Quant to QCDQ"),
         (CONVERT_MODE_QUANT, [CONVERT_MODE_QUANT], "Convert from QCDQ to Quant"),
+        (CONVERT_MODE_QOP, [CONVERT_MODE_QOP], "Convert from QCDQ to QOp"),
     ]
 )
 
@@ -55,17 +59,21 @@ def convert(input_model_file, *, output_style: convert_mode_options, output_file
     :param output_file: If specified, write the output ONNX model to this filename. Otherwise, will
        default to the input file with an _output_style suffix. 
""" + print(input_model_file) model = ModelWrapper(input_model_file) if output_style == CONVERT_MODE_QCDQ: model = model.transform(QuantToQCDQ()) elif output_style == CONVERT_MODE_QUANT: model = model.transform(QCDQToQuant()) + elif output_style == CONVERT_MODE_QOP: + QLinearConvert(input_model_file) else: print("Unknown output_style for conversion: %s" % output_style) exit(-1) if output_file is None: output_file = input_model_file.replace(".onnx", "_%s.onnx" % output_style) - model.save(output_file) + if output_style != CONVERT_MODE_QOP: + model.save(output_file) def main(): From f2bf438c1678af67d66a19493de1bf7b7e593ba7 Mon Sep 17 00:00:00 2001 From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com> Date: Mon, 25 Sep 2023 11:58:34 +0530 Subject: [PATCH 05/20] Fixed typo in add_op.py --- src/qonnx/transformation/operators/add_op.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/qonnx/transformation/operators/add_op.py b/src/qonnx/transformation/operators/add_op.py index 90792d1f..cd7da025 100644 --- a/src/qonnx/transformation/operators/add_op.py +++ b/src/qonnx/transformation/operators/add_op.py @@ -1,4 +1,4 @@ -a######################################################################## +######################################################################## # # Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. # From 1e94ea6b6642459382ca74eb097fcda8c838a736 Mon Sep 17 00:00:00 2001 From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com> Date: Mon, 25 Sep 2023 11:59:07 +0530 Subject: [PATCH 06/20] Update qcdq_to_qop.py --- src/qonnx/transformation/qcdq_to_qop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/qonnx/transformation/qcdq_to_qop.py b/src/qonnx/transformation/qcdq_to_qop.py index 133ee82f..4403e13a 100644 --- a/src/qonnx/transformation/qcdq_to_qop.py +++ b/src/qonnx/transformation/qcdq_to_qop.py @@ -65,8 +65,8 @@ class CustomEnv(): imp_strides_opt=False save_opt_qdq=False change_avgpool=False - aecg_zendnn_opt=True - remove_relu=False + aecg_zendnn_opt=False + remove_relu=True retain_matmul=False is_ryzenai_model=False is_retinanet=False From bf80df1ba1e32c93bb6e1d96a796b5b003412f22 Mon Sep 17 00:00:00 2001 From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com> Date: Thu, 5 Oct 2023 13:37:50 +0530 Subject: [PATCH 07/20] Update dequantizelinear_op.py --- .../transformation/operators/dequantizelinear_op.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/qonnx/transformation/operators/dequantizelinear_op.py b/src/qonnx/transformation/operators/dequantizelinear_op.py index d9f5b6b2..309d6564 100644 --- a/src/qonnx/transformation/operators/dequantizelinear_op.py +++ b/src/qonnx/transformation/operators/dequantizelinear_op.py @@ -29,7 +29,7 @@ class DequantizeLinear: - def __init__(self, node, aecg_zendnn_opt): + def __init__(self, node, aecg_zendnn_opt, remove_relu): dql_node = node @@ -41,7 +41,10 @@ def __init__(self, node, aecg_zendnn_opt): if helper.is_parent_exist(ql_node,0, 0): if ql_node.i().op == "Relu": relu_node = ql_node.i() - x_name = relu_node.outputs[0].name + if remove_relu: + x_name = ql_node.outputs[0].name + else: + x_name = relu_node.outputs[0].name else: print("*************** WARNING *********************** Please check parent of QL node", ql_node.name, " ignore if pattern is correct") else: @@ -104,4 +107,4 @@ def get_node(self): return self.node def get_intializers(self): - return self.initializers \ No 
newline at end of file + return self.initializers From 363f882b60ddf5036c5fdc0f280aec299ab99de5 Mon Sep 17 00:00:00 2001 From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com> Date: Thu, 5 Oct 2023 13:39:27 +0530 Subject: [PATCH 08/20] Update qlinearconv_op.py --- .../operators/qlinearconv_op.py | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/qonnx/transformation/operators/qlinearconv_op.py b/src/qonnx/transformation/operators/qlinearconv_op.py index b3556678..f1960df1 100644 --- a/src/qonnx/transformation/operators/qlinearconv_op.py +++ b/src/qonnx/transformation/operators/qlinearconv_op.py @@ -53,8 +53,12 @@ def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count): # Conv (QDQ model) Conv (3 - FP32 bias embedded) (QCDQ model) # | | # ------------------------------------------------------------------------ - b_DQL_node = conv_node.inputs[2] # For QDQ - b_DQL_tensor = conv_node.inputs[2] # For QCDQ + # Initialization + b_DQL_node = conv_node + b_DQL_tensor = conv_node + if has_bias: + b_DQL_node = conv_node.inputs[2] # For QDQ + b_DQL_tensor = conv_node.inputs[2] # For QCDQ if has_bias and QCDQ_model_detected==False: b_DQL_node = conv_node.inputs[2].inputs[0] is_fp32_bias_embedded = False @@ -187,7 +191,7 @@ def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count): quantized_bias_tensor = np.clip(quantized_bias_tensor, S32_MIN, S32_MAX) quantized_bias_tensor = np.round(quantized_bias_tensor) quantized_bias_tensor = quantized_bias_tensor.astype(np.int32) - else: + elif has_bias: bias_tensor = b_QL_node.inputs[0] bias_scale_tensor1 = b_QL_node.inputs[1] bias_zp_tensor = b_QL_node.inputs[2] @@ -308,6 +312,8 @@ def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count): maxpool_input_s8 = False else: x_name = x_QL_node.outputs[0].name + if x_QL_node.op == "Clip": + x_name = str(int(x_QL_node.o().outputs[0].name)-3) else: x_name = x_QL_node.outputs[0].name @@ -438,10 +444,9 @@ def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count): else: strides_attr = 1 - qlinearconv_node = onnx.helper.make_node(name = conv_node.name, op_type = "QLinearConv", - inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name, b_name], - outputs = [y_name], auto_pad = auto_pad_attr, group = group_attr, dilations = dilations_attr, - kernel_shape = conv_node.attrs["kernel_shape"], pads = pads_attr, strides = strides_attr) + qlinearconv_node = onnx.helper.make_node(name = conv_node.name, op_type = "QLinearConv", inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name], outputs = [y_name], auto_pad = auto_pad_attr, group = group_attr, dilations = dilations_attr, kernel_shape = conv_node.attrs["kernel_shape"], pads = pads_attr, strides = strides_attr) + if has_bias: + qlinearconv_node = onnx.helper.make_node(name = conv_node.name, op_type = "QLinearConv", inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name, b_name], outputs = [y_name], auto_pad = auto_pad_attr, group = group_attr, dilations = dilations_attr, kernel_shape = conv_node.attrs["kernel_shape"], pads = pads_attr, strides = strides_attr) if is_relu_present: relu_node = onnx.helper.make_node(name = relu_node.name, op_type = "Relu", inputs = [conv_node.outputs[0].name], outputs = [relu_node.outputs[0].name]) @@ -457,7 +462,8 @@ def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count): intializer_list.append(w_zp_tensor) 
From 363f882b60ddf5036c5fdc0f280aec299ab99de5 Mon Sep 17 00:00:00 2001
From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com>
Date: Thu, 5 Oct 2023 13:39:27 +0530
Subject: [PATCH 08/20] Update qlinearconv_op.py

---
 .../operators/qlinearconv_op.py | 22 ++++++++++++-------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/qonnx/transformation/operators/qlinearconv_op.py b/src/qonnx/transformation/operators/qlinearconv_op.py
index b3556678..f1960df1 100644
--- a/src/qonnx/transformation/operators/qlinearconv_op.py
+++ b/src/qonnx/transformation/operators/qlinearconv_op.py
@@ -53,8 +53,12 @@ def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count):
     # Conv (QDQ model)         Conv (3 - FP32 bias embedded) (QCDQ model)
     # |                        |
     # ------------------------------------------------------------------------
-        b_DQL_node = conv_node.inputs[2]   # For QDQ
-        b_DQL_tensor = conv_node.inputs[2] # For QCDQ
+        # Initialization
+        b_DQL_node = conv_node
+        b_DQL_tensor = conv_node
+        if has_bias:
+            b_DQL_node = conv_node.inputs[2]   # For QDQ
+            b_DQL_tensor = conv_node.inputs[2] # For QCDQ
         if has_bias and QCDQ_model_detected==False:
             b_DQL_node = conv_node.inputs[2].inputs[0]
         is_fp32_bias_embedded = False
@@ -187,7 +191,7 @@ def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count):
                 quantized_bias_tensor = np.clip(quantized_bias_tensor, S32_MIN, S32_MAX)
                 quantized_bias_tensor = np.round(quantized_bias_tensor)
                 quantized_bias_tensor = quantized_bias_tensor.astype(np.int32)
-        else:
+        elif has_bias:
             bias_tensor = b_QL_node.inputs[0]
             bias_scale_tensor1 = b_QL_node.inputs[1]
             bias_zp_tensor = b_QL_node.inputs[2]
@@ -308,6 +312,8 @@ def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count):
                     maxpool_input_s8 = False
             else:
                 x_name = x_QL_node.outputs[0].name
+                if x_QL_node.op == "Clip":
+                    x_name = str(int(x_QL_node.o().outputs[0].name)-3)
         else:
             x_name = x_QL_node.outputs[0].name
 
@@ -438,10 +444,9 @@ def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count):
         else:
             strides_attr = 1
 
-        qlinearconv_node = onnx.helper.make_node(name = conv_node.name, op_type = "QLinearConv",
-                                                 inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name, b_name],
-                                                 outputs = [y_name], auto_pad = auto_pad_attr, group = group_attr, dilations = dilations_attr,
-                                                 kernel_shape = conv_node.attrs["kernel_shape"], pads = pads_attr, strides = strides_attr)
+        qlinearconv_node = onnx.helper.make_node(name = conv_node.name, op_type = "QLinearConv", inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name], outputs = [y_name], auto_pad = auto_pad_attr, group = group_attr, dilations = dilations_attr, kernel_shape = conv_node.attrs["kernel_shape"], pads = pads_attr, strides = strides_attr)
+        if has_bias:
+            qlinearconv_node = onnx.helper.make_node(name = conv_node.name, op_type = "QLinearConv", inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name, b_name], outputs = [y_name], auto_pad = auto_pad_attr, group = group_attr, dilations = dilations_attr, kernel_shape = conv_node.attrs["kernel_shape"], pads = pads_attr, strides = strides_attr)
 
         if is_relu_present:
             relu_node = onnx.helper.make_node(name = relu_node.name, op_type = "Relu", inputs = [conv_node.outputs[0].name], outputs = [relu_node.outputs[0].name])
@@ -457,7 +462,8 @@ def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count):
         intializer_list.append(w_zp_tensor)
         intializer_list.append(y_scale_tensor)
         intializer_list.append(y_zp_tensor)
-        intializer_list.append(b_tensor)
+        if has_bias:
+            intializer_list.append(b_tensor)
         self.intializer_list = intializer_list
 
     def get_node(self):

From ce6b1049d369ac688e32dd6996b9a71ec229a1c0 Mon Sep 17 00:00:00 2001
From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com>
Date: Thu, 5 Oct 2023 13:40:38 +0530
Subject: [PATCH 09/20] Update resize_op.py

---
 .../transformation/operators/resize_op.py | 25 +++++++++++++------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/qonnx/transformation/operators/resize_op.py b/src/qonnx/transformation/operators/resize_op.py
index be571f1b..15b81f8d 100644
--- a/src/qonnx/transformation/operators/resize_op.py
+++ b/src/qonnx/transformation/operators/resize_op.py
@@ -24,6 +24,7 @@
 #########################################################################
 
 import onnx
+from .helper import helper
 
 class Resize:
 
@@ -34,7 +35,9 @@ def __init__(self, node):
         x1_name = resize_node.inputs[0].name
         x2_name = resize_node.inputs[1].name
         x3_name = resize_node.inputs[2].name
-        x4_name = resize_node.inputs[3].name
+        x4_name = resize_node
+        if len(resize_node.inputs) > 3:
+            x4_name = resize_node.inputs[3].name
 
         y_name = resize_node.outputs[0].name
 
@@ -43,15 +46,21 @@ def __init__(self, node):
             # 1st and 2nd index i.e x2_name and x3_name come out to be empty
             print("WARNING check inputs of resize node")
 
-        new_resize_node = onnx.helper.make_node(name = resize_node.name, op_type = "Resize",
-                                                inputs = [x1_name, x2_name, x3_name, x4_name],
-                                                outputs = [y_name],
-                                                coordinate_transformation_mode = resize_node.attrs["coordinate_transformation_mode"],
-                                                cubic_coeff_a = resize_node.attrs["cubic_coeff_a"],
-                                                mode = resize_node.attrs["mode"],
-                                                nearest_mode = resize_node.attrs["nearest_mode"])
+        new_resize_node = onnx.helper.make_node(name = resize_node.name, op_type = "Resize", inputs = [x1_name, x2_name, x3_name], outputs = [y_name], coordinate_transformation_mode = resize_node.attrs["coordinate_transformation_mode"], cubic_coeff_a = resize_node.attrs["cubic_coeff_a"], mode = resize_node.attrs["mode"], nearest_mode = resize_node.attrs["nearest_mode"])
+        if len(resize_node.inputs) > 3:
+            new_resize_node = onnx.helper.make_node(name = resize_node.name, op_type = "Resize", inputs = [x1_name, x2_name, x3_name, x4_name], outputs = [y_name], coordinate_transformation_mode = resize_node.attrs["coordinate_transformation_mode"], cubic_coeff_a = resize_node.attrs["cubic_coeff_a"], mode = resize_node.attrs["mode"], nearest_mode = resize_node.attrs["nearest_mode"])
 
         self.node = new_resize_node
 
+        if len(resize_node.inputs) == 3:
+            x3_value = resize_node.inputs[2].values
+            x3_tensor = helper.create_initializer_tensor(x3_name,x3_value,onnx.TensorProto.FLOAT)
+            intializer_list = []
+            intializer_list.append(x3_tensor)
+            self.intializer_list = intializer_list
+
     def get_node(self):
         return self.node
+
+    def get_intializers(self):
+        return self.intializer_list
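Patches 08 and 09 apply the same guard pattern: an optional ONNX input (the Conv bias, the Resize sizes) is only referenced when the source node actually carries it. A hedged sketch of that guard using onnx.helper directly; the attribute values and tensor names below are placeholders, not taken from the converter:

    import onnx

    def make_resize(name, input_names, output_name, attrs):
        # Pass through only the inputs the source node had: a 3-input Resize
        # (X, roi, scales) must not grow a dangling 4th (sizes) input.
        return onnx.helper.make_node("Resize", inputs=input_names,
                                     outputs=[output_name], name=name, **attrs)

    attrs = {"coordinate_transformation_mode": "asymmetric",
             "cubic_coeff_a": -0.75, "mode": "nearest", "nearest_mode": "floor"}
    three = make_resize("r3", ["X", "roi", "scales"], "Y", attrs)
    four = make_resize("r4", ["X", "roi", "scales", "sizes"], "Y", attrs)
    assert len(three.input) == 3 and len(four.input) == 4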
From cb0a87b6db65662b4011905f78a8205712080fac Mon Sep 17 00:00:00 2001
From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com>
Date: Thu, 5 Oct 2023 13:42:19 +0530
Subject: [PATCH 10/20] Update qcdq_to_qop.py

---
 src/qonnx/transformation/qcdq_to_qop.py | 19 +++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/qonnx/transformation/qcdq_to_qop.py b/src/qonnx/transformation/qcdq_to_qop.py
index 4403e13a..446a4013 100644
--- a/src/qonnx/transformation/qcdq_to_qop.py
+++ b/src/qonnx/transformation/qcdq_to_qop.py
@@ -166,18 +166,23 @@ def get_child_conv(node):
 
     supported_op = ["Conv", "QuantizeLinear", "DequantizeLinear", "MaxPool", "Add", "AveragePool", "Squeeze", "GlobalAveragePool", "Flatten", "MatMul", "LRN", "Concat", "Softmax", "Cast", "Gather", "Gemm", "Greater", "Less", "Slice", "Transpose", "Relu", "Reshape", "Shape", "Resize", "Unsqueeze", "Clip"]
 
+    '''
     for node in graph.nodes:
        if not node.op in supported_op:
            print(node.op, " op is currently not supported in the converter. Exiting model converter")
            sys.exit()
-
+    '''
     maxpool_count = 0
     ctr = 0
     cast_count = 0
     clip_num = 0
+    retinanet_end_pattern_found = False
+    squeeze_output = False
 
     for node in graph.nodes:
+        if node.op == "Flatten":
+            squeeze_output = True
 
         # Resnet strides optimization for Resnet50v1
         """
@@ -773,7 +778,7 @@ def get_child_conv(node):
             conv_node1.inputs[0] = dql_node.outputs[0]
 
         # add Squeeze as input to last DequantizeLinear node
-        if (not args.is_retinanet) and node.op == "DequantizeLinear" and ((len(node.outputs[0].outputs) == 0) or (len(node.outputs[0].outputs)==1 and (node.o().op == "Add" or node.o().op == "Softmax") and len(node.o().outputs[0].outputs)==0)):
+        if squeeze_output and (not args.is_retinanet) and node.op == "DequantizeLinear" and ((len(node.outputs[0].outputs) == 0) or (len(node.outputs[0].outputs)==1 and (node.o().op == "Add" or node.o().op == "Softmax") and len(node.o().outputs[0].outputs)==0)):
 
             # no need to add Squeeze node if DQL is already getting 2d tensor
             # TODO: add a check if input is 2d then don't add Squeeze node
@@ -903,8 +908,9 @@ def get_child_conv(node):
                 shape_node2.inputs[0] = new_ql_node.outputs[0]
                 shape_node3.inputs[0] = new_ql_node.outputs[0]
                 graph.nodes.append(new_ql_node)
+                retinanet_end_pattern_found = True
 
-        if node.op == "QuantizeLinear":
+        if node.op == "QuantizeLinear" and retinanet_end_pattern_found:
             if helper.is_parent_exist(node, 0, 0) and node.i().op == "Concat":
                 if helper.is_child_present(node, 0, 0) and node.o().op == "DequantizeLinear":
                     # remove the QL node as mentioned in the above condition. (Part of retinaNet model)
@@ -1074,8 +1080,8 @@ def all_ql_conditions_satify(count, node):
     if is_parent_resize_node:
         return True
 
-    if is_parent_concat:
-        return True
+    #if is_parent_concat:
+    #    return True
 
     if helper.is_child_present(node, 0, 0):
         if helper.is_parent_exist(node, 0, 0):
@@ -1096,7 +1102,7 @@ def all_ql_conditions_satify(count, node):
             node_list.append(QuantizeLinear_node.get_node())
             initializer_list.append(QuantizeLinear_node.get_intializers())
         elif node.op == "DequantizeLinear" and all_dql_conditions_satisfy(node):
-            DequantizeLinear_node = DequantizeLinear(node, aecg_zendnn_opt)
+            DequantizeLinear_node = DequantizeLinear(node, aecg_zendnn_opt, args.remove_relu)
             node_list.append(DequantizeLinear_node.get_node())
             initializer_list.append(DequantizeLinear_node.get_intializers())
         elif node.op == "MaxPool":
@@ -1196,6 +1202,7 @@ def all_ql_conditions_satify(count, node):
         elif node.op == "Resize":
             resize_node = Resize(node)
             node_list.append(resize_node.get_node())
+            initializer_list.append(resize_node.get_intializers())
         elif node.op == "Unsqueeze":
             unsq_node = Unsqueeze(node)
             node_list.append(unsq_node.get_node())
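PATCH 10 replaces hard assumptions about the graph tail with flags set while scanning: the trailing Squeeze is only inserted when a Flatten shows the model really collapses to 2D, and the RetinaNet-specific QuantizeLinear removal only fires after its end pattern has been seen. A toy version of the gating, with SimpleNamespace standing in for graphsurgeon nodes:

    from types import SimpleNamespace

    nodes = [SimpleNamespace(op="Conv"), SimpleNamespace(op="Flatten"),
             SimpleNamespace(op="Gemm")]
    # Pre-scan mirror of the new flag: add the output Squeeze only when the
    # network actually flattened its feature map.
    squeeze_output = any(n.op == "Flatten" for n in nodes)
    assert squeeze_output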
From 5d5090dc77aac79b4747871215471f7c1c488f17 Mon Sep 17 00:00:00 2001
From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com>
Date: Fri, 6 Oct 2023 14:50:09 +0530
Subject: [PATCH 11/20] [Code cleanup for QCDQToQOp] convert.py

---
 src/qonnx/util/convert.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/qonnx/util/convert.py b/src/qonnx/util/convert.py
index ee4bdc5c..ec243cd0 100644
--- a/src/qonnx/util/convert.py
+++ b/src/qonnx/util/convert.py
@@ -31,7 +31,7 @@
 from qonnx.core.modelwrapper import ModelWrapper
 from qonnx.transformation.qcdq_to_qonnx import QCDQToQuant
 from qonnx.transformation.qonnx_to_qcdq import QuantToQCDQ
-from qonnx.transformation.operators import *
+from qonnx.custom_op.qop import *
 from qonnx.transformation.qcdq_to_qop import *
 
 CONVERT_MODE_QCDQ = "qcdq"
@@ -66,19 +66,16 @@ def convert(input_model_file, *, output_style: convert_mode_options, output_file
     elif output_style == CONVERT_MODE_QUANT:
         model = model.transform(QCDQToQuant())
     elif output_style == CONVERT_MODE_QOP:
-        QLinearConvert(input_model_file)
+        model = model.transform(QCDQToQOp(), False, False)
     else:
         print("Unknown output_style for conversion: %s" % output_style)
         exit(-1)
     if output_file is None:
         output_file = input_model_file.replace(".onnx", "_%s.onnx" % output_style)
-    if output_style != CONVERT_MODE_QOP:
-        model.save(output_file)
-
+    model.save(output_file)
 
 def main():
     clize.run(convert)
-
 if __name__ == "__main__":
     main()
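After PATCH 11 the QOp path flows through the same transform-then-save sequence as the other modes. An assumed usage sketch ("model.onnx" is a placeholder path; the two positional False arguments simply mirror the call in the patch, presumably disabling the deepcopy/cleanup steps around the pass):

    from qonnx.core.modelwrapper import ModelWrapper
    from qonnx.transformation.qcdq_to_qop import QCDQToQOp

    model = ModelWrapper("model.onnx")  # placeholder input path
    # Same call convert() now makes for output_style == "qop".
    model = model.transform(QCDQToQOp(), False, False)
    model.save("model_qop.onnx")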
From 7c3aebebf51ab3fb168076120aeea9cfbd465df6 Mon Sep 17 00:00:00 2001
From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com>
Date: Fri, 6 Oct 2023 14:51:05 +0530
Subject: [PATCH 12/20] [Code cleanup for QCDQToQOp] qcdq_to_qop.py

---
 src/qonnx/transformation/qcdq_to_qop.py | 85 +++++++++++++------------
 1 file changed, 46 insertions(+), 39 deletions(-)

diff --git a/src/qonnx/transformation/qcdq_to_qop.py b/src/qonnx/transformation/qcdq_to_qop.py
index 446a4013..56902b44 100644
--- a/src/qonnx/transformation/qcdq_to_qop.py
+++ b/src/qonnx/transformation/qcdq_to_qop.py
@@ -31,35 +31,42 @@
 from onnx import TensorProto
 import sys
 
-from qonnx.transformation.operators.qlinearconv_op import *
-from qonnx.transformation.operators.quantizelinear_op import *
-from qonnx.transformation.operators.dequantizelinear_op import *
-from qonnx.transformation.operators.maxpool_op import *
-from qonnx.transformation.operators.add_op import *
-from qonnx.transformation.operators.averagepool_op import *
-from qonnx.transformation.operators.squeeze_op import *
-from qonnx.transformation.operators.globalAveragePool_op import *
-from qonnx.transformation.operators.flatten_op import *
-from qonnx.transformation.operators.matmul_op import *
-from qonnx.transformation.operators.lrn_op import *
-from qonnx.transformation.operators.concat_op import *
-from qonnx.transformation.operators.softmax_op import *
-from qonnx.transformation.operators.matmul_retained_op import *
-from qonnx.transformation.operators.cast_op import *
-from qonnx.transformation.operators.gather_op import *
-from qonnx.transformation.operators.gemm_op import *
-from qonnx.transformation.operators.gemm_op_optimized import *
-from qonnx.transformation.operators.greater_op import *
-from qonnx.transformation.operators.less_op import *
-from qonnx.transformation.operators.slice_op import *
-from qonnx.transformation.operators.transpose_op import *
-from qonnx.transformation.operators.relu_op import *
-from qonnx.transformation.operators.reshape_op import *
-from qonnx.transformation.operators.identity_op import *
-from qonnx.transformation.operators.shape_op import *
-from qonnx.transformation.operators.resize_op import *
-from qonnx.transformation.operators.unsqueeze_op import *
-from qonnx.transformation.operators.clip_op import *
+import math
+import onnx.numpy_helper
+from typing import Tuple
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.base import Transformation
+from qonnx.util.basic import get_by_name
+
+from qonnx.custom_op.qop.qlinearconv_op import *
+from qonnx.custom_op.qop.quantizelinear_op import *
+from qonnx.custom_op.qop.dequantizelinear_op import *
+from qonnx.custom_op.qop.maxpool_op import *
+from qonnx.custom_op.qop.add_op import *
+from qonnx.custom_op.qop.averagepool_op import *
+from qonnx.custom_op.qop.squeeze_op import *
+from qonnx.custom_op.qop.globalAveragePool_op import *
+from qonnx.custom_op.qop.flatten_op import *
+from qonnx.custom_op.qop.matmul_op import *
+from qonnx.custom_op.qop.lrn_op import *
+from qonnx.custom_op.qop.concat_op import *
+from qonnx.custom_op.qop.softmax_op import *
+from qonnx.custom_op.qop.matmul_retained_op import *
+from qonnx.custom_op.qop.cast_op import *
+from qonnx.custom_op.qop.gather_op import *
+from qonnx.custom_op.qop.gemm_op import *
+from qonnx.custom_op.qop.gemm_op_optimized import *
+from qonnx.custom_op.qop.greater_op import *
+from qonnx.custom_op.qop.less_op import *
+from qonnx.custom_op.qop.slice_op import *
+from qonnx.custom_op.qop.transpose_op import *
+from qonnx.custom_op.qop.relu_op import *
+from qonnx.custom_op.qop.reshape_op import *
+from qonnx.custom_op.qop.identity_op import *
+from qonnx.custom_op.qop.shape_op import *
+from qonnx.custom_op.qop.resize_op import *
+from qonnx.custom_op.qop.unsqueeze_op import *
+from qonnx.custom_op.qop.clip_op import *
 
 class CustomEnv():
     imp_strides_opt=False
@@ -74,12 +81,14 @@ class CustomEnv():
     def __init__(self):
         pass
 
-def QLinearConvert(model_file):
-    args = CustomEnv()
-    if os.path.isfile(model_file):
-        onnx_model_name = os.path.basename(model_file)
-        model_path = model_file
-        graph = gs.import_onnx(onnx.load(model_path))
+class QCDQToQOp(Transformation):
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    def apply(self, model: ModelWrapper) -> Tuple[ModelWrapper, bool]:
+        args = CustomEnv()
+        graph = gs.import_onnx(model.model)
 
         graph.fold_constants()
 
@@ -1261,7 +1270,5 @@ def all_ql_conditions_satify(count, node):
 
     model_def = onnx.helper.make_model(graph_def, producer_name="onnx-example")
     model_def.opset_import[0].version = 16
-
-    new_model_name = "q_operator_" + onnx_model_name
-
-    onnx.save(model_def, new_model_name)
+    model_qop = ModelWrapper(model_def)
+    return (model_qop, False)
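PATCH 12 turns the free QLinearConvert function into a qonnx Transformation, whose apply() returns the rewritten model plus a bool telling the framework whether to schedule another pass. A minimal no-op pass showing just that contract (NoOpPass is a hypothetical name used only for illustration):

    from typing import Tuple

    from qonnx.core.modelwrapper import ModelWrapper
    from qonnx.transformation.base import Transformation

    class NoOpPass(Transformation):
        # Skeleton of the contract QCDQToQOp now implements.
        def apply(self, model: ModelWrapper) -> Tuple[ModelWrapper, bool]:
            # False: the graph is stable, no re-run of this pass is needed.
            return (model, False)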
From 0bba81f02d781487222c9bbfa7491fe3eca134c8 Mon Sep 17 00:00:00 2001
From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com>
Date: Fri, 6 Oct 2023 14:51:46 +0530
Subject: [PATCH 13/20] [Code cleanup for QCDQToQOp] Delete
 src/qonnx/transformation/operators directory

---
 src/qonnx/transformation/operators/add_op.py  | 519 ------
 .../operators/averagepool_op.py               |  48 --
 src/qonnx/transformation/operators/cast_op.py |  44 --
 src/qonnx/transformation/operators/clip_op.py |  61 --
 .../transformation/operators/concat_op.py     | 129 -----
 .../operators/dequantizelinear_op.py          | 110 ----
 .../transformation/operators/flatten_op.py    |  53 --
 .../transformation/operators/gather_op.py     | 112 ----
 src/qonnx/transformation/operators/gemm_op.py |  51 --
 .../operators/gemm_op_optimized.py            |  98 ----
 .../operators/globalAveragePool_op.py         | 143 -----
 .../transformation/operators/greater_op.py    |  57 --
 src/qonnx/transformation/operators/helper.py  |  65 ---
 .../transformation/operators/identity_op.py   |  56 --
 src/qonnx/transformation/operators/less_op.py |  57 --
 src/qonnx/transformation/operators/lrn_op.py  |  48 --
 .../transformation/operators/matmul_op.py     | 157 ------
 .../operators/matmul_retained_op.py           | 154 ------
 .../transformation/operators/maxpool_op.py    | 125 -----
 .../operators/qlinearconv_op.py               | 476 ----
 .../operators/quantizelinear_op.py            |  78 ---
 src/qonnx/transformation/operators/relu_op.py |  44 --
 .../transformation/operators/reshape_op.py    |  65 ---
 .../transformation/operators/resize_op.py     |  66 ---
 .../transformation/operators/shape_op.py      |  44 --
 .../transformation/operators/slice_op.py      |  77 ---
 .../transformation/operators/softmax_op.py    |  45 --
 .../transformation/operators/squeeze_op.py    |  57 --
 .../transformation/operators/transpose_op.py  |  45 --
 .../transformation/operators/unsqueeze_op.py  |  62 ---
 30 files changed, 3146 deletions(-)
 delete mode 100644 src/qonnx/transformation/operators/add_op.py
 delete mode 100644 src/qonnx/transformation/operators/averagepool_op.py
 delete mode 100644 src/qonnx/transformation/operators/cast_op.py
 delete mode 100644 src/qonnx/transformation/operators/clip_op.py
 delete mode 100644 src/qonnx/transformation/operators/concat_op.py
 delete mode 100644 src/qonnx/transformation/operators/dequantizelinear_op.py
 delete mode 100644 src/qonnx/transformation/operators/flatten_op.py
 delete mode 100644 src/qonnx/transformation/operators/gather_op.py
 delete mode 100644 src/qonnx/transformation/operators/gemm_op.py
 delete mode 100644 src/qonnx/transformation/operators/gemm_op_optimized.py
 delete mode 100644 src/qonnx/transformation/operators/globalAveragePool_op.py
 delete mode 100644 src/qonnx/transformation/operators/greater_op.py
 delete mode 100644 src/qonnx/transformation/operators/helper.py
 delete mode 100644 src/qonnx/transformation/operators/identity_op.py
 delete mode 100644 src/qonnx/transformation/operators/less_op.py
 delete mode 100644 src/qonnx/transformation/operators/lrn_op.py
 delete mode 100644 src/qonnx/transformation/operators/matmul_op.py
 delete mode 100644 src/qonnx/transformation/operators/matmul_retained_op.py
 delete mode 100644 src/qonnx/transformation/operators/maxpool_op.py
 delete mode 100644
src/qonnx/transformation/operators/qlinearconv_op.py delete mode 100644 src/qonnx/transformation/operators/quantizelinear_op.py delete mode 100644 src/qonnx/transformation/operators/relu_op.py delete mode 100644 src/qonnx/transformation/operators/reshape_op.py delete mode 100644 src/qonnx/transformation/operators/resize_op.py delete mode 100644 src/qonnx/transformation/operators/shape_op.py delete mode 100644 src/qonnx/transformation/operators/slice_op.py delete mode 100644 src/qonnx/transformation/operators/softmax_op.py delete mode 100644 src/qonnx/transformation/operators/squeeze_op.py delete mode 100644 src/qonnx/transformation/operators/transpose_op.py delete mode 100644 src/qonnx/transformation/operators/unsqueeze_op.py diff --git a/src/qonnx/transformation/operators/add_op.py b/src/qonnx/transformation/operators/add_op.py deleted file mode 100644 index cd7da025..00000000 --- a/src/qonnx/transformation/operators/add_op.py +++ /dev/null @@ -1,519 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-# -######################################################################### - -import onnx -from .helper import helper -import numpy as np - -class QLinearAdd: - - def __init__(self, node, aecg_zendnn_opt, remove_relu): - - add_node = node - - if len(add_node.inputs[1].inputs)==0: - # if Add node has only 1 input node and other input is constant tensor we cannot change it to QLinearAdd node hence keeping it as is - x_name = add_node.inputs[0].name - y_name = add_node.outputs[0].name - - const_val = add_node.inputs[1].values - - const_name = add_node.name + "_const_add_tensor" - y_scale_tensor = helper.create_initializer_tensor(name=const_name, - tensor_array=const_val, - data_type=onnx.TensorProto.FLOAT) - - new_add_node = onnx.helper.make_node(name = add_node.name, - op_type = "Add", - inputs = [x_name, const_name], - outputs = [y_name]) - self.node = new_add_node - - if helper.is_child_present(add_node, 0, 0) and add_node.o().op == "Relu": - relu_node = add_node.o() - relu_node1 = onnx.helper.make_node(name = relu_node.name, op_type = "Relu", inputs = [add_node.outputs[0].name], outputs = [relu_node.outputs[0].name]) - self.relu_node = relu_node1 - - intializer_list = [] - intializer_list.append(y_scale_tensor) - self.intializer_list = intializer_list - - else: - input_node1 = add_node.inputs[0].inputs[0] - input_node2 = add_node.inputs[1].inputs[0] - output_node = add_node.o() - - is_relu_present = False - if output_node.op == "Relu": - is_relu_present = True - relu_node = output_node - # relu_node gets updated in later conditions thus keeping relu_node_name and relu_node_output_tensor to make it simple to keep their track - relu_node_name = relu_node.name - relu_node_output_tensor = relu_node.outputs[0].name - if relu_node.o().op == "QuantizeLinear": - output_node = relu_node.o() - else: - print("*********************** ERROR output of Relu node ", relu_node.name, " is not QuantizeLinear ***********************") - elif not(output_node.op == "QuantizeLinear"): - print("*********************** ERROR output of Add node ", add_node.name, " is not QuantizeLinear ***********************") - - - # in order to get scale and zp for the 2 inputs to Add node, we need 2 DQL nodes. - if not (input_node1.op == "DequantizeLinear" and input_node2.op == "DequantizeLinear"): - - """ - case observed in Resnet50v1 - Add1 - | - | - V - Relu-------------------- - | | - | | - V | - QL | - | | - | | - | | - DQL DQL DQL2 | - | | | | - | | | | - ----------------------------Conv | - | | - | | - V | - QL | - | | - | | - V | - DQL DQL DQL | - | | | | - | | | | - | | V | - ---------------------------Conv | - | | - | | - V | - QL | - | | - | | - V | - DQL DQL DQL | - | | | | - | | | | - | | V | - ----------------------------Conv | - | | - | | - V | - QL | - | | - | | - V | - DQL1 | - | | - | | - V | - Add<--------------------- - - - here Add doesn't have 1 of the DQL node, so we take DQL2 as the other DQL node. 
- - in case both inputs are missing DQL node, haven't encountered this case to this is flagged for now, if needed will be handled later depending on the case - """ - if not (input_node1.op == "DequantizeLinear") and not (input_node2.op == "DequantizeLinear"): - print("***************************** ERROR No input of Add node is DequantizeLinear ***********************************") - elif not (input_node1.op == "DequantizeLinear"): - # if input_node1 is not DQL - if input_node1.op == "Relu": - relu_node = input_node1 - if relu_node.i().op == "Add" and relu_node.o().op == "QuantizeLinear": - if (relu_node.o()).o().op == "DequantizeLinear": - input_node1 = (relu_node.o()).o() - # in the example case, shown input_node1 is now DQL2 - elif input_node1.op == "MaxPool": - # when resnet strides has been implemented there will be a maxpool node between the shown Relu and Add node. - maxpool_node = input_node1 - relu_node = maxpool_node.i() - if relu_node.i().op == "Add" and (relu_node.o().op == "QuantizeLinear" or relu_node.output[0].outputs[1].op == "QuantizeLinear"): - if (relu_node.o()).o().op == "DequantizeLinear": - input_node1 = (relu_node.o()).o() - # input_node1 is now DQL2 - elif (relu_node.outputs[0].outputs[1]).o().op == "DequantizeLinear": - input_node2 = (relu_node.outputs[0].outputs[1]).o() - # input_node2 is now DQL2 - elif input_node1.op == "Add": - - """ - this case is observed in mobilenetv2-12-qdq.onnx - - - Add2------------------------- - | | - | | - | V - | QL1 - | | - | | - | V - | DQL1 DQL DQL - | | | | - | | | | - | V | | - | Conv<-------------------- - | | - | | - | V - | QL - | | - | | - | V - | DQL DQL DQL - | | | | - | | | | - | V | | - | Conv<-------------------- - | | - | | - | V - | QL - | | - | | - | V - | DQL DQL DQL - | | | | - | | | | - | V | | - | Conv<------------------- - | | - | | - | V - | QL - | | - | | - | V - | DQL - | | - | | - Add1------------------------- - - Add2 = parent_add_node - QL1 = parent_add_node_ql_node - input_node1 = DQL1 - - """ - parent_add_node = input_node1 - parent_add_node_ql_node = parent_add_node.o() - input_node1 = parent_add_node_ql_node.o() - elif not (input_node2.op == "DequantizeLinear"): - # if input_node2 is not DQL - if input_node2.op == "Relu": - relu_node = input_node2 - if relu_node.i().op == "Add" and relu_node.o().op == "QuantizeLinear": - if (relu_node.o()).o().op == "DequantizeLinear": - input_node2 = (relu_node.o()).o() - # input_node2 is now the DQL node from which we need to take scale and zp - - elif input_node2.op == "MaxPool": - maxpool_node = input_node2 - if maxpool_node.i().op == "Relu": - relu_node = maxpool_node.i() - elif maxpool_node.i().op == "DequantizeLinear": - if maxpool_node.i().i().op == "QuantizeLinear": - if maxpool_node.i().i().i().op == "Relu": - relu_node = maxpool_node.i().i().i() - if relu_node.i().op == "Add" and (relu_node.o().op == "QuantizeLinear" or (len(relu_node.outputs[0].outputs)>1 and relu_node.output[0].outputs[1].op == "QuantizeLinear")): - if (relu_node.o()).o().op == "DequantizeLinear": - input_node2 = (relu_node.o()).o() - elif len(relu_node.outputs[0].outputs)>1 and (relu_node.outputs[0].outputs[1]).o().op == "DequantizeLinear": - input_node2 = (relu_node.outputs[0].outputs[1]).o() - # input_node2 is now the DQL node from which we need to take scale and zp - - if input_node1.op == "DequantizeLinear" and input_node2.op == "DequantizeLinear" and output_node.op == "QuantizeLinear": - # now we have input_node1 = input_node2 = DQL and output as QL node - if 
add_node.inputs[0].inputs[0].op == "MaxPool": - # this is strides case now if Maxpool is parent to Add node, maxpool = node1 - node1 = add_node.i() - elif add_node.inputs[0].inputs[0].op == "Add": - # this is for mobilenet case, so Add2 = node1 - node1 = add_node.i() - else: - """ - if above 2 cases not there lets assume following case now from Resnet50v1 model - - | DQL DQL | DQL DQL - | | | | | | - | | | | | | - Conv<--------------- Conv--------------------- - | | - | | - QL1 QL2 - | | - | | - DQL DQL - | | - | | - Add<----------------------------------------- - - now node1 is QL1/QL2 - - """ - node1 = add_node.inputs[0].inputs[0].i() - - if add_node.inputs[1].inputs[0].op == "MaxPool": - # same as above but for other input, node2 = maxpool node - node2 = add_node.inputs[1].inputs[0] - else: - # same as the above general case discussed, node2 = QL1/QL2 - node2 =input_node2.i() - - if node1.op == "Add": - # this is mobilenet case explained abaove, node1 will be converted to QLinearAdd node and it wiil act as input to current add node - # this a_name = QL1 output tensor name (please refer above mobilenet case) - a_name = node1.o().outputs[0].name - else: - # refering to general case taken above from resnet50v1 model, a_name = QL1/QL2's output tensor name - a_name = node1.outputs[0].name - - a_scale_name = add_node.name + "_A_SCALE" - a_scale_value = input_node1.inputs[1].values - a_scale_tensor = helper.create_initializer_tensor(name=a_scale_name, - tensor_array=a_scale_value, - data_type=onnx.TensorProto.FLOAT) - - a_zp_name = add_node.name + "_A_ZP" - a_zp_value = input_node1.inputs[2].values - - if aecg_zendnn_opt: - a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name, - tensor_array=a_zp_value, - data_type=onnx.TensorProto.UINT8) - else: - if node1.i().op == "QuantizeLinear" and node1.i().i() == "Relu": - a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name, - tensor_array=a_zp_value, - data_type=onnx.TensorProto.UINT8) - else: - if input_node1.inputs[2].dtype == np.int8: - a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name, - tensor_array=a_zp_value, - data_type=onnx.TensorProto.INT8) - elif input_node1.inputs[2].dtype == np.uint8: - a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name, - tensor_array=a_zp_value, - data_type=onnx.TensorProto.UINT8) - - # TODO: Only 1 condition is handled here that Add Node's 1st parent is DQL<--QL and 2nd parent can be Relu. Vice Versa and other cases are not encountered yet thus not handled. 
- if helper.is_parent_exist(node2, 0, 0): - if remove_relu: - # b_name = the QL's output tensor - b_name = node2.outputs[0].name - else: - # check Relu and input of Add node is s8, any 1 input can be checked, thus we check for node1 - if node2.i().op == "Relu" and node1.inputs[2].values.dtype == np.int8: - """ - this case is observed in renset50v1.5 - - DQL DQL - | | - | | - V | - Add<----------------- - | - | - V - Relu1 - | - | - V - QL1 - | - | - V - ------------------------------------DQL1 DQL DQL - | | | | - | | | | - | V | | - | Conv4<-------------------- - | | - | | - | V - | Relu - | | - | | - | V - | QL - | | - | | - | V - | DQL DQL DQL - | | | | - | | | | - | V | | - | Conv3<-------------------- - | | - | | - | V - | Relu - | | - | | - | V - | QL - | | - | | - | V - | DQL DQL DQL - | | | | - | | | | - | V | | - | Conv2<-------------------- - | | - | | - | V - | QL - | | - | | - | V - | DQL - | | - | | - | V - ---------------------------------->Add1 - - - in this case node2 is QL1 - node2_relu_node = Relu1 - thus b_name = Relu1's output as abotve top Add node is converted as follows- - - QLinearAdd - | - | - V - Relu1 - - thus relu1 output is set to b_name - - - """ - node2_relu_node = node2.i() - if node2_relu_node.i().op == "Conv" or node2_relu_node.i().op == "Add": - b_name = node2_relu_node.outputs[0].name - else: - b_name = node2.outputs[0].name - else: - b_name = node2.outputs[0].name - else: - print("************* ERROR ****************** Please check parent of Add Node's parent, ", node2.name) - - b_scale_name = add_node.name + "_B_SCALE" - b_scale_value = input_node2.inputs[1].values - b_scale_tensor = helper.create_initializer_tensor(name=b_scale_name, - tensor_array=b_scale_value, - data_type=onnx.TensorProto.FLOAT) - - b_zp_name = add_node.name + "_B_ZP" - b_zp_value = input_node2.inputs[2].values - - if aecg_zendnn_opt: - b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name, - tensor_array=b_zp_value, - data_type=onnx.TensorProto.UINT8) - else: - if node2.i().op == "QuantizeLinear" and node2.i().i().op == "Relu": - b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name, - tensor_array=b_zp_value, - data_type=onnx.TensorProto.UINT8) - else: - if input_node2.inputs[2].dtype == np.int8: - b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name, - tensor_array=b_zp_value, - data_type=onnx.TensorProto.INT8) - elif input_node2.inputs[2].dtype == np.uint8: - b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name, - tensor_array=b_zp_value, - data_type=onnx.TensorProto.UINT8) - - y_scale_name = add_node.name + "_Y_SCALE" - y_scale_value = output_node.inputs[1].values - y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, - tensor_array=y_scale_value, - data_type=onnx.TensorProto.FLOAT) - - y_zp_name = add_node.name + "_Y_ZP" - y_zp_value = output_node.inputs[2].values - - if aecg_zendnn_opt: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.UINT8) - y_name = output_node.outputs[0].name - else: - if output_node.inputs[2].dtype == np.int8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.INT8) - elif output_node.inputs[2].dtype == np.uint8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.UINT8) - - if is_relu_present and not remove_relu and node1.inputs[2].values.dtype == np.int8: - y_name = add_node.outputs[0].name - 
else: - y_name = output_node.outputs[0].name - - kwargs = {} - kwargs["domain"] = 'com.microsoft' - - - new_add_node = onnx.helper.make_node(name = add_node.name, - op_type = "QLinearAdd", - inputs = [a_name, a_scale_name, a_zp_name, b_name, b_scale_name, b_zp_name, y_scale_name, y_zp_name], - outputs = [y_name], - **kwargs) - - self.node = new_add_node - - if is_relu_present: - relu_node = onnx.helper.make_node(name = relu_node_name, op_type = "Relu", inputs = [add_node.outputs[0].name], outputs = [relu_node_output_tensor]) - self.relu_node = relu_node - - intializer_list = [] - intializer_list.append(a_scale_tensor) - intializer_list.append(a_zp_tensor) - intializer_list.append(b_scale_tensor) - intializer_list.append(b_zp_tensor) - intializer_list.append(y_scale_tensor) - intializer_list.append(y_zp_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list - - def get_relu_node(self): - return self.relu_node diff --git a/src/qonnx/transformation/operators/averagepool_op.py b/src/qonnx/transformation/operators/averagepool_op.py deleted file mode 100644 index db385be5..00000000 --- a/src/qonnx/transformation/operators/averagepool_op.py +++ /dev/null @@ -1,48 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-# -######################################################################### - -import onnx - -class AveragePool: - - def __init__(self, node): - - average_pool_node = node - a_name = average_pool_node.inputs[0].name - - y_name = average_pool_node.outputs[0].name - - new_average_pool_node = onnx.helper.make_node(name = average_pool_node.name, op_type = "AveragePool", - inputs = [a_name], - outputs = [y_name], - ceil_mode = average_pool_node.attrs["ceil_mode"], - kernel_shape = average_pool_node.attrs["kernel_shape"], - pads = average_pool_node.attrs["pads"], - strides = average_pool_node.attrs["strides"]) - - self.node = new_average_pool_node - - def get_node(self): - return self.node \ No newline at end of file diff --git a/src/qonnx/transformation/operators/cast_op.py b/src/qonnx/transformation/operators/cast_op.py deleted file mode 100644 index 578329d9..00000000 --- a/src/qonnx/transformation/operators/cast_op.py +++ /dev/null @@ -1,44 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx - -class Cast: - - def __init__(self, node): - - cast_node = node - - x_name = cast_node.inputs[0].name - y_name = cast_node.outputs[0].name - - new_cast_node = onnx.helper.make_node(name = cast_node.name, op_type = "Cast", - inputs = [x_name], - outputs = [y_name], - to = cast_node.attrs["to"]) - self.node = new_cast_node - - def get_node(self): - return self.node diff --git a/src/qonnx/transformation/operators/clip_op.py b/src/qonnx/transformation/operators/clip_op.py deleted file mode 100644 index f672bfde..00000000 --- a/src/qonnx/transformation/operators/clip_op.py +++ /dev/null @@ -1,61 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper - -class Clip: - - def __init__(self, node): - - clip_node = node - - x_name = clip_node.inputs[0].name - - x2_name = clip_node.inputs[1].name - x2_value = clip_node.inputs[1].values - x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.INT8) - - x3_name = clip_node.inputs[2].name - x3_value = clip_node.inputs[2].values - x3_tensor = helper.create_initializer_tensor(x3_name,x3_value,onnx.TensorProto.INT8) - - new_clip_node = onnx.helper.make_node(name = clip_node.name, op_type = "Clip", - inputs= [x_name, x2_name, x3_name], - outputs = [clip_node.outputs[0].name]) - - self.node = new_clip_node - - intializer_list = [] - intializer_list.append(x2_tensor) - intializer_list.append(x3_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list - diff --git a/src/qonnx/transformation/operators/concat_op.py b/src/qonnx/transformation/operators/concat_op.py deleted file mode 100644 index 4f5e5f6e..00000000 --- a/src/qonnx/transformation/operators/concat_op.py +++ /dev/null @@ -1,129 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-# -######################################################################### - -import onnx -from .helper import helper - -class Concat: - - def __init__(self, node, is_all_concat_input_dql): - - concat_node = node - - number_of_inputs = len(concat_node.inputs) - - zp_value_list = [] - zp_name_list = [] - scale_values_list = [] - scale_name_list = [] - input_tensor_names = [] - - intializer_list = [] - input_names = [] - - for i in range(number_of_inputs): - if is_all_concat_input_dql: - parent_dql_node = concat_node.inputs[i].inputs[0] - scale_values_list.append(parent_dql_node.inputs[1].values) - scale_name_list.append(parent_dql_node.inputs[1].name) - zp_value_list.append(parent_dql_node.inputs[2].values) - zp_name_list.append(parent_dql_node.inputs[2].name) - input_tensor_names.append(parent_dql_node.inputs[0].name) - else: - input_tensor_names.append(concat_node.inputs[i].name) - if len(concat_node.inputs[i].inputs) == 0: - c_input = helper.create_initializer_tensor(name=concat_node.inputs[i].name, - tensor_array=concat_node.inputs[i].values, - data_type=onnx.TensorProto.INT64) - intializer_list.append(c_input) - self.intializer_list = intializer_list - - if is_all_concat_input_dql: - for i in range(number_of_inputs): - scale_tesnor = helper.create_initializer_tensor(name=scale_name_list[i], - tensor_array=scale_values_list[i], - data_type=onnx.TensorProto.FLOAT) - zp_tensor = helper.create_initializer_tensor(name=zp_name_list[i], - tensor_array=zp_value_list[i], - data_type=onnx.TensorProto.UINT8) - intializer_list.append(scale_tesnor) - intializer_list.append(zp_tensor) - - if helper.is_child_present(concat_node, 0, 0) and concat_node.o().op == "DequantizeLinear" and is_all_concat_input_dql: - y_ql_node = concat_node.o() - y_name = y_ql_node.outputs[0].name - else: - y_name = concat_node.outputs[0].name - - if helper.is_child_present(concat_node, 0, 0) and concat_node.o().op == "DequantizeLinear" and is_all_concat_input_dql: - y_scale_name = y_ql_node.inputs[1].name - y_scale_value = y_ql_node.inputs[1].values - y_zp_name = y_ql_node.inputs[2].name - y_zp_value = y_ql_node.inputs[2].values - - y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, - tensor_array=y_scale_value, - data_type=onnx.TensorProto.FLOAT) - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.UINT8) - - intializer_list.append(y_scale_tensor) - intializer_list.append(y_zp_tensor) - self.intializer_list = intializer_list - - input_names.append(y_scale_tensor.name) - input_names.append(y_zp_tensor.name) - - for i in range(number_of_inputs): - input_names.append(input_tensor_names[i]) - if len(scale_name_list)>0 and len(zp_name_list)>0: - input_names.append(scale_name_list[i]) - input_names.append(zp_name_list[i]) - - kwargs = {} - kwargs["domain"] = 'com.microsoft' - - if is_all_concat_input_dql: - new_concat_node = onnx.helper.make_node(name = concat_node.name, - op_type = "QLinearConcat", - inputs = input_names, - outputs = [y_name], - axis = concat_node.attrs["axis"], - **kwargs) - else: - new_concat_node = onnx.helper.make_node(name = concat_node.name, - op_type = "Concat", - inputs = input_names, - outputs = [y_name], - axis = concat_node.attrs["axis"]) - - self.node = new_concat_node - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/dequantizelinear_op.py 
b/src/qonnx/transformation/operators/dequantizelinear_op.py deleted file mode 100644 index 309d6564..00000000 --- a/src/qonnx/transformation/operators/dequantizelinear_op.py +++ /dev/null @@ -1,110 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper -import numpy as np - -class DequantizeLinear: - - def __init__(self, node, aecg_zendnn_opt, remove_relu): - - dql_node = node - - x_name = dql_node.inputs[0].name - - if helper.is_parent_exist(dql_node, 0, 0): - if dql_node.i().op == "QuantizeLinear": - ql_node = dql_node.i() - if helper.is_parent_exist(ql_node,0, 0): - if ql_node.i().op == "Relu": - relu_node = ql_node.i() - if remove_relu: - x_name = ql_node.outputs[0].name - else: - x_name = relu_node.outputs[0].name - else: - print("*************** WARNING *********************** Please check parent of QL node", ql_node.name, " ignore if pattern is correct") - else: - print("*************** WARNING *********************** Please check parent of DQL node", dql_node.name, " ignore if pattern is correct") - self.initializers = [] - - if len(dql_node.inputs[0].inputs) == 0: - if dql_node.inputs[0].dtype == np.uint8: - input_tensor = helper.create_initializer_tensor(name= dql_node.inputs[0].name, - tensor_array=dql_node.inputs[0].values, - data_type=onnx.TensorProto.UINT8) - elif dql_node.inputs[0].dtype == np.int8: - input_tensor = helper.create_initializer_tensor(name= dql_node.inputs[0].name, - tensor_array=dql_node.inputs[0].values, - data_type=onnx.TensorProto.INT8) - elif dql_node.inputs[0].dtype == np.int32: - input_tensor = helper.create_initializer_tensor(name= dql_node.inputs[0].name, - tensor_array=dql_node.inputs[0].values, - data_type=onnx.TensorProto.INT32) - self.initializers.append(input_tensor) - - x_scale_name = dql_node.inputs[1].name - x_scale_value = dql_node.inputs[1].values - x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name,tensor_array=x_scale_value,data_type=onnx.TensorProto.FLOAT) - - x_zp_name = dql_node.inputs[2].name - x_zp_value = dql_node.inputs[2].values - - if aecg_zendnn_opt: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - else: - if dql_node.inputs[2].dtype == np.uint8: - 
x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - if dql_node.inputs[2].dtype == np.int32: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT32) - elif dql_node.inputs[2].dtype == np.int8: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT8) - - y_name = dql_node.outputs[0].name - - dequantizelinear_node = onnx.helper.make_node(name = dql_node.name, - op_type = "DequantizeLinear", - inputs = [x_name, x_scale_name, x_zp_name], - outputs = [y_name]) - - self.node = dequantizelinear_node - - self.initializers.append(x_scale_tensor) - self.initializers.append(x_zp_tensor) - - def get_node(self): - return self.node - - def get_intializers(self): - return self.initializers diff --git a/src/qonnx/transformation/operators/flatten_op.py b/src/qonnx/transformation/operators/flatten_op.py deleted file mode 100644 index 62831558..00000000 --- a/src/qonnx/transformation/operators/flatten_op.py +++ /dev/null @@ -1,53 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx - -class Flatten: - - def __init__(self, node): - - flatten_node = node - x_name = flatten_node.inputs[0].name - y_name = flatten_node.outputs[0].name - - if flatten_node.i().op == "DequantizeLinear": - node1 = flatten_node.i() - x_name = node1.inputs[0].name - - if flatten_node.o().op == "QuantizeLinear": - node2 = flatten_node.o() - y_name = node2.outputs[0].name - - - new_flatten_node = onnx.helper.make_node(name = flatten_node.name, op_type = "Flatten", - inputs = [x_name], - outputs = [y_name]) - - - self.node = new_flatten_node - - def get_node(self): - return self.node \ No newline at end of file diff --git a/src/qonnx/transformation/operators/gather_op.py b/src/qonnx/transformation/operators/gather_op.py deleted file mode 100644 index 5fd01faa..00000000 --- a/src/qonnx/transformation/operators/gather_op.py +++ /dev/null @@ -1,112 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper -import numpy as np - -class Gather: - - def __init__(self, node): - - gather_node = node - # -------------------------------- - # For QCDQ / QDQ model, this case: - # QuantizeLinear - # | (0) - # Gather ---------- (1) Input - # | - # -------------------------------- - gather_parent_node = node - quantized_data_tensor = node - if helper.is_parent_exist(gather_node, 0, 0): - gather_parent_node = node.i(0) - if len(gather_parent_node.inputs) > 1 and helper.is_constant_tensor(gather_parent_node.inputs[1]): - quantized_data_tensor = gather_parent_node.inputs[1].values - - if helper.is_constant_tensor(gather_parent_node.inputs[0]): - if gather_parent_node.op == "QuantizeLinear": - X_DQL_node = gather_parent_node - dequantized_data_tensor = X_DQL_node.inputs[0] - data_scale_tensor = X_DQL_node.inputs[1] - data_zero_point_tensor = X_DQL_node.inputs[2] - - data_scale_tensor = data_scale_tensor.values * np.ones(dequantized_data_tensor.shape) - a = dequantized_data_tensor.values / data_scale_tensor - b = data_zero_point_tensor.values * np.ones(dequantized_data_tensor.shape) - quantized_data_tensor = a + b - quantized_data_tensor = quantized_data_tensor.astype(np.int8) - - else: - if gather_parent_node.op == "QuantizeLinear": - X_QL_node = gather_parent_node - quantized_data_tensor = X_QL_node.inputs[1].values - - data_tensor = helper.create_initializer_tensor(name=gather_node.inputs[0].name, - tensor_array=quantized_data_tensor, - data_type=onnx.TensorProto.INT8) - - if helper.is_constant_tensor(gather_parent_node.inputs[0]): - data_tensor = helper.create_initializer_tensor(name=gather_node.inputs[0].name, - tensor_array=quantized_data_tensor, - data_type=onnx.TensorProto.INT8) - if helper.is_constant_tensor(gather_node.inputs[1]): - if gather_node.inputs[1].dtype == "int64": - indices_tensor = helper.create_initializer_tensor(name=gather_node.inputs[1].name, - tensor_array=gather_node.inputs[1].values, - data_type=onnx.TensorProto.INT64) - else: - print("ERROR check data type in Gather node ", gather_node.name) - - new_gather_node = onnx.helper.make_node(name = gather_node.name, op_type = "Gather", - inputs= [data_tensor.name, gather_node.inputs[1].name], - outputs = [gather_node.outputs[0].name], - axis = 0) - if helper.is_constant_tensor(gather_parent_node.inputs[0]): - new_gather_node = onnx.helper.make_node(name = 
gather_node.name, op_type = "Gather", - inputs= [data_tensor.name, gather_node.inputs[1].name], - outputs = [gather_node.outputs[0].name], - axis = 0) - elif helper.is_constant_tensor(gather_node.inputs[1]): - new_gather_node = onnx.helper.make_node(name = gather_node.name, op_type = "Gather", - inputs= [gather_node.inputs[0].name,indices_tensor.name], - outputs = [gather_node.outputs[0].name], - axis = gather_node.attrs['axis']) - - self.node = new_gather_node - - intializer_list = [] - if helper.is_constant_tensor(gather_parent_node.inputs[0]): - intializer_list.append(data_tensor) - elif helper.is_constant_tensor(gather_node.inputs[1]): - intializer_list.append(indices_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list - diff --git a/src/qonnx/transformation/operators/gemm_op.py b/src/qonnx/transformation/operators/gemm_op.py deleted file mode 100644 index 30a9a904..00000000 --- a/src/qonnx/transformation/operators/gemm_op.py +++ /dev/null @@ -1,51 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper -import numpy as np - -class Gemm: - - def __init__(self, node): - - gemm_node = node - - x1 = gemm_node.inputs[0] - x2 = gemm_node.inputs[1] - x3 = gemm_node.inputs[2] - y = gemm_node.outputs[0] - - new_gemm_node = onnx.helper.make_node(name = gemm_node.name, op_type = "Gemm", - inputs= [x1.name, x2.name, x3.name], - outputs = [y.name], - alpha = gemm_node.attrs["alpha"], - beta = gemm_node.attrs["beta"], - transB = gemm_node.attrs["transB"]) - - self.node = new_gemm_node - - def get_node(self): - return self.node \ No newline at end of file diff --git a/src/qonnx/transformation/operators/gemm_op_optimized.py b/src/qonnx/transformation/operators/gemm_op_optimized.py deleted file mode 100644 index aff0526b..00000000 --- a/src/qonnx/transformation/operators/gemm_op_optimized.py +++ /dev/null @@ -1,98 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-#########################################################################
-
-import onnx
-from .helper import helper
-import numpy as np
-
-class Gemm_optimized:
-
-    def __init__(self, node):
-
-        gemm_node = node
-
-        x1 = gemm_node.inputs[0]
-        x2 = gemm_node.inputs[1]
-        x3 = gemm_node.inputs[2]
-        y = gemm_node.outputs[0]
-
-        # Dequantize the bias: fp32 = (q - zero_point) * scale
-        bias_node = gemm_node.i(2)
-        bias_tensor = bias_node.inputs[0]
-        bias_scale_tensor = bias_node.inputs[1]
-        bias_zero_point = bias_node.inputs[2]
-        bias_scale_tensor = bias_scale_tensor.values * np.ones(bias_tensor.shape)
-        a = bias_tensor.values - bias_zero_point.values * np.ones(bias_tensor.shape)
-        fp32_bias_tensor = a * bias_scale_tensor
-        fp32_bias_tensor = fp32_bias_tensor.astype(np.float32)
-
-        # Fake-quantize the weight: quantize with the QuantizeLinear parameters
-        # (q = w / scale + zero_point), clip to the symmetric int8 range, then
-        # dequantize with the DequantizeLinear parameters.
-        weight_node = gemm_node.i(1).i()
-        if gemm_node.i(1).i().op == "Clip":
-            weight_node = gemm_node.i(1).i().i()
-        weight_tensor = weight_node.inputs[0]
-        weight_scale_tensor = weight_node.inputs[1]
-        weight_zero_point = weight_node.inputs[2]
-        weight_scale_tensor = weight_scale_tensor.values * np.ones(weight_tensor.shape)
-        a = weight_tensor.values / weight_scale_tensor
-        b = weight_zero_point.values * np.ones(weight_tensor.shape)
-        int8_weight = np.round(a + b)
-        int8_weight = np.clip(int8_weight, -127, 127)
-        dq_weight_scale_tensor = gemm_node.i(1).inputs[1]
-        dq_weight_zero_point = gemm_node.i(1).inputs[2]
-        fp32_weight = (int8_weight - dq_weight_zero_point.values * np.ones(int8_weight.shape)) * (dq_weight_scale_tensor.values * np.ones(int8_weight.shape))
-
-        bias_name = x1.name + ".1"
-        weight_name = x1.name + ".2"
-        bias_tensor_1 = helper.create_initializer_tensor(name=bias_name,
-                                                         tensor_array=fp32_bias_tensor,
-                                                         data_type=onnx.TensorProto.FLOAT)
-        weight_tensor_1 = helper.create_initializer_tensor(name=weight_name,
-                                                           tensor_array=fp32_weight,
-                                                           data_type=onnx.TensorProto.FLOAT)
-
-        new_gemm_node = onnx.helper.make_node(name = gemm_node.name, op_type = "Gemm",
-                                              inputs= [x1.name, weight_name, bias_name],
-                                              outputs = [y.name],
-                                              alpha = gemm_node.attrs["alpha"],
-                                              beta = gemm_node.attrs["beta"],
-                                              transB = gemm_node.attrs["transB"])
-
-        # Detach the folded Q/DQ weight subgraph from the graph.
-        node.i(1).i(0).inputs.clear()
-        node.i(1).i(0).outputs.clear()
-        node.i(1).inputs.clear()
-        node.i(1).outputs.clear()
-
-        self.node = new_gemm_node
-        intializer_list = []
-        intializer_list.append(weight_tensor_1)
-        intializer_list.append(bias_tensor_1)
-        self.intializer_list = intializer_list
-
-    def get_node(self):
-        return self.node
-
-    def get_intializers(self):
-        return
self.intializer_list diff --git a/src/qonnx/transformation/operators/globalAveragePool_op.py b/src/qonnx/transformation/operators/globalAveragePool_op.py deleted file mode 100644 index 17f8cec6..00000000 --- a/src/qonnx/transformation/operators/globalAveragePool_op.py +++ /dev/null @@ -1,143 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper -import numpy as np - -class GlobalAveragePool: - - def __init__(self, node, aecg_zendnn_opt, remove_relu): - - golbal_average_pool_node = node - x_name = golbal_average_pool_node.inputs[0].name - y_name = golbal_average_pool_node.outputs[0].name - - if helper.is_parent_exist(golbal_average_pool_node, 0, 0) and golbal_average_pool_node.i().op == "DequantizeLinear": - if helper.is_parent_exist(golbal_average_pool_node, 0, 0): - parent_dql_node = golbal_average_pool_node.i() - else: - print("************* ERROR ****************** Please check 1st parent of GlobalAveragePool, ", golbal_average_pool_node.name, " parent DNE") - - x_scale_name = node.name + "x_scale" - x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, - tensor_array=parent_dql_node.inputs[1].values, - data_type=onnx.TensorProto.FLOAT) - x_zp_name = node.name + "x_zp" - - is_input_s8 = True - - if helper.is_parent_exist(parent_dql_node, 0, 0): - if aecg_zendnn_opt: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=parent_dql_node.inputs[2].values, - data_type=onnx.TensorProto.UINT8) - else: - second_parent = parent_dql_node.i() - if second_parent.op == "Relu": - if helper.is_parent_exist(second_parent, 0, 0) and second_parent.i().op == "QuantizeLinear": - third_parent = second_parent.i() - if third_parent.inputs[2].values.dtype == np.int8: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=third_parent.inputs[2].values, - data_type=onnx.TensorProto.INT8) - is_input_s8 = True - else: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=third_parent.inputs[2].values, - data_type=onnx.TensorProto.UINT8) - is_input_s8 = False - else: - if parent_dql_node.i().inputs[2].values.dtype == np.int8: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=parent_dql_node.inputs[2].values, - 
data_type=onnx.TensorProto.INT8) - is_input_s8 = True - else: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=parent_dql_node.inputs[2].values, - data_type=onnx.TensorProto.UINT8) - is_input_s8 = False - else: - print("************* ERROR ****************** Please check 2nd parent of GlobalAveragePool, ", golbal_average_pool_node.name, " 1st parent of ", parent_dql_node, " parent DNE") - - if parent_dql_node.i().i().op == "Relu" and parent_dql_node.i().i().i().i().inputs[2].values.dtype == np.int8: - if remove_relu: - x_name = parent_dql_node.inputs[0].name - else: - third_parent_relu = parent_dql_node.i().i() - if third_parent_relu.i().op == "Conv" or third_parent_relu.i().op == "Add": - x_name = third_parent_relu.outputs[0].name - else: - x_name = (third_parent_relu.o()).outputs[0].name - else: - x_name = parent_dql_node.inputs[0].name - - if helper.is_child_present(node, 0, 0) and golbal_average_pool_node.o().op == "QuantizeLinear": - child_ql_node = golbal_average_pool_node.o() - - y_scale_name = node.name + "y_scale" - y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, - tensor_array=child_ql_node.inputs[1].values, - data_type=onnx.TensorProto.FLOAT) - y_zp_name = node.name + "y_zp" - - if aecg_zendnn_opt: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=child_ql_node.inputs[2].values, - data_type=onnx.TensorProto.UINT8) - else: - if is_input_s8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=child_ql_node.inputs[2].values, - data_type=onnx.TensorProto.INT8) - else: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=child_ql_node.inputs[2].values, - data_type=onnx.TensorProto.UINT8) - - y_name = child_ql_node.outputs[0].name - - kwargs = {} - kwargs["domain"] = 'com.microsoft' - new_average_pool_node = onnx.helper.make_node(name = golbal_average_pool_node.name, op_type = "QLinearGlobalAveragePool", - inputs = [x_name, x_scale_name, x_zp_name, y_scale_name, y_zp_name], - outputs = [y_name], - channels_last = 0,**kwargs) - - intializer_list = [] - intializer_list.append(x_scale_tensor) - intializer_list.append(x_zp_tensor) - intializer_list.append(y_scale_tensor) - intializer_list.append(y_zp_tensor) - self.intializer_list = intializer_list - - self.node = new_average_pool_node - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/greater_op.py b/src/qonnx/transformation/operators/greater_op.py deleted file mode 100644 index fc54c6e5..00000000 --- a/src/qonnx/transformation/operators/greater_op.py +++ /dev/null @@ -1,57 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. 
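QLinearGlobalAveragePool, which the handler above emits, is a com.microsoft contrib op rather than a standard ONNX operator, which is why the handler sets the domain kwarg. A minimal sketch of constructing such a node with placeholder tensor names:

import onnx

# Placeholder names; the real handler derives them from the surrounding Q/DQ nodes.
pool = onnx.helper.make_node(
    "QLinearGlobalAveragePool",
    inputs=["x", "x_scale", "x_zp", "y_scale", "y_zp"],
    outputs=["y"],
    name="pool0",
    channels_last=0,
    domain="com.microsoft",  # contrib op, not in the standard ONNX opset
)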
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper - -class Greater: - - def __init__(self, node): - - greater_node = node - x1_name = greater_node.inputs[0].name - - x2_name = greater_node.inputs[1].name - x2_value = greater_node.inputs[1].values - x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.FLOAT) - - y_name = greater_node.outputs[0].name - - new_greater_node = onnx.helper.make_node(name = greater_node.name, - op_type = "Greater", - inputs = [x1_name, x2_name], - outputs = [y_name]) - - self.node = new_greater_node - - intializer_list = [] - intializer_list.append(x2_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/helper.py b/src/qonnx/transformation/operators/helper.py deleted file mode 100644 index c070a6a4..00000000 --- a/src/qonnx/transformation/operators/helper.py +++ /dev/null @@ -1,65 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
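Greater, together with the Less and Identity handlers further below, follows one pattern: a constant second operand is lifted into a named graph initializer and the rebuilt node references it by name. The same idea in isolation, using onnx.numpy_helper and hypothetical names:

import numpy as np
import onnx
from onnx import numpy_helper

# Lift a constant operand into an initializer and reference it by name.
threshold = np.array([0.5], dtype=np.float32)
init = numpy_helper.from_array(threshold, name="greater_const")  # TensorProto
node = onnx.helper.make_node("Greater", inputs=["x", "greater_const"],
                             outputs=["y"], name="greater0")
# `init` would then be appended to graph.initializer alongside the node.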
-#
-#########################################################################
-
-import onnx
-import numpy as np
-
-class helper:
-
-    def __init__(self) -> None:
-        pass
-
-    @staticmethod
-    def create_initializer_tensor(name: str, tensor_array: np.ndarray, data_type: onnx.TensorProto = onnx.TensorProto.FLOAT) -> onnx.TensorProto:
-        initializer_tensor = onnx.helper.make_tensor(name=name,
-                                                     data_type=data_type,
-                                                     dims=tensor_array.shape,
-                                                     vals=tensor_array.flatten().tolist())
-        return initializer_tensor
-
-    # to check node.i() exists pass tensor_idx=0, node_idx=0
-    # to check node.inputs[1].inputs[0] exists pass tensor_idx=1, node_idx=0
-    @staticmethod
-    def is_parent_exist(node, tensor_idx, node_idx):
-        if len(node.inputs) > tensor_idx and len(node.inputs[tensor_idx].inputs) > node_idx:
-            return True
-        return False
-
-    @staticmethod
-    def is_child_present(node, tensor_idx, node_idx):
-        if len(node.outputs) > tensor_idx and len(node.outputs[tensor_idx].outputs) > node_idx:
-            return True
-        return False
-
-    @staticmethod
-    def is_attr_exist(node, attr_name):
-        return attr_name in node.attrs
-
-    @staticmethod
-    def is_constant_tensor(tensor):
-        try:
-            tensor.values
-            return True
-        except AttributeError:
-            return False
\ No newline at end of file
diff --git a/src/qonnx/transformation/operators/identity_op.py b/src/qonnx/transformation/operators/identity_op.py
deleted file mode 100644
index e9019659..00000000
--- a/src/qonnx/transformation/operators/identity_op.py
+++ /dev/null
@@ -1,56 +0,0 @@
-########################################################################
-#
-# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
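A short usage note for the probes above, assuming an onnx-graphsurgeon Node `n` (comment-only sketch):

#   helper.is_parent_exist(n, 0, 0)   # safe stand-in for n.i(), i.e. n.inputs[0].inputs[0]
#   helper.is_parent_exist(n, 1, 0)   # safe stand-in for n.inputs[1].inputs[0]
#   helper.is_child_present(n, 0, 0)  # safe stand-in for n.o(), i.e. n.outputs[0].outputs[0]
#
# Guarding with these before calling n.i()/n.o() avoids the IndexError that
# graphsurgeon raises when a tensor has no producer or consumer.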
-# -######################################################################### - -import onnx -from .helper import helper - -class Identity: - - def __init__(self, node): - - identity_node = node - - x1_name = identity_node.inputs[0].name - x1_value = identity_node.inputs[0].values - x1_tensor = helper.create_initializer_tensor(x1_name,x1_value,onnx.TensorProto.FLOAT) - - y_name = identity_node.outputs[0].name - - new_identity_node = onnx.helper.make_node(name = identity_node.name, - op_type = "Identity", - inputs = [x1_name], - outputs = [y_name]) - - self.node = new_identity_node - - intializer_list = [] - intializer_list.append(x1_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/less_op.py b/src/qonnx/transformation/operators/less_op.py deleted file mode 100644 index 9d54216f..00000000 --- a/src/qonnx/transformation/operators/less_op.py +++ /dev/null @@ -1,57 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper - -class Less: - - def __init__(self, node): - - less_node = node - x1_name = less_node.inputs[0].name - - x2_name = less_node.inputs[1].name - x2_value = less_node.inputs[1].values - x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.FLOAT) - - y_name = less_node.outputs[0].name - - new_less_node = onnx.helper.make_node(name = less_node.name, - op_type = "Less", - inputs = [x1_name, x2_name], - outputs = [y_name]) - - self.node = new_less_node - - intializer_list = [] - intializer_list.append(x2_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/lrn_op.py b/src/qonnx/transformation/operators/lrn_op.py deleted file mode 100644 index f8dcbf22..00000000 --- a/src/qonnx/transformation/operators/lrn_op.py +++ /dev/null @@ -1,48 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx - -class LRN: - - def __init__(self, node): - - lrn_node = node - - x_name = lrn_node.inputs[0].name - y_name = lrn_node.outputs[0].name - - new_lrn_node = onnx.helper.make_node(name = lrn_node.name, op_type = "LRN", - inputs = [x_name], - outputs = [y_name], - alpha = lrn_node.attrs["alpha"], - beta = lrn_node.attrs["beta"], - bias = lrn_node.attrs["bias"], - size = lrn_node.attrs["size"]) - - self.node = new_lrn_node - - def get_node(self): - return self.node diff --git a/src/qonnx/transformation/operators/matmul_op.py b/src/qonnx/transformation/operators/matmul_op.py deleted file mode 100644 index 1cb1842d..00000000 --- a/src/qonnx/transformation/operators/matmul_op.py +++ /dev/null @@ -1,157 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
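The MatMul handler that follows, like most handlers in this patch, picks the initializer data type for a zero point by inspecting its numpy dtype inline. A compact, hypothetical form of that dispatch (not a helper the patch itself defines):

import numpy as np
import onnx

def zp_data_type(zp: np.ndarray) -> int:
    # Hypothetical helper: the handlers repeat this int8/uint8 dispatch inline
    # when rebuilding zero-point initializers from Q/DQ nodes.
    if zp.dtype == np.int8:
        return onnx.TensorProto.INT8
    elif zp.dtype == np.uint8:
        return onnx.TensorProto.UINT8
    raise ValueError(f"unexpected zero-point dtype: {zp.dtype}")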
-# -######################################################################### - -import onnx -from .helper import helper -import numpy as np - -class MatMul: - - def __init__(self, node): - matlmul_node = node - - if helper.is_parent_exist(matlmul_node, 0, 0): - x_DQL_node = matlmul_node.i() - else: - print("**************** ERROR ******************* Matmul node ", matlmul_node.name, " input(0,0) DNE") - - if helper.is_parent_exist(matlmul_node, 1, 0): - w_DQL_node = matlmul_node.inputs[1].inputs[0] - else: - print("************* ERROR ************************ Please check the Matmul node ", matlmul_node.name, " the input(1,0) DNE") - - if helper.is_parent_exist(x_DQL_node, 0, 0): - x_QL_node = x_DQL_node.i() - else: - print("**************** ERROR ******************* ", x_DQL_node.name, " input(0,0) DNE Please check") - - x_scale_tensor = x_DQL_node.inputs[1] - x_scale = x_scale_tensor.values - x_zp_tensor = x_DQL_node.inputs[2] - - w_scale_tensor = w_DQL_node.inputs[1] - w_scale = w_scale_tensor.values - w_zp_tensor = w_DQL_node.inputs[2] - - if helper.is_child_present(matlmul_node, 0, 0): - if (matlmul_node.o().op == "QuantizeLinear"): - y_QL_node = matlmul_node.o() - y_scale_tensor = y_QL_node.inputs[1] - y_scale = y_scale_tensor.values - y_zp_tensor = y_QL_node.inputs[2] - else: - print("*********************** ERROR output of Matmul node ", matlmul_node.name, " is not QuantizeLinear ***********************") - else: - print(matlmul_node.name, " output(0,0) DNE") - - quantized_weight_tensor_original = w_DQL_node.inputs[0].values - new_shape = quantized_weight_tensor_original.shape + (1,1) - a1 = np.reshape(quantized_weight_tensor_original, new_shape) - quantized_weight_tensor = np.transpose(a1, (1,0,2,3)) - - if x_QL_node.i().op == "DequantizeLinear" and x_QL_node.i().i().op == "QuantizeLinear": - x_name = x_QL_node.i().i().outputs[0].name - else: - x_name = x_QL_node.outputs[0].name - - y_name = matlmul_node.o().outputs[0].name - - x_scale_name = matlmul_node.name + "_X_SCALE" - x_scale_value = x_scale - x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, - tensor_array=x_scale_value, - data_type=onnx.TensorProto.FLOAT) - - x_zp_name = matlmul_node.name + "_X_ZERO_POINT" - x_zp_value = x_zp_tensor.values - - if (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.int8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT8) - elif (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.uint8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - - w_name = matlmul_node.inputs[1].name - w_value = quantized_weight_tensor - w_tensor = helper.create_initializer_tensor(name=w_name, - tensor_array=w_value, - data_type=onnx.TensorProto.INT8) - - w_scale_name = matlmul_node.name + "_W_SCALE" - w_scale_value = w_scale - w_scale_tensor = helper.create_initializer_tensor(name=w_scale_name, - tensor_array=w_scale_value, - data_type=onnx.TensorProto.FLOAT) - - w_zp_name = matlmul_node.name + "_W_ZERO_POINT" - w_zp_value = w_zp_tensor.values - w_zp_tensor = helper.create_initializer_tensor(name=w_zp_name, - tensor_array=w_zp_value, - data_type=onnx.TensorProto.INT8) - - y_scale_name = matlmul_node.name + "_Y_SCALE" - y_scale_value = y_scale - y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, - tensor_array=y_scale_value, - data_type=onnx.TensorProto.FLOAT) - - y_zp_name = 
matlmul_node.name + "_Y_ZERO_POINT" - y_zp_value = y_zp_tensor.values - - if y_zp_tensor.dtype == np.int8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.INT8) - elif y_zp_tensor.dtype == np.uint8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.UINT8) - - qlinearconv_node = onnx.helper.make_node(name = matlmul_node.name, op_type = "QLinearConv", - inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name], - outputs = [y_name], dilations = [1,1], group = 1, - kernel_shape = [1,1], pads = [0,0,0,0], strides = [1,1]) - - - self.node = qlinearconv_node - - intializer_list = [] - intializer_list.append(x_scale_tensor) - intializer_list.append(x_zp_tensor) - intializer_list.append(w_tensor) - intializer_list.append(w_scale_tensor) - intializer_list.append(w_zp_tensor) - intializer_list.append(y_scale_tensor) - intializer_list.append(y_zp_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/matmul_retained_op.py b/src/qonnx/transformation/operators/matmul_retained_op.py deleted file mode 100644 index ba410bc9..00000000 --- a/src/qonnx/transformation/operators/matmul_retained_op.py +++ /dev/null @@ -1,154 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
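The MatMul handler above emits a 1x1 QLinearConv, which requires recasting the (K, N) MatMul weight as an (N, K, 1, 1) convolution weight. That layout change in isolation, with illustrative sizes:

import numpy as np

K, N = 64, 10
w = np.zeros((K, N), dtype=np.int8)                            # (K, N) MatMul weight
w4 = np.transpose(np.reshape(w, (K, N, 1, 1)), (1, 0, 2, 3))   # -> (N, K, 1, 1) Conv weight
assert w4.shape == (N, K, 1, 1)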
-# -######################################################################### - -import onnx -from .helper import helper -import numpy as np - -class MatMul_Retained: - - def __init__(self, node): - matlmul_node = node - - if helper.is_parent_exist(matlmul_node, 0, 0): - x_DQL_node = matlmul_node.i() - else: - print("**************** ERROR ******************* Matmul node ", matlmul_node.name, " input(0,0) DNE") - - if helper.is_parent_exist(matlmul_node, 1, 0): - w_DQL_node = matlmul_node.inputs[1].inputs[0] - else: - print("************* ERROR ************************ Please check the Matmul node ", matlmul_node.name, " the input(1,0) DNE") - - if helper.is_parent_exist(x_DQL_node, 0, 0): - x_QL_node = x_DQL_node.i() - else: - print("**************** ERROR ******************* ", x_DQL_node.name, " input(0,0) DNE Please check") - - x_scale_tensor = x_DQL_node.inputs[1] - x_scale = x_scale_tensor.values - x_zp_tensor = x_DQL_node.inputs[2] - - w_scale_tensor = w_DQL_node.inputs[1] - w_scale = w_scale_tensor.values - w_zp_tensor = w_DQL_node.inputs[2] - - if helper.is_child_present(matlmul_node, 0, 0): - if (matlmul_node.o().op == "QuantizeLinear"): - y_QL_node = matlmul_node.o() - y_scale_tensor = y_QL_node.inputs[1] - y_scale = y_scale_tensor.values - y_zp_tensor = y_QL_node.inputs[2] - else: - print("*********************** ERROR output of Matmul node ", matlmul_node.name, " is not QuantizeLinear ***********************") - else: - print(matlmul_node.name, " output(0,0) DNE") - - if x_QL_node.op == "QuantizeLinear" or x_QL_node.op == "MaxPool": - x_name = x_QL_node.outputs[0].name - else: - print("please check x_QL_node of Matmul node ", matlmul_node.name) - - y_name = y_QL_node.outputs[0].name - - x_scale_name = matlmul_node.name + "_X_SCALE" - x_scale_value = x_scale - x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, - tensor_array=x_scale_value, - data_type=onnx.TensorProto.FLOAT) - - x_zp_name = matlmul_node.name + "_X_ZERO_POINT" - x_zp_value = x_zp_tensor.values - - if (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.int8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT8) - elif (x_QL_node.op == "MaxPool"): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - elif (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.uint8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - - w_name = matlmul_node.inputs[1].name - w_value = w_DQL_node.inputs[0].values - w_tensor = helper.create_initializer_tensor(name=w_name, - tensor_array=w_value, - data_type=onnx.TensorProto.INT8) - - w_scale_name = matlmul_node.name + "_W_SCALE" - w_scale_value = w_scale - w_scale_tensor = helper.create_initializer_tensor(name=w_scale_name, - tensor_array=w_scale_value, - data_type=onnx.TensorProto.FLOAT) - - w_zp_name = matlmul_node.name + "_W_ZERO_POINT" - w_zp_value = w_zp_tensor.values - w_zp_tensor = helper.create_initializer_tensor(name=w_zp_name, - tensor_array=w_zp_value, - data_type=onnx.TensorProto.INT8) - - y_scale_name = matlmul_node.name + "_Y_SCALE" - y_scale_value = y_scale - y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, - tensor_array=y_scale_value, - data_type=onnx.TensorProto.FLOAT) - - y_zp_name = matlmul_node.name + "_Y_ZERO_POINT" - y_zp_value = y_zp_tensor.values - - if 
y_zp_tensor.dtype == np.int8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.INT8) - elif y_zp_tensor.dtype == np.uint8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.UINT8) - - qlinear_matmul = onnx.helper.make_node(name = matlmul_node.name, op_type = "QLinearMatMul", - inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name], - outputs = [y_name]) - - self.node = qlinear_matmul - - intializer_list = [] - intializer_list.append(x_scale_tensor) - intializer_list.append(x_zp_tensor) - intializer_list.append(w_tensor) - intializer_list.append(w_scale_tensor) - intializer_list.append(w_zp_tensor) - intializer_list.append(y_scale_tensor) - intializer_list.append(y_zp_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/maxpool_op.py b/src/qonnx/transformation/operators/maxpool_op.py deleted file mode 100644 index 345393f8..00000000 --- a/src/qonnx/transformation/operators/maxpool_op.py +++ /dev/null @@ -1,125 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
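MatMul_Retained instead keeps the matrix multiply by emitting QLinearMatMul, a standard ONNX operator (opset 10+) with the same eight-input quantization signature. A minimal sketch with placeholder names:

import onnx

qmatmul = onnx.helper.make_node(
    "QLinearMatMul",
    inputs=["a", "a_scale", "a_zp", "b", "b_scale", "b_zp", "y_scale", "y_zp"],
    outputs=["y"],
    name="matmul0",
)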
-# -######################################################################### - -import onnx -from .helper import helper - -class MaxPool: - - def __init__(self, node, maxpool_count, remove_relu): - - maxpool_node = node - x_name = maxpool_node.inputs[0].name - y_name = maxpool_node.outputs[0].name - - if helper.is_child_present(maxpool_node, 0, 0) and maxpool_node.o().op == "QuantizeLinear": - if helper.is_parent_exist(maxpool_node, 0, 0) and maxpool_node.i().op == "DequantizeLinear": - q_node = maxpool_node.o() - y_name = q_node.outputs[0].name - - if helper.is_parent_exist(maxpool_node, 0, 0): - found_relu = False - if maxpool_node.i().op == "Relu": - relu_node = maxpool_node.i() - found_relu = True - elif maxpool_node.i().op == "DequantizeLinear": - if maxpool_node.i().i().i().op == "Relu": - relu_node = maxpool_node.i().i().i() - found_relu = True - elif maxpool_node.i().i().i().op == "Concat": - x_name = maxpool_node.i().i().outputs[0].name - if maxpool_node.o().op == "QuantizeLinear": - y_name = maxpool_node.o().outputs[0].name - elif maxpool_node.i().i().op == "MaxPool": - x_name = maxpool_node.i().i().outputs[0].name - - if found_relu: - if helper.is_child_present(relu_node, 0, 0) and relu_node.outputs[0].outputs[0].op == "MaxPool": - ql_node = relu_node.outputs[0].outputs[0] - x_name = ql_node.outputs[0].name - elif helper.is_child_present(relu_node, 0, 1) and relu_node.outputs[0].outputs[1].op == "MaxPool": - ql_node = relu_node.outputs[0].outputs[0] - x_name = ql_node.outputs[0].name - elif helper.is_child_present(relu_node, 0, 0) and relu_node.o().o().outputs[0].outputs[0].op == "MaxPool": - x_name = relu_node.outputs[0].name - elif helper.is_child_present(relu_node, 0, 0) and relu_node.o().o().outputs[0].outputs[1].op == "MaxPool": - x_name = relu_node.outputs[0].name - - - if maxpool_node.i().op == "QuantizeLinear": - x_ql_node = maxpool_node.i() - if remove_relu: - x_name = x_ql_node.outputs[0].name - else: - if helper.is_parent_exist(x_ql_node, 0, 0) and x_ql_node.i().op == "Relu" and x_ql_node.i().i().op == "Conv": - relu_node = x_ql_node.i() - x_name = relu_node.outputs[0].name - - if helper.is_attr_exist(maxpool_node, 'auto_pad'): - auto_pad_attr = maxpool_node.attrs["auto_pad"] - else: - auto_pad_attr = "NOTSET" - - if helper.is_attr_exist(maxpool_node, 'ceil_mode'): - ceil_mode_attr = maxpool_node.attrs["ceil_mode"] - else: - ceil_mode_attr = 0 - - if helper.is_attr_exist(maxpool_node, 'dilations'): - dilations_attr = maxpool_node.attrs["dilations"] - else: - dilations_attr =[1,1] - - if helper.is_attr_exist(maxpool_node, 'pads'): - pads_attr = maxpool_node.attrs["pads"] - else: - pads_attr = [0,0,0,0] - - if helper.is_attr_exist(maxpool_node, 'storage_order'): - storage_order_attr = maxpool_node.attrs["storage_order"] - else: - storage_order_attr = 0 - - if helper.is_attr_exist(maxpool_node, 'strides'): - strides_attr = maxpool_node.attrs["strides"] - else: - strides_attr = [1,1] - - new_mapool_node = onnx.helper.make_node(name = maxpool_node.name, - op_type = "MaxPool", - inputs = [x_name], - outputs = [y_name], - auto_pad = auto_pad_attr, - ceil_mode = ceil_mode_attr, - dilations = dilations_attr, - pads = pads_attr, - storage_order = storage_order_attr, - strides = strides_attr, - kernel_shape = maxpool_node.attrs["kernel_shape"]) - - self.node = new_mapool_node - - def get_node(self): - return self.node \ No newline at end of file diff --git a/src/qonnx/transformation/operators/qlinearconv_op.py b/src/qonnx/transformation/operators/qlinearconv_op.py deleted file 
mode 100644 index f1960df1..00000000 --- a/src/qonnx/transformation/operators/qlinearconv_op.py +++ /dev/null @@ -1,476 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -import numpy as np -from .helper import helper - -class QLinearConv: - - def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count): - x_DQL_node = node.i() - - conv_node = node - - has_bias = True if len(conv_node.inputs) == 3 else False - - w_DQL_node = conv_node.inputs[1].inputs[0] - QCDQ_model_detected=False - clip_max = np.iinfo(np.int8).min - clip_min = np.iinfo(np.int8).max - if (helper.is_constant_tensor(w_DQL_node.i())==False and w_DQL_node.i().op == "Clip"): - QCDQ_model_detected=True - clip_min = w_DQL_node.i().inputs[1].values - clip_max = w_DQL_node.i().inputs[2].values - - # b_DQL_node = (3) - # ------------------------------------------------------------------------ - # (1) (2) DequantizeLinear (1) (2) - # \ | / (3) for bias OR \ / - # \ | / \ / - # Conv (QDQ model) Conv (3 - FP32 bias embedded) (QCDQ model) - # | | - # ------------------------------------------------------------------------ - # Initialization - b_DQL_node = conv_node - b_DQL_tensor = conv_node - if has_bias: - b_DQL_node = conv_node.inputs[2] # For QDQ - b_DQL_tensor = conv_node.inputs[2] # For QCDQ - if has_bias and QCDQ_model_detected==False: - b_DQL_node = conv_node.inputs[2].inputs[0] - is_fp32_bias_embedded = False - if QCDQ_model_detected: - if helper.is_constant_tensor(b_DQL_tensor) and b_DQL_tensor.dtype == "float32": - is_fp32_bias_embedded = True - b_QL_tensor = b_DQL_tensor - if is_fp32_bias_embedded: - if not helper.is_parent_exist(b_DQL_tensor, 0, 0): - b_QL_tensor = b_DQL_tensor - - is_weight_tensor_quantized = False - if len(w_DQL_node.inputs[0].inputs) == 0: - is_weight_tensor_quantized = True - is_bias_tensor_quantized = False - if QCDQ_model_detected and has_bias and not is_fp32_bias_embedded and not helper.is_parent_exist(b_DQL_tensor, 0, 0) and b_DQL_tensor.dtype == "int32": - is_bias_tensor_quantized = True - elif QCDQ_model_detected==False and has_bias and len(b_DQL_node.inputs[0].inputs) == 0: - is_bias_tensor_quantized = True - - if not is_weight_tensor_quantized: - w_QL_node = w_DQL_node.i() - - if QCDQ_model_detected==False and has_bias and (not 
is_bias_tensor_quantized): - b_QL_node = b_DQL_node.i() - - x_scale_tensor = x_DQL_node.inputs[1] - x_scale = x_scale_tensor.values - x_zp_tensor = x_DQL_node.inputs[2] - - w_scale_tensor = w_DQL_node.inputs[1] - w_scale = w_scale_tensor.values - w_zp_tensor = w_DQL_node.inputs[2] - - is_relu_present = False - if conv_node.o().op == "Relu": - relu_node = conv_node.o() - is_relu_present = True - if relu_node.o().op == "QuantizeLinear": - y_QL_node = relu_node.o() - y_scale_tensor = y_QL_node.inputs[1] - y_scale = y_scale_tensor.values - y_zp_tensor = y_QL_node.inputs[2] - else: - print("*********************** ERROR output of Relu node ", relu_node.name, " is not QuantizeLinear ***********************") - elif (conv_node.o().op == "QuantizeLinear"): - y_QL_node = conv_node.o() - y_scale_tensor = y_QL_node.inputs[1] - y_scale = y_scale_tensor.values - y_zp_tensor = y_QL_node.inputs[2] - else: - print("*********************** ERROR output of Conv node ", conv_node.name, " is not QuantizeLinear ***********************") - - S8_MIN = np.iinfo(np.int8).min - S8_MAX = np.iinfo(np.int8).max - if clip_min != np.iinfo(np.int8).max and clip_max != np.iinfo(np.int8).min: - S8_MIN = clip_min - S8_MAX = clip_max - U8_MIN = np.iinfo(np.uint8).min - U8_MAX = np.iinfo(np.uint8).max - S32_MIN = np.iinfo(np.int32).min - S32_MAX = np.iinfo(np.int32).max - - if (QCDQ_model_detected and helper.is_parent_exist(w_DQL_node, 0, 0) and helper.is_parent_exist(w_DQL_node.i(0), 0, 0) and w_DQL_node.i(0).i(0).op == "QuantizeLinear"): - w_QL_node = w_DQL_node.i(0).i(0) - - if QCDQ_model_detected==False and has_bias and (not is_bias_tensor_quantized) and helper.is_parent_exist(b_DQL_node, 0, 0): - b_QL_node = b_DQL_node.i() - - # -------------------------------------------------------------------------- - # QuantizeLinear (w_QL_node set to this in first if condition) - # | - # Clip - # | - # DequantizeLinear (for weight) - # (0) / (1) - # | / - # Conv - # -------------------------------------------------------------------------- - if QCDQ_model_detected and helper.is_parent_exist(w_DQL_node, 0, 0) and helper.is_parent_exist(w_DQL_node.i(0), 0, 0): - w_QL_node = w_DQL_node.i().i() - quantized_weight_tensor = w_QL_node.inputs[0] - #if is_weight_tensor_quantized and QCDQ_model_detected: - # quantized_weight_tensor = w_DQL_node.inputs[1].values - if is_weight_tensor_quantized and not QCDQ_model_detected: - quantized_weight_tensor = w_DQL_node.inputs[0].values - elif helper.is_constant_tensor(w_QL_node): - quantized_weight_tensor = w_QL_node.values - quantized_weight_tensor = np.clip(quantized_weight_tensor, S8_MIN, S8_MAX) - quantized_weight_tensor = np.round(quantized_weight_tensor) - quantized_weight_tensor = quantized_weight_tensor.astype(np.int8) - elif not helper.is_constant_tensor(w_QL_node): - weight_tensor = w_QL_node.inputs[0] - weight_scale_tensor = w_QL_node.inputs[1] - weight_zp_tensor = w_QL_node.inputs[2] - - weight_scaled_tensor = weight_scale_tensor.values * np.ones(weight_tensor.shape) - if QCDQ_model_detected: - weight_scaled_tensor = np.ones(weight_tensor.shape) * weight_scale_tensor.values[:, np.newaxis, np.newaxis, np.newaxis] - b = weight_tensor.values / weight_scaled_tensor - c = weight_zp_tensor.values * np.ones(weight_tensor.shape) - if QCDQ_model_detected: - c = weight_zp_tensor.values[:, np.newaxis, np.newaxis, np.newaxis] * np.ones(weight_tensor.shape) - quantized_weight_tensor = b + c - if weight_zp_tensor.dtype == "int8": - quantized_weight_tensor = np.clip(quantized_weight_tensor, S8_MIN, 
S8_MAX) - elif weight_zp_tensor.dtype == "uint8": - quantized_weight_tensor = np.clip(quantized_weight_tensor, U8_MIN, U8_MAX) - quantized_weight_tensor = np.round(quantized_weight_tensor) - quantized_weight_tensor = quantized_weight_tensor.astype(np.int8) - if QCDQ_model_detected: - clip_node = w_DQL_node.i() - clip_node.inputs.clear() - clip_node.outputs.clear() - - if has_bias and is_bias_tensor_quantized: - quantized_bias_tensor = b_DQL_node.inputs[0].values - elif is_fp32_bias_embedded and has_bias: - bias_tensor = b_QL_tensor - bias_scale_tensor1 = w_QL_node.inputs[1] - bias_zp_tensor = w_QL_node.inputs[2] - - # satutration after QL node - a = x_scale * bias_scale_tensor1.values - b = bias_tensor.values / a - # Zero point is set to 0 for quantizing bias - d = b - d = np.round(d) - quantized_bias_tensor = d - quantized_bias_tensor = np.clip(quantized_bias_tensor, S32_MIN, S32_MAX) - quantized_bias_tensor = np.round(quantized_bias_tensor) - quantized_bias_tensor = quantized_bias_tensor.astype(np.int32) - elif has_bias: - bias_tensor = b_QL_node.inputs[0] - bias_scale_tensor1 = b_QL_node.inputs[1] - bias_zp_tensor = b_QL_node.inputs[2] - - # satutration after QL node - a = bias_scale_tensor1.values * np.ones(bias_tensor.shape) - b = bias_tensor.values / a - c = bias_zp_tensor.values * np.ones(bias_tensor.shape) - d = b + c - if bias_zp_tensor.dtype == "int8": - d = np.clip(d, S8_MIN, S8_MAX) - elif bias_zp_tensor.dtype == "uint8": - d = np.clip(d, U8_MIN, U8_MAX) - d = np.round(d) - - # now again dequantize it - e = d * a - f = e - c - # f is now fp32 tensor - - bias_scale = x_scale * w_scale - bias_scale_tensor = bias_scale * np.ones(bias_tensor.shape) - quantized_bias_tensor = (f / bias_scale_tensor) - quantized_bias_tensor = np.clip(quantized_bias_tensor, S32_MIN, S32_MAX) - quantized_bias_tensor = np.round(quantized_bias_tensor) - quantized_bias_tensor = quantized_bias_tensor.astype(np.int32) - - x_QL_node = x_DQL_node.i() - is_x_QL_maxpool = False - is_X_QL_transpose = True if x_QL_node.op == "Transpose" else False - maxpool_input_s8 = False # True means s8 False means u8 - if x_QL_node.op == "MaxPool": - is_x_QL_maxpool = True - - if helper.is_parent_exist(x_QL_node, 0, 0): - if x_QL_node.i().op == "Relu": - if remove_relu: - # if this flag is enabled, then relu will not be added thus x_name will be x_QL's output tensor name - x_name = x_QL_node.outputs[0].name - else: - if (x_QL_node.i().i().op == "Conv") or (x_QL_node.i().i().op == "Add" and x_QL_node.i().i().i().inputs[2].values.dtype == np.int8): - - """ - these are 2 condtions - one in resnet50v1 - - DQL DQL - | | - | | - V | - Add<------------- - | - | - V - Relu------------------------------ - | - | - QL (x_QL_node) - | - | - DQL DQL DQL - | | | - | | | - Conv<------------ - - if Add input is s8 - x_relu_node = Relu - relu will be maintained due to s8 data type thus - x_name = relu's output - - other case is in Resnet50v1.5 - - Conv - | - | - Relu - | - | - QL - | - | - DQL DQL DQL - | | | - | | | - Conv<------------ - - we maintain relu node here thus x_name = relu's output - - """ - x_relu_node = x_QL_node.i() - x_name = x_relu_node.outputs[0].name - else: - x_name = x_QL_node.outputs[0].name - elif x_QL_node.op == "MaxPool": - """ - this is resnet50v1 case - - QL - | - | - V - Maxpool - | - | - V - DQL DQL DQL - | | | - | | | - V | | - Conv<------------ - - """ - x_name = x_QL_node.outputs[0].name - if x_QL_node.i().op == "QuantizeLinear": - if (x_QL_node.i()).inputs[2].dtype == np.int8: - maxpool_input_s8 = True - elif 
(x_QL_node.i()).inputs[2].dtype == np.uint8: - maxpool_input_s8 = False - else: - x_name = x_QL_node.outputs[0].name - if x_QL_node.op == "Clip": - x_name = str(int(x_QL_node.o().outputs[0].name)-3) - else: - x_name = x_QL_node.outputs[0].name - - if is_relu_present and not(remove_relu): - y_name = conv_node.outputs[0].name - else: - y_name = y_QL_node.outputs[0].name - - x_scale_name = conv_node.name + "_X_SCALE" - x_scale_value = x_scale - x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, - tensor_array=x_scale_value, - data_type=onnx.TensorProto.FLOAT) - - x_zp_name = conv_node.name + "_X_ZERO_POINT" - x_zp_value = x_zp_tensor.values - - if aecg_zendnn_opt and conv_count > 0: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - else: - if is_x_QL_maxpool: - if maxpool_input_s8: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT8) - else: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - elif is_X_QL_transpose: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT8) - else: - if (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.int8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT8) - elif (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.uint8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - elif x_QL_node.op == "Relu" or x_QL_node.op == "Clip": - if (x_QL_node.i().op == "QuantizeLinear" and x_QL_node.i().inputs[2].dtype == np.int8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT8) - elif (x_QL_node.i().op == "QuantizeLinear" and x_QL_node.i().inputs[2].dtype == np.uint8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - else: - print("ERROR Please check x_zp_tensor of ", conv_node.name) - - w_name = conv_node.inputs[1].name - w_value = quantized_weight_tensor - w_tensor = helper.create_initializer_tensor(name=w_name, - tensor_array=w_value, - data_type=onnx.TensorProto.INT8) - - w_scale_name = conv_node.name + "_W_SCALE" - w_scale_value = w_scale - w_scale_tensor = helper.create_initializer_tensor(name=w_scale_name, - tensor_array=w_scale_value, - data_type=onnx.TensorProto.FLOAT) - - w_zp_name = conv_node.name + "_W_ZERO_POINT" - w_zp_value = w_zp_tensor.values - w_zp_tensor = helper.create_initializer_tensor(name=w_zp_name, - tensor_array=w_zp_value, - data_type=onnx.TensorProto.INT8) - - y_scale_name = conv_node.name + "_Y_SCALE" - y_scale_value = y_scale - y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, - tensor_array=y_scale_value, - data_type=onnx.TensorProto.FLOAT) - - y_zp_name = conv_node.name + "_Y_ZERO_POINT" - y_zp_value = y_zp_tensor.values - - if aecg_zendnn_opt: - # if this opt is enabled then y_zp has be to set to u8 type - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.UINT8) - else: - if y_zp_tensor.dtype == np.int8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - 
data_type=onnx.TensorProto.INT8) - elif y_zp_tensor.dtype == np.uint8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.UINT8) - - if has_bias: - b_name = conv_node.inputs[2].name - b_value = quantized_bias_tensor - b_tensor = helper.create_initializer_tensor(name=b_name, - tensor_array=b_value, - data_type=onnx.TensorProto.INT32) - - if helper.is_attr_exist(conv_node, 'auto_pad'): - auto_pad_attr = conv_node.attrs["auto_pad"] - else: - auto_pad_attr = "NOTSET" - - if helper.is_attr_exist(conv_node, 'dilations'): - dilations_attr = conv_node.attrs["dilations"] - else: - dilations_attr = 1 - - if helper.is_attr_exist(conv_node, 'group'): - group_attr = conv_node.attrs["group"] - else: - group_attr = 1 - - if helper.is_attr_exist(conv_node, 'pads'): - pads_attr = conv_node.attrs["pads"] - else: - pads_attr = [0,0,0,0] - - if helper.is_attr_exist(conv_node, 'strides'): - strides_attr = conv_node.attrs["strides"] - else: - strides_attr = 1 - - qlinearconv_node = onnx.helper.make_node(name = conv_node.name, op_type = "QLinearConv", inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name], outputs = [y_name], auto_pad = auto_pad_attr, group = group_attr, dilations = dilations_attr, kernel_shape = conv_node.attrs["kernel_shape"], pads = pads_attr, strides = strides_attr) - if has_bias: - qlinearconv_node = onnx.helper.make_node(name = conv_node.name, op_type = "QLinearConv", inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name, b_name], outputs = [y_name], auto_pad = auto_pad_attr, group = group_attr, dilations = dilations_attr, kernel_shape = conv_node.attrs["kernel_shape"], pads = pads_attr, strides = strides_attr) - - if is_relu_present: - relu_node = onnx.helper.make_node(name = relu_node.name, op_type = "Relu", inputs = [conv_node.outputs[0].name], outputs = [relu_node.outputs[0].name]) - self.relu_node = relu_node - - self.node = qlinearconv_node - - intializer_list = [] - intializer_list.append(x_scale_tensor) - intializer_list.append(x_zp_tensor) - intializer_list.append(w_tensor) - intializer_list.append(w_scale_tensor) - intializer_list.append(w_zp_tensor) - intializer_list.append(y_scale_tensor) - intializer_list.append(y_zp_tensor) - if has_bias: - intializer_list.append(b_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list - - def get_relu_node(self): - return self.relu_node diff --git a/src/qonnx/transformation/operators/quantizelinear_op.py b/src/qonnx/transformation/operators/quantizelinear_op.py deleted file mode 100644 index d35b21b0..00000000 --- a/src/qonnx/transformation/operators/quantizelinear_op.py +++ /dev/null @@ -1,78 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
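The QLinearConv handler's fp32-embedded-bias branch quantizes the bias with scale x_scale * w_scale and zero point 0, which is what QLinearConv expects of its int32 bias input. That computation as a standalone sketch (names illustrative):

import numpy as np

def quantize_bias(bias_fp32: np.ndarray, x_scale: float, w_scale: np.ndarray) -> np.ndarray:
    # QLinearConv takes an int32 bias quantized with scale = x_scale * w_scale
    # and zero point 0, matching the fp32-embedded-bias branch above.
    q = np.round(bias_fp32 / (x_scale * w_scale))
    i32 = np.iinfo(np.int32)
    return np.clip(q, i32.min, i32.max).astype(np.int32)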
-# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper -import numpy as np - -class QuantizeLinear: - - def __init__(self, node): - ql_node = node - - x_name = ql_node.inputs[0].name - flag = False - if helper.is_child_present(node, 0, 0) and node.o().op == "DequantizeLinear": - if helper.is_child_present(node.o(), 0, 0) and node.o().o().op == "Conv": - if helper.is_child_present(node.o().o(), 0, 0) and node.o().o().o().op == "Reshape": - flag = True - x_tensor = helper.create_initializer_tensor(name = x_name,tensor_array = ql_node.inputs[0].values, data_type = onnx.TensorProto.FLOAT) - elif helper.is_child_present(node.o(), 0, 0) and node.o().o().op == "Gemm": - flag = True - x_tensor = helper.create_initializer_tensor(name = x_name,tensor_array = ql_node.inputs[0].values, data_type = onnx.TensorProto.FLOAT) - - y_scale_name = ql_node.inputs[1].name - y_scale_value = ql_node.inputs[1].values - y_scale_tensor = helper.create_initializer_tensor(name = y_scale_name,tensor_array = y_scale_value, data_type = onnx.TensorProto.FLOAT) - - y_zp_name = ql_node.inputs[2].name - y_zp_value = ql_node.inputs[2].values - if ql_node.inputs[2].dtype == np.int8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type = onnx.TensorProto.INT8) - elif ql_node.inputs[2].dtype == np.uint8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type = onnx.TensorProto.UINT8) - - y_name = ql_node.outputs[0].name - - quantizelinear_node = onnx.helper.make_node(name = ql_node.name, op_type = "QuantizeLinear", inputs = [x_name, y_scale_name, y_zp_name], outputs = [y_name]) - - self.node = quantizelinear_node - - intializer_list = [] - if flag: - intializer_list.append(x_tensor) - intializer_list.append(y_scale_tensor) - intializer_list.append(y_zp_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/relu_op.py b/src/qonnx/transformation/operators/relu_op.py deleted file mode 100644 index 58cc23cd..00000000 --- a/src/qonnx/transformation/operators/relu_op.py +++ /dev/null @@ -1,44 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. 
All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx - -class Relu: - - def __init__(self, node): - - relu_node = node - - x_name = relu_node.inputs[0].name - y_name = relu_node.outputs[0].name - - new_relu_node = onnx.helper.make_node(name = relu_node.name, op_type = "Relu", - inputs = [x_name], - outputs = [y_name]) - - self.node = new_relu_node - - def get_node(self): - return self.node diff --git a/src/qonnx/transformation/operators/reshape_op.py b/src/qonnx/transformation/operators/reshape_op.py deleted file mode 100644 index 424cd38f..00000000 --- a/src/qonnx/transformation/operators/reshape_op.py +++ /dev/null @@ -1,65 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-# -######################################################################### - -import onnx -from .helper import helper - -class Reshape: - - def __init__(self, node): - - reshape_node = node - - x_name = reshape_node.inputs[0].name - - x2_name = reshape_node.inputs[1].name - if helper.is_constant_tensor(reshape_node.inputs[1]): - x2_value = reshape_node.inputs[1].values - x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.INT64) - - y_name = reshape_node.outputs[0].name - - try: - new_reshape_node = onnx.helper.make_node(name = reshape_node.name, op_type = "Reshape", - inputs = [x_name, x2_name], - outputs = [y_name], - allowzero = reshape_node.attrs["allowzero"]) - except: - new_reshape_node = onnx.helper.make_node(name = reshape_node.name, op_type = "Reshape", - inputs = [x_name, x2_name], - outputs = [y_name]) - - self.node = new_reshape_node - - intializer_list = [] - if helper.is_constant_tensor(reshape_node.inputs[1]): - intializer_list.append(x2_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/resize_op.py b/src/qonnx/transformation/operators/resize_op.py deleted file mode 100644 index 15b81f8d..00000000 --- a/src/qonnx/transformation/operators/resize_op.py +++ /dev/null @@ -1,66 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper - -class Resize: - - def __init__(self, node): - - resize_node = node - - x1_name = resize_node.inputs[0].name - x2_name = resize_node.inputs[1].name - x3_name = resize_node.inputs[2].name - x4_name = resize_node - if len(resize_node.inputs) > 3: - x4_name = resize_node.inputs[3].name - - y_name = resize_node.outputs[0].name - - # Resize has 4 inputs, x, roi, scales, sizes. With later 3 as optional. - # In the model (retinanet) there are 2 inputs X and sizes thus 2nd input is obtained at 3rd index. 
- # 1st and 2nd index i.e x2_name and x3_name come out to be empty - print("WARNING check inputs of resize node") - - new_resize_node = onnx.helper.make_node(name = resize_node.name, op_type = "Resize", inputs = [x1_name, x2_name, x3_name], outputs = [y_name], coordinate_transformation_mode = resize_node.attrs["coordinate_transformation_mode"], cubic_coeff_a = resize_node.attrs["cubic_coeff_a"], mode = resize_node.attrs["mode"], nearest_mode = resize_node.attrs["nearest_mode"]) - if len(resize_node.inputs) > 3: - new_resize_node = onnx.helper.make_node(name = resize_node.name, op_type = "Resize", inputs = [x1_name, x2_name, x3_name, x4_name], outputs = [y_name], coordinate_transformation_mode = resize_node.attrs["coordinate_transformation_mode"], cubic_coeff_a = resize_node.attrs["cubic_coeff_a"], mode = resize_node.attrs["mode"], nearest_mode = resize_node.attrs["nearest_mode"]) - - self.node = new_resize_node - - if len(resize_node.inputs) == 3: - x3_value = resize_node.inputs[2].values - x3_tensor = helper.create_initializer_tensor(x3_name,x3_value,onnx.TensorProto.FLOAT) - intializer_list = [] - intializer_list.append(x3_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list diff --git a/src/qonnx/transformation/operators/shape_op.py b/src/qonnx/transformation/operators/shape_op.py deleted file mode 100644 index aadc1179..00000000 --- a/src/qonnx/transformation/operators/shape_op.py +++ /dev/null @@ -1,44 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
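# The removed Resize wrapper above keeps the X/roi/scales/sizes input layout.
# In ONNX, the last three Resize inputs are optional, and an omitted optional
# input is conventionally encoded as an empty string so later inputs keep
# their slot. A minimal sketch of the RetinaNet-style case described in the
# wrapper's comment (only X and sizes present); names are illustrative:

import onnx

resize = onnx.helper.make_node(
    name="resize_0",
    op_type="Resize",
    inputs=["X", "", "", "sizes"],  # empty strings stand in for roi and scales
    outputs=["Y"],
    coordinate_transformation_mode="asymmetric",
    mode="nearest",
    nearest_mode="floor",
)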
-# -######################################################################### - -import onnx - -class Shape: - - def __init__(self, node): - - shape_node = node - - x_name = shape_node.inputs[0].name - y_name = shape_node.outputs[0].name - - new_shape_node = onnx.helper.make_node(name = shape_node.name, op_type = "Shape", - inputs = [x_name], - outputs = [y_name]) - - self.node = new_shape_node - - def get_node(self): - return self.node diff --git a/src/qonnx/transformation/operators/slice_op.py b/src/qonnx/transformation/operators/slice_op.py deleted file mode 100644 index ae06e86b..00000000 --- a/src/qonnx/transformation/operators/slice_op.py +++ /dev/null @@ -1,77 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-# -######################################################################### - -import onnx -from .helper import helper - -class Slice: - - def __init__(self, node): - - slice_node = node - x1_name = slice_node.inputs[0].name - - x2_name = slice_node.inputs[1].name - x2_value = slice_node.inputs[1].values - x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.INT64) - - x3_name = slice_node.inputs[2].name - x3_value = slice_node.inputs[2].values - x3_tensor = helper.create_initializer_tensor(x3_name,x3_value,onnx.TensorProto.INT64) - - x4_name = slice_node.inputs[3].name - x4_value = slice_node.inputs[3].values - x4_tensor = helper.create_initializer_tensor(x4_name,x4_value,onnx.TensorProto.INT64) - - # x5_name = slice_node.inputs[4].name - # x5_value = slice_node.inputs[4].values - # x5_tensor = helper.create_initializer_tensor(x5_name,x5_value,onnx.TensorProto.INT64) - - y_name = slice_node.outputs[0].name - - # new_squeeze_node = onnx.helper.make_node(name = slice_node.name, - # op_type = "Slice", - # inputs = [x1_name, x2_name, x3_name, x4_name, x5_name], - # outputs = [y_name]) - - new_squeeze_node = onnx.helper.make_node(name = slice_node.name, - op_type = "Slice", - inputs = [x1_name, x2_name, x3_name, x4_name], - outputs = [y_name]) - - self.node = new_squeeze_node - - intializer_list = [] - intializer_list.append(x2_tensor) - intializer_list.append(x3_tensor) - intializer_list.append(x4_tensor) - # intializer_list.append(x5_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/softmax_op.py b/src/qonnx/transformation/operators/softmax_op.py deleted file mode 100644 index 4e7f9786..00000000 --- a/src/qonnx/transformation/operators/softmax_op.py +++ /dev/null @@ -1,45 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
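# The removed Slice wrapper above reflects that, since opset 10, Slice takes
# starts/ends/axes/steps as tensor inputs rather than attributes, which is why
# they are materialized as INT64 initializers. A minimal sketch with
# illustrative values, omitting the optional fifth input (steps) just as the
# wrapper does:

import onnx

starts = onnx.helper.make_tensor("starts", onnx.TensorProto.INT64, [1], [0])
ends = onnx.helper.make_tensor("ends", onnx.TensorProto.INT64, [1], [3])
axes = onnx.helper.make_tensor("axes", onnx.TensorProto.INT64, [1], [1])

slice_node = onnx.helper.make_node(
    name="slice_0",
    op_type="Slice",
    inputs=["X", "starts", "ends", "axes"],
    outputs=["Y"],
)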
-# -######################################################################### - -import onnx - -class Softmax: - - def __init__(self, node): - - softmax_node = node - - x_name = softmax_node.inputs[0].name - y_name = softmax_node.outputs[0].name - - new_sftmx_node = onnx.helper.make_node(name = softmax_node.name, op_type = "Softmax", - inputs = [x_name], - outputs = [y_name], - axis = softmax_node.attrs["axis"]) - - self.node = new_sftmx_node - - def get_node(self): - return self.node \ No newline at end of file diff --git a/src/qonnx/transformation/operators/squeeze_op.py b/src/qonnx/transformation/operators/squeeze_op.py deleted file mode 100644 index bdfbae0d..00000000 --- a/src/qonnx/transformation/operators/squeeze_op.py +++ /dev/null @@ -1,57 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper - -class Squeeze: - - def __init__(self, node): - - squeeze_node = node - x1_name = squeeze_node.inputs[0].name - - x2_name = squeeze_node.inputs[1].name - x2_value = squeeze_node.inputs[1].values - x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.INT64) - - y_name = squeeze_node.outputs[0].name - - new_squeeze_node = onnx.helper.make_node(name = squeeze_node.name, - op_type = "Squeeze", - inputs = [x1_name, x2_name], - outputs = [y_name]) - - self.node = new_squeeze_node - - intializer_list = [] - intializer_list.append(x2_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/transformation/operators/transpose_op.py b/src/qonnx/transformation/operators/transpose_op.py deleted file mode 100644 index 4607a600..00000000 --- a/src/qonnx/transformation/operators/transpose_op.py +++ /dev/null @@ -1,45 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx - -class Transpose: - - def __init__(self, node): - - transpose_node = node - - x_name = transpose_node.inputs[0].name - y_name = transpose_node.outputs[0].name - - new_transpose_node = onnx.helper.make_node(name = transpose_node.name, op_type = "Transpose", - inputs = [x_name], - outputs = [y_name], - perm = transpose_node.attrs["perm"]) - - self.node = new_transpose_node - - def get_node(self): - return self.node diff --git a/src/qonnx/transformation/operators/unsqueeze_op.py b/src/qonnx/transformation/operators/unsqueeze_op.py deleted file mode 100644 index b59d8d52..00000000 --- a/src/qonnx/transformation/operators/unsqueeze_op.py +++ /dev/null @@ -1,62 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
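# The Unsqueeze wrapper removed below encodes the opset-13 form of the op,
# where the axes arrive as a second INT64 input instead of an attribute.
# A minimal sketch of that form with illustrative names:

import onnx

axes = onnx.helper.make_tensor("unsq_axes", onnx.TensorProto.INT64, [1], [0])
unsq = onnx.helper.make_node(
    name="unsqueeze_0",
    op_type="Unsqueeze",
    inputs=["X", "unsq_axes"],
    outputs=["Y"],
)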
-# -######################################################################### - -import onnx -from .helper import helper - -class Unsqueeze: - - def __init__(self, node): - - unsq_node = node - - x1_name = unsq_node.inputs[0].name - y_name = unsq_node.outputs[0].name - - if helper.is_constant_tensor(unsq_node.inputs[1]): - if unsq_node.inputs[1].dtype == "int64": - axes_tensor = helper.create_initializer_tensor(name=unsq_node.inputs[1].name, - tensor_array=unsq_node.inputs[1].values, - data_type=onnx.TensorProto.INT64) - else: - print("ERROR please check axes data type for Unsqueeze Node ", unsq_node.name) - - - new_unsq_node = onnx.helper.make_node(name = unsq_node.name, op_type = "Unsqueeze", - inputs = [x1_name, axes_tensor.name], - outputs = [y_name]) - - intializer_list = [] - if helper.is_constant_tensor(unsq_node.inputs[1]): - intializer_list.append(axes_tensor) - self.intializer_list = intializer_list - - self.node = new_unsq_node - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list From 14b828e1f48622999ca2946cfce8ffe047d75ed9 Mon Sep 17 00:00:00 2001 From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com> Date: Fri, 6 Oct 2023 14:53:54 +0530 Subject: [PATCH 14/20] [Code cleanup for QCDQToQOp] Create add_op.py --- src/qonnx/custom_op/qop/add_op.py | 519 ++++++++++++++++++++++++++++++ 1 file changed, 519 insertions(+) create mode 100644 src/qonnx/custom_op/qop/add_op.py diff --git a/src/qonnx/custom_op/qop/add_op.py b/src/qonnx/custom_op/qop/add_op.py new file mode 100644 index 00000000..cd7da025 --- /dev/null +++ b/src/qonnx/custom_op/qop/add_op.py @@ -0,0 +1,519 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class QLinearAdd: + + def __init__(self, node, aecg_zendnn_opt, remove_relu): + + add_node = node + + if len(add_node.inputs[1].inputs)==0: + # if Add node has only 1 input node and other input is constant tensor we cannot change it to QLinearAdd node hence keeping it as is + x_name = add_node.inputs[0].name + y_name = add_node.outputs[0].name + + const_val = add_node.inputs[1].values + + const_name = add_node.name + "_const_add_tensor" + y_scale_tensor = helper.create_initializer_tensor(name=const_name, + tensor_array=const_val, + data_type=onnx.TensorProto.FLOAT) + + new_add_node = onnx.helper.make_node(name = add_node.name, + op_type = "Add", + inputs = [x_name, const_name], + outputs = [y_name]) + self.node = new_add_node + + if helper.is_child_present(add_node, 0, 0) and add_node.o().op == "Relu": + relu_node = add_node.o() + relu_node1 = onnx.helper.make_node(name = relu_node.name, op_type = "Relu", inputs = [add_node.outputs[0].name], outputs = [relu_node.outputs[0].name]) + self.relu_node = relu_node1 + + intializer_list = [] + intializer_list.append(y_scale_tensor) + self.intializer_list = intializer_list + + else: + input_node1 = add_node.inputs[0].inputs[0] + input_node2 = add_node.inputs[1].inputs[0] + output_node = add_node.o() + + is_relu_present = False + if output_node.op == "Relu": + is_relu_present = True + relu_node = output_node + # relu_node gets updated in later conditions thus keeping relu_node_name and relu_node_output_tensor to make it simple to keep their track + relu_node_name = relu_node.name + relu_node_output_tensor = relu_node.outputs[0].name + if relu_node.o().op == "QuantizeLinear": + output_node = relu_node.o() + else: + print("*********************** ERROR output of Relu node ", relu_node.name, " is not QuantizeLinear ***********************") + elif not(output_node.op == "QuantizeLinear"): + print("*********************** ERROR output of Add node ", add_node.name, " is not QuantizeLinear ***********************") + + + # in order to get scale and zp for the 2 inputs to Add node, we need 2 DQL nodes. + if not (input_node1.op == "DequantizeLinear" and input_node2.op == "DequantizeLinear"): + + """ + case observed in Resnet50v1 + Add1 + | + | + V + Relu-------------------- + | | + | | + V | + QL | + | | + | | + | | + DQL DQL DQL2 | + | | | | + | | | | + ----------------------------Conv | + | | + | | + V | + QL | + | | + | | + V | + DQL DQL DQL | + | | | | + | | | | + | | V | + ---------------------------Conv | + | | + | | + V | + QL | + | | + | | + V | + DQL DQL DQL | + | | | | + | | | | + | | V | + ----------------------------Conv | + | | + | | + V | + QL | + | | + | | + V | + DQL1 | + | | + | | + V | + Add<--------------------- + + + here Add doesn't have 1 of the DQL node, so we take DQL2 as the other DQL node. 
+
+                in case both inputs are missing the DQL node: we haven't encountered this case, so it is flagged for now and, if needed, will be handled later depending on the case
+                """
+                if not (input_node1.op == "DequantizeLinear") and not (input_node2.op == "DequantizeLinear"):
+                    print("***************************** ERROR No input of Add node is DequantizeLinear ***********************************")
+                elif not (input_node1.op == "DequantizeLinear"):
+                    # if input_node1 is not DQL
+                    if input_node1.op == "Relu":
+                        relu_node = input_node1
+                        if relu_node.i().op == "Add" and relu_node.o().op == "QuantizeLinear":
+                            if (relu_node.o()).o().op == "DequantizeLinear":
+                                input_node1 = (relu_node.o()).o()
+                                # in the example case shown, input_node1 is now DQL2
+                    elif input_node1.op == "MaxPool":
+                        # when resnet strides have been implemented there will be a maxpool node between the Relu and Add nodes shown.
+                        maxpool_node = input_node1
+                        relu_node = maxpool_node.i()
+                        if relu_node.i().op == "Add" and (relu_node.o().op == "QuantizeLinear" or relu_node.outputs[0].outputs[1].op == "QuantizeLinear"):
+                            if (relu_node.o()).o().op == "DequantizeLinear":
+                                input_node1 = (relu_node.o()).o()
+                                # input_node1 is now DQL2
+                            elif (relu_node.outputs[0].outputs[1]).o().op == "DequantizeLinear":
+                                input_node2 = (relu_node.outputs[0].outputs[1]).o()
+                                # input_node2 is now DQL2
+                    elif input_node1.op == "Add":
+
+                        """
+                        this case is observed in mobilenetv2-12-qdq.onnx
+
+
+                        Add2-------------------------
+                         |                          |
+                         |                          |
+                         |                          V
+                         |                         QL1
+                         |                          |
+                         |                          |
+                         |                          V
+                         |             DQL1   DQL   DQL
+                         |               |     |     |
+                         |               |     |     |
+                         |               V     |     |
+                         |              Conv<---------
+                         |               |
+                         |               |
+                         |               V
+                         |               QL
+                         |               |
+                         |               |
+                         |               V
+                         |              DQL    DQL   DQL
+                         |               |     |     |
+                         |               |     |     |
+                         |               V     |     |
+                         |              Conv<---------
+                         |               |
+                         |               |
+                         |               V
+                         |               QL
+                         |               |
+                         |               |
+                         |               V
+                         |              DQL    DQL   DQL
+                         |               |     |     |
+                         |               |     |     |
+                         |               V     |     |
+                         |              Conv<---------
+                         |               |
+                         |               |
+                         |               V
+                         |               QL
+                         |               |
+                         |               |
+                         |               V
+                         |              DQL
+                         |               |
+                         |               |
+                        Add1-------------------------
+
+                        Add2 = parent_add_node
+                        QL1 = parent_add_node_ql_node
+                        input_node1 = DQL1
+
+                        """
+                        parent_add_node = input_node1
+                        parent_add_node_ql_node = parent_add_node.o()
+                        input_node1 = parent_add_node_ql_node.o()
+                elif not (input_node2.op == "DequantizeLinear"):
+                    # if input_node2 is not DQL
+                    if input_node2.op == "Relu":
+                        relu_node = input_node2
+                        if relu_node.i().op == "Add" and relu_node.o().op == "QuantizeLinear":
+                            if (relu_node.o()).o().op == "DequantizeLinear":
+                                input_node2 = (relu_node.o()).o()
+                                # input_node2 is now the DQL node from which we need to take scale and zp
+
+                    elif input_node2.op == "MaxPool":
+                        maxpool_node = input_node2
+                        if maxpool_node.i().op == "Relu":
+                            relu_node = maxpool_node.i()
+                        elif maxpool_node.i().op == "DequantizeLinear":
+                            if maxpool_node.i().i().op == "QuantizeLinear":
+                                if maxpool_node.i().i().i().op == "Relu":
+                                    relu_node = maxpool_node.i().i().i()
+                        if relu_node.i().op == "Add" and (relu_node.o().op == "QuantizeLinear" or (len(relu_node.outputs[0].outputs)>1 and relu_node.outputs[0].outputs[1].op == "QuantizeLinear")):
+                            if (relu_node.o()).o().op == "DequantizeLinear":
+                                input_node2 = (relu_node.o()).o()
+                            elif len(relu_node.outputs[0].outputs)>1 and (relu_node.outputs[0].outputs[1]).o().op == "DequantizeLinear":
+                                input_node2 = (relu_node.outputs[0].outputs[1]).o()
+                                # input_node2 is now the DQL node from which we need to take scale and zp
+
+                if input_node1.op == "DequantizeLinear" and input_node2.op == "DequantizeLinear" and output_node.op == "QuantizeLinear":
+                    # now we have input_node1 = input_node2 = DQL and the output is a QL node
+                    if add_node.inputs[0].inputs[0].op == "MaxPool":
+                        # this is the strides case: if MaxPool is the parent of the Add node, node1 = the MaxPool node
+                        node1 = add_node.i()
+                    elif add_node.inputs[0].inputs[0].op == "Add":
+                        # this is for the mobilenet case, so Add2 = node1
+                        node1 = add_node.i()
+                    else:
+                        """
+                        if the above 2 cases do not apply, assume the following case from the Resnet50v1 model
+
+                        |  DQL  DQL          |  DQL  DQL
+                        |   |    |           |   |    |
+                        |   |    |           |   |    |
+                        Conv<---------       Conv<---------------------
+                         |                    |
+                         |                    |
+                        QL1                  QL2
+                         |                    |
+                         |                    |
+                        DQL                  DQL
+                         |                    |
+                         |                    |
+                        Add<-----------------------------------------
+
+                        now node1 is QL1/QL2
+
+                        """
+                        node1 = add_node.inputs[0].inputs[0].i()
+
+                    if add_node.inputs[1].inputs[0].op == "MaxPool":
+                        # same as above but for the other input, node2 = the maxpool node
+                        node2 = add_node.inputs[1].inputs[0]
+                    else:
+                        # same as the general case discussed above, node2 = QL1/QL2
+                        node2 = input_node2.i()
+
+                    if node1.op == "Add":
+                        # this is the mobilenet case explained above; node1 will be converted to a QLinearAdd node and it will act as input to the current add node
+                        # thus a_name = QL1's output tensor name (please refer to the mobilenet case above)
+                        a_name = node1.o().outputs[0].name
+                    else:
+                        # referring to the general case taken above from the resnet50v1 model, a_name = QL1/QL2's output tensor name
+                        a_name = node1.outputs[0].name
+
+                    a_scale_name = add_node.name + "_A_SCALE"
+                    a_scale_value = input_node1.inputs[1].values
+                    a_scale_tensor = helper.create_initializer_tensor(name=a_scale_name,
+                                                                      tensor_array=a_scale_value,
+                                                                      data_type=onnx.TensorProto.FLOAT)
+
+                    a_zp_name = add_node.name + "_A_ZP"
+                    a_zp_value = input_node1.inputs[2].values
+
+                    if aecg_zendnn_opt:
+                        a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name,
+                                                                       tensor_array=a_zp_value,
+                                                                       data_type=onnx.TensorProto.UINT8)
+                    else:
+                        if node1.i().op == "QuantizeLinear" and node1.i().i().op == "Relu":
+                            a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name,
+                                                                           tensor_array=a_zp_value,
+                                                                           data_type=onnx.TensorProto.UINT8)
+                        else:
+                            if input_node1.inputs[2].dtype == np.int8:
+                                a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name,
+                                                                               tensor_array=a_zp_value,
+                                                                               data_type=onnx.TensorProto.INT8)
+                            elif input_node1.inputs[2].dtype == np.uint8:
+                                a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name,
+                                                                               tensor_array=a_zp_value,
+                                                                               data_type=onnx.TensorProto.UINT8)
+
+                    # TODO: Only 1 condition is handled here: the Add node's 1st parent is DQL<--QL and its 2nd parent can be Relu. The vice-versa and other cases have not been encountered yet and so are not handled.
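# Reference semantics for the com.microsoft QLinearAdd node assembled below:
# its eight inputs are ordered A, A_scale, A_zero_point, B, B_scale,
# B_zero_point, C_scale, C_zero_point, and the output C is itself quantized.
# A minimal numpy sketch of what the fused op computes (uint8 saturation
# shown; the int8 case clips to [-128, 127]):

import numpy as np

def qlinear_add_ref(a, a_scale, a_zp, b, b_scale, b_zp, c_scale, c_zp):
    # Dequantize both operands, add in float, then requantize the result.
    c = (a.astype(np.float32) - a_zp) * a_scale + (b.astype(np.float32) - b_zp) * b_scale
    return np.clip(np.round(c / c_scale) + c_zp, 0, 255).astype(np.uint8)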
+ if helper.is_parent_exist(node2, 0, 0): + if remove_relu: + # b_name = the QL's output tensor + b_name = node2.outputs[0].name + else: + # check Relu and input of Add node is s8, any 1 input can be checked, thus we check for node1 + if node2.i().op == "Relu" and node1.inputs[2].values.dtype == np.int8: + """ + this case is observed in renset50v1.5 + + DQL DQL + | | + | | + V | + Add<----------------- + | + | + V + Relu1 + | + | + V + QL1 + | + | + V + ------------------------------------DQL1 DQL DQL + | | | | + | | | | + | V | | + | Conv4<-------------------- + | | + | | + | V + | Relu + | | + | | + | V + | QL + | | + | | + | V + | DQL DQL DQL + | | | | + | | | | + | V | | + | Conv3<-------------------- + | | + | | + | V + | Relu + | | + | | + | V + | QL + | | + | | + | V + | DQL DQL DQL + | | | | + | | | | + | V | | + | Conv2<-------------------- + | | + | | + | V + | QL + | | + | | + | V + | DQL + | | + | | + | V + ---------------------------------->Add1 + + + in this case node2 is QL1 + node2_relu_node = Relu1 + thus b_name = Relu1's output as abotve top Add node is converted as follows- + + QLinearAdd + | + | + V + Relu1 + + thus relu1 output is set to b_name + + + """ + node2_relu_node = node2.i() + if node2_relu_node.i().op == "Conv" or node2_relu_node.i().op == "Add": + b_name = node2_relu_node.outputs[0].name + else: + b_name = node2.outputs[0].name + else: + b_name = node2.outputs[0].name + else: + print("************* ERROR ****************** Please check parent of Add Node's parent, ", node2.name) + + b_scale_name = add_node.name + "_B_SCALE" + b_scale_value = input_node2.inputs[1].values + b_scale_tensor = helper.create_initializer_tensor(name=b_scale_name, + tensor_array=b_scale_value, + data_type=onnx.TensorProto.FLOAT) + + b_zp_name = add_node.name + "_B_ZP" + b_zp_value = input_node2.inputs[2].values + + if aecg_zendnn_opt: + b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name, + tensor_array=b_zp_value, + data_type=onnx.TensorProto.UINT8) + else: + if node2.i().op == "QuantizeLinear" and node2.i().i().op == "Relu": + b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name, + tensor_array=b_zp_value, + data_type=onnx.TensorProto.UINT8) + else: + if input_node2.inputs[2].dtype == np.int8: + b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name, + tensor_array=b_zp_value, + data_type=onnx.TensorProto.INT8) + elif input_node2.inputs[2].dtype == np.uint8: + b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name, + tensor_array=b_zp_value, + data_type=onnx.TensorProto.UINT8) + + y_scale_name = add_node.name + "_Y_SCALE" + y_scale_value = output_node.inputs[1].values + y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, + tensor_array=y_scale_value, + data_type=onnx.TensorProto.FLOAT) + + y_zp_name = add_node.name + "_Y_ZP" + y_zp_value = output_node.inputs[2].values + + if aecg_zendnn_opt: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.UINT8) + y_name = output_node.outputs[0].name + else: + if output_node.inputs[2].dtype == np.int8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.INT8) + elif output_node.inputs[2].dtype == np.uint8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.UINT8) + + if is_relu_present and not remove_relu and node1.inputs[2].values.dtype == np.int8: + y_name = add_node.outputs[0].name + 
else: + y_name = output_node.outputs[0].name + + kwargs = {} + kwargs["domain"] = 'com.microsoft' + + + new_add_node = onnx.helper.make_node(name = add_node.name, + op_type = "QLinearAdd", + inputs = [a_name, a_scale_name, a_zp_name, b_name, b_scale_name, b_zp_name, y_scale_name, y_zp_name], + outputs = [y_name], + **kwargs) + + self.node = new_add_node + + if is_relu_present: + relu_node = onnx.helper.make_node(name = relu_node_name, op_type = "Relu", inputs = [add_node.outputs[0].name], outputs = [relu_node_output_tensor]) + self.relu_node = relu_node + + intializer_list = [] + intializer_list.append(a_scale_tensor) + intializer_list.append(a_zp_tensor) + intializer_list.append(b_scale_tensor) + intializer_list.append(b_zp_tensor) + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list + + def get_relu_node(self): + return self.relu_node From 73f172268e472d58d0908d31256da5ed54bde4ee Mon Sep 17 00:00:00 2001 From: Aditya Chatterjee <130146833+amd-adchatte@users.noreply.github.com> Date: Fri, 6 Oct 2023 14:56:08 +0530 Subject: [PATCH 15/20] [Code cleanup for QCDQToQOp] Added QOp operators --- src/qonnx/custom_op/qop/averagepool_op.py | 48 ++ src/qonnx/custom_op/qop/cast_op.py | 44 ++ src/qonnx/custom_op/qop/clip_op.py | 61 +++ src/qonnx/custom_op/qop/concat_op.py | 129 +++++ .../custom_op/qop/dequantizelinear_op.py | 110 ++++ src/qonnx/custom_op/qop/flatten_op.py | 53 ++ src/qonnx/custom_op/qop/gather_op.py | 112 +++++ src/qonnx/custom_op/qop/gemm_op.py | 51 ++ src/qonnx/custom_op/qop/gemm_op_optimized.py | 98 ++++ .../custom_op/qop/globalAveragePool_op.py | 143 ++++++ src/qonnx/custom_op/qop/greater_op.py | 57 +++ src/qonnx/custom_op/qop/helper.py | 65 +++ src/qonnx/custom_op/qop/identity_op.py | 56 +++ src/qonnx/custom_op/qop/less_op.py | 57 +++ src/qonnx/custom_op/qop/lrn_op.py | 48 ++ src/qonnx/custom_op/qop/matmul_op.py | 157 ++++++ src/qonnx/custom_op/qop/matmul_retained_op.py | 154 ++++++ src/qonnx/custom_op/qop/maxpool_op.py | 125 +++++ src/qonnx/custom_op/qop/qlinearconv_op.py | 476 ++++++++++++++++++ src/qonnx/custom_op/qop/quantizelinear_op.py | 78 +++ src/qonnx/custom_op/qop/relu_op.py | 44 ++ src/qonnx/custom_op/qop/reshape_op.py | 65 +++ src/qonnx/custom_op/qop/resize_op.py | 66 +++ src/qonnx/custom_op/qop/shape_op.py | 44 ++ src/qonnx/custom_op/qop/slice_op.py | 77 +++ src/qonnx/custom_op/qop/softmax_op.py | 45 ++ src/qonnx/custom_op/qop/squeeze_op.py | 57 +++ src/qonnx/custom_op/qop/transpose_op.py | 45 ++ src/qonnx/custom_op/qop/unsqueeze_op.py | 62 +++ 29 files changed, 2627 insertions(+) create mode 100644 src/qonnx/custom_op/qop/averagepool_op.py create mode 100644 src/qonnx/custom_op/qop/cast_op.py create mode 100644 src/qonnx/custom_op/qop/clip_op.py create mode 100644 src/qonnx/custom_op/qop/concat_op.py create mode 100644 src/qonnx/custom_op/qop/dequantizelinear_op.py create mode 100644 src/qonnx/custom_op/qop/flatten_op.py create mode 100644 src/qonnx/custom_op/qop/gather_op.py create mode 100644 src/qonnx/custom_op/qop/gemm_op.py create mode 100644 src/qonnx/custom_op/qop/gemm_op_optimized.py create mode 100644 src/qonnx/custom_op/qop/globalAveragePool_op.py create mode 100644 src/qonnx/custom_op/qop/greater_op.py create mode 100644 src/qonnx/custom_op/qop/helper.py create mode 100644 src/qonnx/custom_op/qop/identity_op.py create mode 100644 src/qonnx/custom_op/qop/less_op.py create mode 100644 
src/qonnx/custom_op/qop/lrn_op.py create mode 100644 src/qonnx/custom_op/qop/matmul_op.py create mode 100644 src/qonnx/custom_op/qop/matmul_retained_op.py create mode 100644 src/qonnx/custom_op/qop/maxpool_op.py create mode 100644 src/qonnx/custom_op/qop/qlinearconv_op.py create mode 100644 src/qonnx/custom_op/qop/quantizelinear_op.py create mode 100644 src/qonnx/custom_op/qop/relu_op.py create mode 100644 src/qonnx/custom_op/qop/reshape_op.py create mode 100644 src/qonnx/custom_op/qop/resize_op.py create mode 100644 src/qonnx/custom_op/qop/shape_op.py create mode 100644 src/qonnx/custom_op/qop/slice_op.py create mode 100644 src/qonnx/custom_op/qop/softmax_op.py create mode 100644 src/qonnx/custom_op/qop/squeeze_op.py create mode 100644 src/qonnx/custom_op/qop/transpose_op.py create mode 100644 src/qonnx/custom_op/qop/unsqueeze_op.py diff --git a/src/qonnx/custom_op/qop/averagepool_op.py b/src/qonnx/custom_op/qop/averagepool_op.py new file mode 100644 index 00000000..db385be5 --- /dev/null +++ b/src/qonnx/custom_op/qop/averagepool_op.py @@ -0,0 +1,48 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class AveragePool: + + def __init__(self, node): + + average_pool_node = node + a_name = average_pool_node.inputs[0].name + + y_name = average_pool_node.outputs[0].name + + new_average_pool_node = onnx.helper.make_node(name = average_pool_node.name, op_type = "AveragePool", + inputs = [a_name], + outputs = [y_name], + ceil_mode = average_pool_node.attrs["ceil_mode"], + kernel_shape = average_pool_node.attrs["kernel_shape"], + pads = average_pool_node.attrs["pads"], + strides = average_pool_node.attrs["strides"]) + + self.node = new_average_pool_node + + def get_node(self): + return self.node \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/cast_op.py b/src/qonnx/custom_op/qop/cast_op.py new file mode 100644 index 00000000..578329d9 --- /dev/null +++ b/src/qonnx/custom_op/qop/cast_op.py @@ -0,0 +1,44 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
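# Each file in this commit follows the same wrapper pattern as the AveragePool
# class above: the constructor takes a graphsurgeon node, rebuilds a plain
# onnx NodeProto, and any constant tensors it manufactures are exposed through
# get_intializers(). A hypothetical driver loop (the real conversion pass
# lives elsewhere) consuming that interface:

def lower_node(gs_node, new_nodes, new_initializers):
    if gs_node.op == "AveragePool":
        op = AveragePool(gs_node)
        new_nodes.append(op.get_node())
    # ...one branch per wrapper; wrappers that fabricate constant tensors
    # (Clip, Concat, Gather, ...) also extend new_initializers with
    # op.get_intializers().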
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class Cast: + + def __init__(self, node): + + cast_node = node + + x_name = cast_node.inputs[0].name + y_name = cast_node.outputs[0].name + + new_cast_node = onnx.helper.make_node(name = cast_node.name, op_type = "Cast", + inputs = [x_name], + outputs = [y_name], + to = cast_node.attrs["to"]) + self.node = new_cast_node + + def get_node(self): + return self.node diff --git a/src/qonnx/custom_op/qop/clip_op.py b/src/qonnx/custom_op/qop/clip_op.py new file mode 100644 index 00000000..f672bfde --- /dev/null +++ b/src/qonnx/custom_op/qop/clip_op.py @@ -0,0 +1,61 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +######################################################################### + +import onnx +from .helper import helper + +class Clip: + + def __init__(self, node): + + clip_node = node + + x_name = clip_node.inputs[0].name + + x2_name = clip_node.inputs[1].name + x2_value = clip_node.inputs[1].values + x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.INT8) + + x3_name = clip_node.inputs[2].name + x3_value = clip_node.inputs[2].values + x3_tensor = helper.create_initializer_tensor(x3_name,x3_value,onnx.TensorProto.INT8) + + new_clip_node = onnx.helper.make_node(name = clip_node.name, op_type = "Clip", + inputs= [x_name, x2_name, x3_name], + outputs = [clip_node.outputs[0].name]) + + self.node = new_clip_node + + intializer_list = [] + intializer_list.append(x2_tensor) + intializer_list.append(x3_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list + diff --git a/src/qonnx/custom_op/qop/concat_op.py b/src/qonnx/custom_op/qop/concat_op.py new file mode 100644 index 00000000..4f5e5f6e --- /dev/null +++ b/src/qonnx/custom_op/qop/concat_op.py @@ -0,0 +1,129 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
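# The Clip wrapper above targets the opset-11+ form of the op, where min and
# max arrive as inputs rather than attributes; re-emitting them as INT8
# initializers presumes the bounds already live in the quantized domain (for
# example a ReLU6 lowered onto int8 data). A minimal sketch with illustrative
# values:

import onnx

clip_min = onnx.helper.make_tensor("clip_min", onnx.TensorProto.INT8, [], [0])
clip_max = onnx.helper.make_tensor("clip_max", onnx.TensorProto.INT8, [], [127])
clip = onnx.helper.make_node(
    name="clip_0",
    op_type="Clip",
    inputs=["X", "clip_min", "clip_max"],
    outputs=["Y"],
)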
+# +######################################################################### + +import onnx +from .helper import helper + +class Concat: + + def __init__(self, node, is_all_concat_input_dql): + + concat_node = node + + number_of_inputs = len(concat_node.inputs) + + zp_value_list = [] + zp_name_list = [] + scale_values_list = [] + scale_name_list = [] + input_tensor_names = [] + + intializer_list = [] + input_names = [] + + for i in range(number_of_inputs): + if is_all_concat_input_dql: + parent_dql_node = concat_node.inputs[i].inputs[0] + scale_values_list.append(parent_dql_node.inputs[1].values) + scale_name_list.append(parent_dql_node.inputs[1].name) + zp_value_list.append(parent_dql_node.inputs[2].values) + zp_name_list.append(parent_dql_node.inputs[2].name) + input_tensor_names.append(parent_dql_node.inputs[0].name) + else: + input_tensor_names.append(concat_node.inputs[i].name) + if len(concat_node.inputs[i].inputs) == 0: + c_input = helper.create_initializer_tensor(name=concat_node.inputs[i].name, + tensor_array=concat_node.inputs[i].values, + data_type=onnx.TensorProto.INT64) + intializer_list.append(c_input) + self.intializer_list = intializer_list + + if is_all_concat_input_dql: + for i in range(number_of_inputs): + scale_tesnor = helper.create_initializer_tensor(name=scale_name_list[i], + tensor_array=scale_values_list[i], + data_type=onnx.TensorProto.FLOAT) + zp_tensor = helper.create_initializer_tensor(name=zp_name_list[i], + tensor_array=zp_value_list[i], + data_type=onnx.TensorProto.UINT8) + intializer_list.append(scale_tesnor) + intializer_list.append(zp_tensor) + + if helper.is_child_present(concat_node, 0, 0) and concat_node.o().op == "DequantizeLinear" and is_all_concat_input_dql: + y_ql_node = concat_node.o() + y_name = y_ql_node.outputs[0].name + else: + y_name = concat_node.outputs[0].name + + if helper.is_child_present(concat_node, 0, 0) and concat_node.o().op == "DequantizeLinear" and is_all_concat_input_dql: + y_scale_name = y_ql_node.inputs[1].name + y_scale_value = y_ql_node.inputs[1].values + y_zp_name = y_ql_node.inputs[2].name + y_zp_value = y_ql_node.inputs[2].values + + y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, + tensor_array=y_scale_value, + data_type=onnx.TensorProto.FLOAT) + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.UINT8) + + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + self.intializer_list = intializer_list + + input_names.append(y_scale_tensor.name) + input_names.append(y_zp_tensor.name) + + for i in range(number_of_inputs): + input_names.append(input_tensor_names[i]) + if len(scale_name_list)>0 and len(zp_name_list)>0: + input_names.append(scale_name_list[i]) + input_names.append(zp_name_list[i]) + + kwargs = {} + kwargs["domain"] = 'com.microsoft' + + if is_all_concat_input_dql: + new_concat_node = onnx.helper.make_node(name = concat_node.name, + op_type = "QLinearConcat", + inputs = input_names, + outputs = [y_name], + axis = concat_node.attrs["axis"], + **kwargs) + else: + new_concat_node = onnx.helper.make_node(name = concat_node.name, + op_type = "Concat", + inputs = input_names, + outputs = [y_name], + axis = concat_node.attrs["axis"]) + + self.node = new_concat_node + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/dequantizelinear_op.py b/src/qonnx/custom_op/qop/dequantizelinear_op.py new 
file mode 100644 index 00000000..309d6564 --- /dev/null +++ b/src/qonnx/custom_op/qop/dequantizelinear_op.py @@ -0,0 +1,110 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class DequantizeLinear: + + def __init__(self, node, aecg_zendnn_opt, remove_relu): + + dql_node = node + + x_name = dql_node.inputs[0].name + + if helper.is_parent_exist(dql_node, 0, 0): + if dql_node.i().op == "QuantizeLinear": + ql_node = dql_node.i() + if helper.is_parent_exist(ql_node,0, 0): + if ql_node.i().op == "Relu": + relu_node = ql_node.i() + if remove_relu: + x_name = ql_node.outputs[0].name + else: + x_name = relu_node.outputs[0].name + else: + print("*************** WARNING *********************** Please check parent of QL node", ql_node.name, " ignore if pattern is correct") + else: + print("*************** WARNING *********************** Please check parent of DQL node", dql_node.name, " ignore if pattern is correct") + self.initializers = [] + + if len(dql_node.inputs[0].inputs) == 0: + if dql_node.inputs[0].dtype == np.uint8: + input_tensor = helper.create_initializer_tensor(name= dql_node.inputs[0].name, + tensor_array=dql_node.inputs[0].values, + data_type=onnx.TensorProto.UINT8) + elif dql_node.inputs[0].dtype == np.int8: + input_tensor = helper.create_initializer_tensor(name= dql_node.inputs[0].name, + tensor_array=dql_node.inputs[0].values, + data_type=onnx.TensorProto.INT8) + elif dql_node.inputs[0].dtype == np.int32: + input_tensor = helper.create_initializer_tensor(name= dql_node.inputs[0].name, + tensor_array=dql_node.inputs[0].values, + data_type=onnx.TensorProto.INT32) + self.initializers.append(input_tensor) + + x_scale_name = dql_node.inputs[1].name + x_scale_value = dql_node.inputs[1].values + x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name,tensor_array=x_scale_value,data_type=onnx.TensorProto.FLOAT) + + x_zp_name = dql_node.inputs[2].name + x_zp_value = dql_node.inputs[2].values + + if aecg_zendnn_opt: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + else: + if dql_node.inputs[2].dtype == np.uint8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, 
+ data_type=onnx.TensorProto.UINT8) + if dql_node.inputs[2].dtype == np.int32: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT32) + elif dql_node.inputs[2].dtype == np.int8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + + y_name = dql_node.outputs[0].name + + dequantizelinear_node = onnx.helper.make_node(name = dql_node.name, + op_type = "DequantizeLinear", + inputs = [x_name, x_scale_name, x_zp_name], + outputs = [y_name]) + + self.node = dequantizelinear_node + + self.initializers.append(x_scale_tensor) + self.initializers.append(x_zp_tensor) + + def get_node(self): + return self.node + + def get_intializers(self): + return self.initializers diff --git a/src/qonnx/custom_op/qop/flatten_op.py b/src/qonnx/custom_op/qop/flatten_op.py new file mode 100644 index 00000000..62831558 --- /dev/null +++ b/src/qonnx/custom_op/qop/flatten_op.py @@ -0,0 +1,53 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class Flatten: + + def __init__(self, node): + + flatten_node = node + x_name = flatten_node.inputs[0].name + y_name = flatten_node.outputs[0].name + + if flatten_node.i().op == "DequantizeLinear": + node1 = flatten_node.i() + x_name = node1.inputs[0].name + + if flatten_node.o().op == "QuantizeLinear": + node2 = flatten_node.o() + y_name = node2.outputs[0].name + + + new_flatten_node = onnx.helper.make_node(name = flatten_node.name, op_type = "Flatten", + inputs = [x_name], + outputs = [y_name]) + + + self.node = new_flatten_node + + def get_node(self): + return self.node \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/gather_op.py b/src/qonnx/custom_op/qop/gather_op.py new file mode 100644 index 00000000..5fd01faa --- /dev/null +++ b/src/qonnx/custom_op/qop/gather_op.py @@ -0,0 +1,112 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
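# Reference semantics for the DequantizeLinear node rebuilt above: subtract
# the zero point, then scale to float. A minimal numpy sketch covering the
# uint8/int8/int32 zero-point dtypes the wrapper distinguishes:

import numpy as np

def dequantize_ref(x, scale, zero_point):
    # Works unchanged for uint8, int8 and int32 inputs.
    return (x.astype(np.float32) - np.float32(zero_point)) * np.float32(scale)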
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class Gather: + + def __init__(self, node): + + gather_node = node + # -------------------------------- + # For QCDQ / QDQ model, this case: + # QuantizeLinear + # | (0) + # Gather ---------- (1) Input + # | + # -------------------------------- + gather_parent_node = node + quantized_data_tensor = node + if helper.is_parent_exist(gather_node, 0, 0): + gather_parent_node = node.i(0) + if len(gather_parent_node.inputs) > 1 and helper.is_constant_tensor(gather_parent_node.inputs[1]): + quantized_data_tensor = gather_parent_node.inputs[1].values + + if helper.is_constant_tensor(gather_parent_node.inputs[0]): + if gather_parent_node.op == "QuantizeLinear": + X_DQL_node = gather_parent_node + dequantized_data_tensor = X_DQL_node.inputs[0] + data_scale_tensor = X_DQL_node.inputs[1] + data_zero_point_tensor = X_DQL_node.inputs[2] + + data_scale_tensor = data_scale_tensor.values * np.ones(dequantized_data_tensor.shape) + a = dequantized_data_tensor.values / data_scale_tensor + b = data_zero_point_tensor.values * np.ones(dequantized_data_tensor.shape) + quantized_data_tensor = a + b + quantized_data_tensor = quantized_data_tensor.astype(np.int8) + + else: + if gather_parent_node.op == "QuantizeLinear": + X_QL_node = gather_parent_node + quantized_data_tensor = X_QL_node.inputs[1].values + + data_tensor = helper.create_initializer_tensor(name=gather_node.inputs[0].name, + tensor_array=quantized_data_tensor, + data_type=onnx.TensorProto.INT8) + + if helper.is_constant_tensor(gather_parent_node.inputs[0]): + data_tensor = helper.create_initializer_tensor(name=gather_node.inputs[0].name, + tensor_array=quantized_data_tensor, + data_type=onnx.TensorProto.INT8) + if helper.is_constant_tensor(gather_node.inputs[1]): + if gather_node.inputs[1].dtype == "int64": + indices_tensor = helper.create_initializer_tensor(name=gather_node.inputs[1].name, + tensor_array=gather_node.inputs[1].values, + data_type=onnx.TensorProto.INT64) + else: + print("ERROR check data type in Gather node ", gather_node.name) + + new_gather_node = onnx.helper.make_node(name = gather_node.name, op_type = "Gather", + inputs= [data_tensor.name, gather_node.inputs[1].name], + outputs = [gather_node.outputs[0].name], + axis = 0) + if helper.is_constant_tensor(gather_parent_node.inputs[0]): + new_gather_node = onnx.helper.make_node(name = 
gather_node.name, op_type = "Gather", + inputs= [data_tensor.name, gather_node.inputs[1].name], + outputs = [gather_node.outputs[0].name], + axis = 0) + elif helper.is_constant_tensor(gather_node.inputs[1]): + new_gather_node = onnx.helper.make_node(name = gather_node.name, op_type = "Gather", + inputs= [gather_node.inputs[0].name,indices_tensor.name], + outputs = [gather_node.outputs[0].name], + axis = gather_node.attrs['axis']) + + self.node = new_gather_node + + intializer_list = [] + if helper.is_constant_tensor(gather_parent_node.inputs[0]): + intializer_list.append(data_tensor) + elif helper.is_constant_tensor(gather_node.inputs[1]): + intializer_list.append(indices_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list + diff --git a/src/qonnx/custom_op/qop/gemm_op.py b/src/qonnx/custom_op/qop/gemm_op.py new file mode 100644 index 00000000..30a9a904 --- /dev/null +++ b/src/qonnx/custom_op/qop/gemm_op.py @@ -0,0 +1,51 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class Gemm: + + def __init__(self, node): + + gemm_node = node + + x1 = gemm_node.inputs[0] + x2 = gemm_node.inputs[1] + x3 = gemm_node.inputs[2] + y = gemm_node.outputs[0] + + new_gemm_node = onnx.helper.make_node(name = gemm_node.name, op_type = "Gemm", + inputs= [x1.name, x2.name, x3.name], + outputs = [y.name], + alpha = gemm_node.attrs["alpha"], + beta = gemm_node.attrs["beta"], + transB = gemm_node.attrs["transB"]) + + self.node = new_gemm_node + + def get_node(self): + return self.node \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/gemm_op_optimized.py b/src/qonnx/custom_op/qop/gemm_op_optimized.py new file mode 100644 index 00000000..aff0526b --- /dev/null +++ b/src/qonnx/custom_op/qop/gemm_op_optimized.py @@ -0,0 +1,98 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#########################################################################
+
+import onnx
+from .helper import helper
+import numpy as np
+
+class Gemm_optimized:
+
+    def __init__(self, node):
+
+        gemm_node = node
+
+        x1 = gemm_node.inputs[0]
+        x2 = gemm_node.inputs[1]
+        x3 = gemm_node.inputs[2]
+        y = gemm_node.outputs[0]
+
+        # Fold the bias DequantizeLinear into an FP32 initializer.
+        # DequantizeLinear is defined as y = (x - zero_point) * scale.
+        bias_node = gemm_node.i(2)
+        bias_tensor = bias_node.inputs[0]
+        bias_scale_tensor = bias_node.inputs[1]
+        bias_zero_point = bias_node.inputs[2]
+        bias_scale_tensor = bias_scale_tensor.values * np.ones(bias_tensor.shape)
+        a = bias_tensor.values - bias_zero_point.values * np.ones(bias_tensor.shape)
+        fp32_bias_tensor = a * bias_scale_tensor
+        fp32_bias_tensor = fp32_bias_tensor.astype(np.float32)
+
+        weight_node = gemm_node.i(1).i()
+        if gemm_node.i(1).i().op == "Clip":
+            weight_node = gemm_node.i(1).i().i()
+        weight_tensor = weight_node.inputs[0]
+        weight_scale_tensor = weight_node.inputs[1]
+        weight_zero_point = weight_node.inputs[2]
+        # QuantizeLinear is defined as q = saturate(round(x / scale) + zero_point).
+        weight_scale_tensor = weight_scale_tensor.values * np.ones(weight_tensor.shape)
+        a = weight_tensor.values / weight_scale_tensor
+        b = weight_zero_point.values * np.ones(weight_tensor.shape)
+        int8_weight = np.round(a + b)
+        int8_weight = np.clip(int8_weight, -127, 127)
+        dq_weight_scale_tensor = gemm_node.i(1).inputs[1]
+        dq_weight_zero_point = gemm_node.i(1).inputs[2]
+        # Dequantize back to FP32: w = (q - zero_point) * scale.
+        fp32_weight = (int8_weight - dq_weight_zero_point.values * np.ones(int8_weight.shape)) * (dq_weight_scale_tensor.values * np.ones(int8_weight.shape))
+
+        bias_name = x1.name + ".1"
+        weight_name = x1.name + ".2"
+        bias_tensor_1 = helper.create_initializer_tensor(name=bias_name,
+                                                         tensor_array=fp32_bias_tensor,
+                                                         data_type=onnx.TensorProto.FLOAT)
+        weight_tensor_1 = helper.create_initializer_tensor(name=weight_name,
+                                                           tensor_array=fp32_weight,
+                                                           data_type=onnx.TensorProto.FLOAT)
+
+        new_gemm_node = onnx.helper.make_node(name = gemm_node.name, op_type = "Gemm",
+                                              inputs= [x1.name, weight_name, bias_name],
+                                              outputs = [y.name],
+                                              alpha = gemm_node.attrs["alpha"],
+                                              beta = gemm_node.attrs["beta"],
+                                              transB = gemm_node.attrs["transB"])
+
+        # Detach the folded Q/DQ weight chain from the graph.
+        node.i(1).i(0).inputs.clear()
+        node.i(1).i(0).outputs.clear()
+        node.i(1).inputs.clear()
+        node.i(1).outputs.clear()
+
+        self.node = new_gemm_node
+        intializer_list = []
+        intializer_list.append(weight_tensor_1)
+        intializer_list.append(bias_tensor_1)
+        self.intializer_list = intializer_list
+
+    def get_node(self):
+        return self.node
+
+    def get_intializers(self):
+        return
self.intializer_list diff --git a/src/qonnx/custom_op/qop/globalAveragePool_op.py b/src/qonnx/custom_op/qop/globalAveragePool_op.py new file mode 100644 index 00000000..17f8cec6 --- /dev/null +++ b/src/qonnx/custom_op/qop/globalAveragePool_op.py @@ -0,0 +1,143 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class GlobalAveragePool: + + def __init__(self, node, aecg_zendnn_opt, remove_relu): + + golbal_average_pool_node = node + x_name = golbal_average_pool_node.inputs[0].name + y_name = golbal_average_pool_node.outputs[0].name + + if helper.is_parent_exist(golbal_average_pool_node, 0, 0) and golbal_average_pool_node.i().op == "DequantizeLinear": + if helper.is_parent_exist(golbal_average_pool_node, 0, 0): + parent_dql_node = golbal_average_pool_node.i() + else: + print("************* ERROR ****************** Please check 1st parent of GlobalAveragePool, ", golbal_average_pool_node.name, " parent DNE") + + x_scale_name = node.name + "x_scale" + x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, + tensor_array=parent_dql_node.inputs[1].values, + data_type=onnx.TensorProto.FLOAT) + x_zp_name = node.name + "x_zp" + + is_input_s8 = True + + if helper.is_parent_exist(parent_dql_node, 0, 0): + if aecg_zendnn_opt: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=parent_dql_node.inputs[2].values, + data_type=onnx.TensorProto.UINT8) + else: + second_parent = parent_dql_node.i() + if second_parent.op == "Relu": + if helper.is_parent_exist(second_parent, 0, 0) and second_parent.i().op == "QuantizeLinear": + third_parent = second_parent.i() + if third_parent.inputs[2].values.dtype == np.int8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=third_parent.inputs[2].values, + data_type=onnx.TensorProto.INT8) + is_input_s8 = True + else: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=third_parent.inputs[2].values, + data_type=onnx.TensorProto.UINT8) + is_input_s8 = False + else: + if parent_dql_node.i().inputs[2].values.dtype == np.int8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=parent_dql_node.inputs[2].values, + data_type=onnx.TensorProto.INT8) + 
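+                            # is_input_s8 records whether the tensor feeding the pool is signed
+                            # (s8) or unsigned (u8); the output zero-point initializer created
+                            # further below must use the matching dtype.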
is_input_s8 = True + else: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=parent_dql_node.inputs[2].values, + data_type=onnx.TensorProto.UINT8) + is_input_s8 = False + else: + print("************* ERROR ****************** Please check 2nd parent of GlobalAveragePool, ", golbal_average_pool_node.name, " 1st parent of ", parent_dql_node, " parent DNE") + + if parent_dql_node.i().i().op == "Relu" and parent_dql_node.i().i().i().i().inputs[2].values.dtype == np.int8: + if remove_relu: + x_name = parent_dql_node.inputs[0].name + else: + third_parent_relu = parent_dql_node.i().i() + if third_parent_relu.i().op == "Conv" or third_parent_relu.i().op == "Add": + x_name = third_parent_relu.outputs[0].name + else: + x_name = (third_parent_relu.o()).outputs[0].name + else: + x_name = parent_dql_node.inputs[0].name + + if helper.is_child_present(node, 0, 0) and golbal_average_pool_node.o().op == "QuantizeLinear": + child_ql_node = golbal_average_pool_node.o() + + y_scale_name = node.name + "y_scale" + y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, + tensor_array=child_ql_node.inputs[1].values, + data_type=onnx.TensorProto.FLOAT) + y_zp_name = node.name + "y_zp" + + if aecg_zendnn_opt: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=child_ql_node.inputs[2].values, + data_type=onnx.TensorProto.UINT8) + else: + if is_input_s8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=child_ql_node.inputs[2].values, + data_type=onnx.TensorProto.INT8) + else: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=child_ql_node.inputs[2].values, + data_type=onnx.TensorProto.UINT8) + + y_name = child_ql_node.outputs[0].name + + kwargs = {} + kwargs["domain"] = 'com.microsoft' + new_average_pool_node = onnx.helper.make_node(name = golbal_average_pool_node.name, op_type = "QLinearGlobalAveragePool", + inputs = [x_name, x_scale_name, x_zp_name, y_scale_name, y_zp_name], + outputs = [y_name], + channels_last = 0,**kwargs) + + intializer_list = [] + intializer_list.append(x_scale_tensor) + intializer_list.append(x_zp_tensor) + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + self.intializer_list = intializer_list + + self.node = new_average_pool_node + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/greater_op.py b/src/qonnx/custom_op/qop/greater_op.py new file mode 100644 index 00000000..fc54c6e5 --- /dev/null +++ b/src/qonnx/custom_op/qop/greater_op.py @@ -0,0 +1,57 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper + +class Greater: + + def __init__(self, node): + + greater_node = node + x1_name = greater_node.inputs[0].name + + x2_name = greater_node.inputs[1].name + x2_value = greater_node.inputs[1].values + x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.FLOAT) + + y_name = greater_node.outputs[0].name + + new_greater_node = onnx.helper.make_node(name = greater_node.name, + op_type = "Greater", + inputs = [x1_name, x2_name], + outputs = [y_name]) + + self.node = new_greater_node + + intializer_list = [] + intializer_list.append(x2_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/helper.py b/src/qonnx/custom_op/qop/helper.py new file mode 100644 index 00000000..c070a6a4 --- /dev/null +++ b/src/qonnx/custom_op/qop/helper.py @@ -0,0 +1,65 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
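+#
+# Usage sketch (hypothetical node variable, assuming the onnx-graphsurgeon
+# style wrappers used throughout this package):
+#
+#   if helper.is_parent_exist(conv_node, 0, 0) and conv_node.i().op == "QuantizeLinear":
+#       zp = conv_node.i().inputs[2]
+#       zp_init = helper.create_initializer_tensor(conv_node.name + "_zp",
+#                                                  zp.values, onnx.TensorProto.INT8)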
+#
+#########################################################################
+
+import onnx
+import numpy as np
+
+class helper:
+
+    def __init__(self) -> None:
+        pass
+
+    @staticmethod
+    def create_initializer_tensor(name: str, tensor_array: np.ndarray, data_type: onnx.TensorProto = onnx.TensorProto.FLOAT) -> onnx.TensorProto:
+        initializer_tensor = onnx.helper.make_tensor(name=name,
+                                                     data_type=data_type,
+                                                     dims=tensor_array.shape,
+                                                     vals=tensor_array.flatten().tolist())
+        return initializer_tensor
+
+    # to check node.i() exists pass tensor_idx=0, node_idx=0
+    # to check node.inputs[1].inputs[0] exists pass tensor_idx=1, node_idx=0
+    @staticmethod
+    def is_parent_exist(node, tensor_idx, node_idx):
+        if len(node.inputs) > tensor_idx and len(node.inputs[tensor_idx].inputs) > node_idx:
+            return True
+        return False
+
+    @staticmethod
+    def is_child_present(node, tensor_idx, node_idx):
+        if len(node.outputs) > tensor_idx and len(node.outputs[tensor_idx].outputs) > node_idx:
+            return True
+        return False
+
+    @staticmethod
+    def is_attr_exist(node, attr_name):
+        try:
+            node.attrs[attr_name]
+            return True
+        except KeyError:
+            return False
+
+    @staticmethod
+    def is_constant_tensor(tensor):
+        # Constant tensors expose .values; variable tensors raise AttributeError.
+        try:
+            tensor.values
+            return True
+        except AttributeError:
+            return False
\ No newline at end of file
diff --git a/src/qonnx/custom_op/qop/identity_op.py b/src/qonnx/custom_op/qop/identity_op.py
new file mode 100644
index 00000000..e9019659
--- /dev/null
+++ b/src/qonnx/custom_op/qop/identity_op.py
@@ -0,0 +1,56 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
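+#
+# The Identity handler in this file assumes its single input is a constant
+# tensor (it reads .values) and re-emits that constant as an FP32 initializer
+# alongside the rebuilt Identity node.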
+# +######################################################################### + +import onnx +from .helper import helper + +class Identity: + + def __init__(self, node): + + identity_node = node + + x1_name = identity_node.inputs[0].name + x1_value = identity_node.inputs[0].values + x1_tensor = helper.create_initializer_tensor(x1_name,x1_value,onnx.TensorProto.FLOAT) + + y_name = identity_node.outputs[0].name + + new_identity_node = onnx.helper.make_node(name = identity_node.name, + op_type = "Identity", + inputs = [x1_name], + outputs = [y_name]) + + self.node = new_identity_node + + intializer_list = [] + intializer_list.append(x1_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/less_op.py b/src/qonnx/custom_op/qop/less_op.py new file mode 100644 index 00000000..9d54216f --- /dev/null +++ b/src/qonnx/custom_op/qop/less_op.py @@ -0,0 +1,57 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper + +class Less: + + def __init__(self, node): + + less_node = node + x1_name = less_node.inputs[0].name + + x2_name = less_node.inputs[1].name + x2_value = less_node.inputs[1].values + x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.FLOAT) + + y_name = less_node.outputs[0].name + + new_less_node = onnx.helper.make_node(name = less_node.name, + op_type = "Less", + inputs = [x1_name, x2_name], + outputs = [y_name]) + + self.node = new_less_node + + intializer_list = [] + intializer_list.append(x2_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/lrn_op.py b/src/qonnx/custom_op/qop/lrn_op.py new file mode 100644 index 00000000..f8dcbf22 --- /dev/null +++ b/src/qonnx/custom_op/qop/lrn_op.py @@ -0,0 +1,48 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
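+#
+# The LRN handler in this file is a plain rebuild: it copies the
+# alpha/beta/bias/size attributes and tensor names unchanged, with no Q/DQ
+# rewiring.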
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class LRN: + + def __init__(self, node): + + lrn_node = node + + x_name = lrn_node.inputs[0].name + y_name = lrn_node.outputs[0].name + + new_lrn_node = onnx.helper.make_node(name = lrn_node.name, op_type = "LRN", + inputs = [x_name], + outputs = [y_name], + alpha = lrn_node.attrs["alpha"], + beta = lrn_node.attrs["beta"], + bias = lrn_node.attrs["bias"], + size = lrn_node.attrs["size"]) + + self.node = new_lrn_node + + def get_node(self): + return self.node diff --git a/src/qonnx/custom_op/qop/matmul_op.py b/src/qonnx/custom_op/qop/matmul_op.py new file mode 100644 index 00000000..1cb1842d --- /dev/null +++ b/src/qonnx/custom_op/qop/matmul_op.py @@ -0,0 +1,157 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
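+#
+# Conversion note for this file: a quantized MatMul of shape (M,K)x(K,N) is
+# lowered to a QLinearConv with a 1x1 kernel. The (K,N) weight is reshaped to
+# (K,N,1,1) and transposed to (N,K,1,1) so that N becomes the output-channel
+# axis expected by QLinearConv.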
+# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class MatMul: + + def __init__(self, node): + matlmul_node = node + + if helper.is_parent_exist(matlmul_node, 0, 0): + x_DQL_node = matlmul_node.i() + else: + print("**************** ERROR ******************* Matmul node ", matlmul_node.name, " input(0,0) DNE") + + if helper.is_parent_exist(matlmul_node, 1, 0): + w_DQL_node = matlmul_node.inputs[1].inputs[0] + else: + print("************* ERROR ************************ Please check the Matmul node ", matlmul_node.name, " the input(1,0) DNE") + + if helper.is_parent_exist(x_DQL_node, 0, 0): + x_QL_node = x_DQL_node.i() + else: + print("**************** ERROR ******************* ", x_DQL_node.name, " input(0,0) DNE Please check") + + x_scale_tensor = x_DQL_node.inputs[1] + x_scale = x_scale_tensor.values + x_zp_tensor = x_DQL_node.inputs[2] + + w_scale_tensor = w_DQL_node.inputs[1] + w_scale = w_scale_tensor.values + w_zp_tensor = w_DQL_node.inputs[2] + + if helper.is_child_present(matlmul_node, 0, 0): + if (matlmul_node.o().op == "QuantizeLinear"): + y_QL_node = matlmul_node.o() + y_scale_tensor = y_QL_node.inputs[1] + y_scale = y_scale_tensor.values + y_zp_tensor = y_QL_node.inputs[2] + else: + print("*********************** ERROR output of Matmul node ", matlmul_node.name, " is not QuantizeLinear ***********************") + else: + print(matlmul_node.name, " output(0,0) DNE") + + quantized_weight_tensor_original = w_DQL_node.inputs[0].values + new_shape = quantized_weight_tensor_original.shape + (1,1) + a1 = np.reshape(quantized_weight_tensor_original, new_shape) + quantized_weight_tensor = np.transpose(a1, (1,0,2,3)) + + if x_QL_node.i().op == "DequantizeLinear" and x_QL_node.i().i().op == "QuantizeLinear": + x_name = x_QL_node.i().i().outputs[0].name + else: + x_name = x_QL_node.outputs[0].name + + y_name = matlmul_node.o().outputs[0].name + + x_scale_name = matlmul_node.name + "_X_SCALE" + x_scale_value = x_scale + x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, + tensor_array=x_scale_value, + data_type=onnx.TensorProto.FLOAT) + + x_zp_name = matlmul_node.name + "_X_ZERO_POINT" + x_zp_value = x_zp_tensor.values + + if (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.int8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + elif (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.uint8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + + w_name = matlmul_node.inputs[1].name + w_value = quantized_weight_tensor + w_tensor = helper.create_initializer_tensor(name=w_name, + tensor_array=w_value, + data_type=onnx.TensorProto.INT8) + + w_scale_name = matlmul_node.name + "_W_SCALE" + w_scale_value = w_scale + w_scale_tensor = helper.create_initializer_tensor(name=w_scale_name, + tensor_array=w_scale_value, + data_type=onnx.TensorProto.FLOAT) + + w_zp_name = matlmul_node.name + "_W_ZERO_POINT" + w_zp_value = w_zp_tensor.values + w_zp_tensor = helper.create_initializer_tensor(name=w_zp_name, + tensor_array=w_zp_value, + data_type=onnx.TensorProto.INT8) + + y_scale_name = matlmul_node.name + "_Y_SCALE" + y_scale_value = y_scale + y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, + tensor_array=y_scale_value, + data_type=onnx.TensorProto.FLOAT) + + y_zp_name = 
matlmul_node.name + "_Y_ZERO_POINT" + y_zp_value = y_zp_tensor.values + + if y_zp_tensor.dtype == np.int8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.INT8) + elif y_zp_tensor.dtype == np.uint8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.UINT8) + + qlinearconv_node = onnx.helper.make_node(name = matlmul_node.name, op_type = "QLinearConv", + inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name], + outputs = [y_name], dilations = [1,1], group = 1, + kernel_shape = [1,1], pads = [0,0,0,0], strides = [1,1]) + + + self.node = qlinearconv_node + + intializer_list = [] + intializer_list.append(x_scale_tensor) + intializer_list.append(x_zp_tensor) + intializer_list.append(w_tensor) + intializer_list.append(w_scale_tensor) + intializer_list.append(w_zp_tensor) + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/matmul_retained_op.py b/src/qonnx/custom_op/qop/matmul_retained_op.py new file mode 100644 index 00000000..ba410bc9 --- /dev/null +++ b/src/qonnx/custom_op/qop/matmul_retained_op.py @@ -0,0 +1,154 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
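+#
+# Unlike matmul_op.py (which lowers MatMul to a 1x1 QLinearConv), the handler
+# in this file retains the matrix-multiply form and emits an ONNX
+# QLinearMatMul, reusing the scales and zero points gathered from the
+# surrounding Q/DQ nodes.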
+# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class MatMul_Retained: + + def __init__(self, node): + matlmul_node = node + + if helper.is_parent_exist(matlmul_node, 0, 0): + x_DQL_node = matlmul_node.i() + else: + print("**************** ERROR ******************* Matmul node ", matlmul_node.name, " input(0,0) DNE") + + if helper.is_parent_exist(matlmul_node, 1, 0): + w_DQL_node = matlmul_node.inputs[1].inputs[0] + else: + print("************* ERROR ************************ Please check the Matmul node ", matlmul_node.name, " the input(1,0) DNE") + + if helper.is_parent_exist(x_DQL_node, 0, 0): + x_QL_node = x_DQL_node.i() + else: + print("**************** ERROR ******************* ", x_DQL_node.name, " input(0,0) DNE Please check") + + x_scale_tensor = x_DQL_node.inputs[1] + x_scale = x_scale_tensor.values + x_zp_tensor = x_DQL_node.inputs[2] + + w_scale_tensor = w_DQL_node.inputs[1] + w_scale = w_scale_tensor.values + w_zp_tensor = w_DQL_node.inputs[2] + + if helper.is_child_present(matlmul_node, 0, 0): + if (matlmul_node.o().op == "QuantizeLinear"): + y_QL_node = matlmul_node.o() + y_scale_tensor = y_QL_node.inputs[1] + y_scale = y_scale_tensor.values + y_zp_tensor = y_QL_node.inputs[2] + else: + print("*********************** ERROR output of Matmul node ", matlmul_node.name, " is not QuantizeLinear ***********************") + else: + print(matlmul_node.name, " output(0,0) DNE") + + if x_QL_node.op == "QuantizeLinear" or x_QL_node.op == "MaxPool": + x_name = x_QL_node.outputs[0].name + else: + print("please check x_QL_node of Matmul node ", matlmul_node.name) + + y_name = y_QL_node.outputs[0].name + + x_scale_name = matlmul_node.name + "_X_SCALE" + x_scale_value = x_scale + x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, + tensor_array=x_scale_value, + data_type=onnx.TensorProto.FLOAT) + + x_zp_name = matlmul_node.name + "_X_ZERO_POINT" + x_zp_value = x_zp_tensor.values + + if (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.int8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + elif (x_QL_node.op == "MaxPool"): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + elif (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.uint8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + + w_name = matlmul_node.inputs[1].name + w_value = w_DQL_node.inputs[0].values + w_tensor = helper.create_initializer_tensor(name=w_name, + tensor_array=w_value, + data_type=onnx.TensorProto.INT8) + + w_scale_name = matlmul_node.name + "_W_SCALE" + w_scale_value = w_scale + w_scale_tensor = helper.create_initializer_tensor(name=w_scale_name, + tensor_array=w_scale_value, + data_type=onnx.TensorProto.FLOAT) + + w_zp_name = matlmul_node.name + "_W_ZERO_POINT" + w_zp_value = w_zp_tensor.values + w_zp_tensor = helper.create_initializer_tensor(name=w_zp_name, + tensor_array=w_zp_value, + data_type=onnx.TensorProto.INT8) + + y_scale_name = matlmul_node.name + "_Y_SCALE" + y_scale_value = y_scale + y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, + tensor_array=y_scale_value, + data_type=onnx.TensorProto.FLOAT) + + y_zp_name = matlmul_node.name + "_Y_ZERO_POINT" + y_zp_value = y_zp_tensor.values + + if 
y_zp_tensor.dtype == np.int8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.INT8) + elif y_zp_tensor.dtype == np.uint8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.UINT8) + + qlinear_matmul = onnx.helper.make_node(name = matlmul_node.name, op_type = "QLinearMatMul", + inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name], + outputs = [y_name]) + + self.node = qlinear_matmul + + intializer_list = [] + intializer_list.append(x_scale_tensor) + intializer_list.append(x_zp_tensor) + intializer_list.append(w_tensor) + intializer_list.append(w_scale_tensor) + intializer_list.append(w_zp_tensor) + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/maxpool_op.py b/src/qonnx/custom_op/qop/maxpool_op.py new file mode 100644 index 00000000..345393f8 --- /dev/null +++ b/src/qonnx/custom_op/qop/maxpool_op.py @@ -0,0 +1,125 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
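+#
+# The MaxPool handler in this file rewires its input/output tensor names
+# around the surrounding QuantizeLinear/DequantizeLinear (and optional Relu)
+# nodes, then copies the pooling attributes, substituting the ONNX defaults
+# (e.g. pads [0,0,0,0], strides [1,1]) when an attribute is absent.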
+# +######################################################################### + +import onnx +from .helper import helper + +class MaxPool: + + def __init__(self, node, maxpool_count, remove_relu): + + maxpool_node = node + x_name = maxpool_node.inputs[0].name + y_name = maxpool_node.outputs[0].name + + if helper.is_child_present(maxpool_node, 0, 0) and maxpool_node.o().op == "QuantizeLinear": + if helper.is_parent_exist(maxpool_node, 0, 0) and maxpool_node.i().op == "DequantizeLinear": + q_node = maxpool_node.o() + y_name = q_node.outputs[0].name + + if helper.is_parent_exist(maxpool_node, 0, 0): + found_relu = False + if maxpool_node.i().op == "Relu": + relu_node = maxpool_node.i() + found_relu = True + elif maxpool_node.i().op == "DequantizeLinear": + if maxpool_node.i().i().i().op == "Relu": + relu_node = maxpool_node.i().i().i() + found_relu = True + elif maxpool_node.i().i().i().op == "Concat": + x_name = maxpool_node.i().i().outputs[0].name + if maxpool_node.o().op == "QuantizeLinear": + y_name = maxpool_node.o().outputs[0].name + elif maxpool_node.i().i().op == "MaxPool": + x_name = maxpool_node.i().i().outputs[0].name + + if found_relu: + if helper.is_child_present(relu_node, 0, 0) and relu_node.outputs[0].outputs[0].op == "MaxPool": + ql_node = relu_node.outputs[0].outputs[0] + x_name = ql_node.outputs[0].name + elif helper.is_child_present(relu_node, 0, 1) and relu_node.outputs[0].outputs[1].op == "MaxPool": + ql_node = relu_node.outputs[0].outputs[0] + x_name = ql_node.outputs[0].name + elif helper.is_child_present(relu_node, 0, 0) and relu_node.o().o().outputs[0].outputs[0].op == "MaxPool": + x_name = relu_node.outputs[0].name + elif helper.is_child_present(relu_node, 0, 0) and relu_node.o().o().outputs[0].outputs[1].op == "MaxPool": + x_name = relu_node.outputs[0].name + + + if maxpool_node.i().op == "QuantizeLinear": + x_ql_node = maxpool_node.i() + if remove_relu: + x_name = x_ql_node.outputs[0].name + else: + if helper.is_parent_exist(x_ql_node, 0, 0) and x_ql_node.i().op == "Relu" and x_ql_node.i().i().op == "Conv": + relu_node = x_ql_node.i() + x_name = relu_node.outputs[0].name + + if helper.is_attr_exist(maxpool_node, 'auto_pad'): + auto_pad_attr = maxpool_node.attrs["auto_pad"] + else: + auto_pad_attr = "NOTSET" + + if helper.is_attr_exist(maxpool_node, 'ceil_mode'): + ceil_mode_attr = maxpool_node.attrs["ceil_mode"] + else: + ceil_mode_attr = 0 + + if helper.is_attr_exist(maxpool_node, 'dilations'): + dilations_attr = maxpool_node.attrs["dilations"] + else: + dilations_attr =[1,1] + + if helper.is_attr_exist(maxpool_node, 'pads'): + pads_attr = maxpool_node.attrs["pads"] + else: + pads_attr = [0,0,0,0] + + if helper.is_attr_exist(maxpool_node, 'storage_order'): + storage_order_attr = maxpool_node.attrs["storage_order"] + else: + storage_order_attr = 0 + + if helper.is_attr_exist(maxpool_node, 'strides'): + strides_attr = maxpool_node.attrs["strides"] + else: + strides_attr = [1,1] + + new_mapool_node = onnx.helper.make_node(name = maxpool_node.name, + op_type = "MaxPool", + inputs = [x_name], + outputs = [y_name], + auto_pad = auto_pad_attr, + ceil_mode = ceil_mode_attr, + dilations = dilations_attr, + pads = pads_attr, + storage_order = storage_order_attr, + strides = strides_attr, + kernel_shape = maxpool_node.attrs["kernel_shape"]) + + self.node = new_mapool_node + + def get_node(self): + return self.node \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/qlinearconv_op.py b/src/qonnx/custom_op/qop/qlinearconv_op.py new file mode 100644 index 
00000000..f1960df1 --- /dev/null +++ b/src/qonnx/custom_op/qop/qlinearconv_op.py @@ -0,0 +1,476 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +import numpy as np +from .helper import helper + +class QLinearConv: + + def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count): + x_DQL_node = node.i() + + conv_node = node + + has_bias = True if len(conv_node.inputs) == 3 else False + + w_DQL_node = conv_node.inputs[1].inputs[0] + QCDQ_model_detected=False + clip_max = np.iinfo(np.int8).min + clip_min = np.iinfo(np.int8).max + if (helper.is_constant_tensor(w_DQL_node.i())==False and w_DQL_node.i().op == "Clip"): + QCDQ_model_detected=True + clip_min = w_DQL_node.i().inputs[1].values + clip_max = w_DQL_node.i().inputs[2].values + + # b_DQL_node = (3) + # ------------------------------------------------------------------------ + # (1) (2) DequantizeLinear (1) (2) + # \ | / (3) for bias OR \ / + # \ | / \ / + # Conv (QDQ model) Conv (3 - FP32 bias embedded) (QCDQ model) + # | | + # ------------------------------------------------------------------------ + # Initialization + b_DQL_node = conv_node + b_DQL_tensor = conv_node + if has_bias: + b_DQL_node = conv_node.inputs[2] # For QDQ + b_DQL_tensor = conv_node.inputs[2] # For QCDQ + if has_bias and QCDQ_model_detected==False: + b_DQL_node = conv_node.inputs[2].inputs[0] + is_fp32_bias_embedded = False + if QCDQ_model_detected: + if helper.is_constant_tensor(b_DQL_tensor) and b_DQL_tensor.dtype == "float32": + is_fp32_bias_embedded = True + b_QL_tensor = b_DQL_tensor + if is_fp32_bias_embedded: + if not helper.is_parent_exist(b_DQL_tensor, 0, 0): + b_QL_tensor = b_DQL_tensor + + is_weight_tensor_quantized = False + if len(w_DQL_node.inputs[0].inputs) == 0: + is_weight_tensor_quantized = True + is_bias_tensor_quantized = False + if QCDQ_model_detected and has_bias and not is_fp32_bias_embedded and not helper.is_parent_exist(b_DQL_tensor, 0, 0) and b_DQL_tensor.dtype == "int32": + is_bias_tensor_quantized = True + elif QCDQ_model_detected==False and has_bias and len(b_DQL_node.inputs[0].inputs) == 0: + is_bias_tensor_quantized = True + + if not is_weight_tensor_quantized: + w_QL_node = w_DQL_node.i() + + if QCDQ_model_detected==False and has_bias and (not is_bias_tensor_quantized): + 
b_QL_node = b_DQL_node.i() + + x_scale_tensor = x_DQL_node.inputs[1] + x_scale = x_scale_tensor.values + x_zp_tensor = x_DQL_node.inputs[2] + + w_scale_tensor = w_DQL_node.inputs[1] + w_scale = w_scale_tensor.values + w_zp_tensor = w_DQL_node.inputs[2] + + is_relu_present = False + if conv_node.o().op == "Relu": + relu_node = conv_node.o() + is_relu_present = True + if relu_node.o().op == "QuantizeLinear": + y_QL_node = relu_node.o() + y_scale_tensor = y_QL_node.inputs[1] + y_scale = y_scale_tensor.values + y_zp_tensor = y_QL_node.inputs[2] + else: + print("*********************** ERROR output of Relu node ", relu_node.name, " is not QuantizeLinear ***********************") + elif (conv_node.o().op == "QuantizeLinear"): + y_QL_node = conv_node.o() + y_scale_tensor = y_QL_node.inputs[1] + y_scale = y_scale_tensor.values + y_zp_tensor = y_QL_node.inputs[2] + else: + print("*********************** ERROR output of Conv node ", conv_node.name, " is not QuantizeLinear ***********************") + + S8_MIN = np.iinfo(np.int8).min + S8_MAX = np.iinfo(np.int8).max + if clip_min != np.iinfo(np.int8).max and clip_max != np.iinfo(np.int8).min: + S8_MIN = clip_min + S8_MAX = clip_max + U8_MIN = np.iinfo(np.uint8).min + U8_MAX = np.iinfo(np.uint8).max + S32_MIN = np.iinfo(np.int32).min + S32_MAX = np.iinfo(np.int32).max + + if (QCDQ_model_detected and helper.is_parent_exist(w_DQL_node, 0, 0) and helper.is_parent_exist(w_DQL_node.i(0), 0, 0) and w_DQL_node.i(0).i(0).op == "QuantizeLinear"): + w_QL_node = w_DQL_node.i(0).i(0) + + if QCDQ_model_detected==False and has_bias and (not is_bias_tensor_quantized) and helper.is_parent_exist(b_DQL_node, 0, 0): + b_QL_node = b_DQL_node.i() + + # -------------------------------------------------------------------------- + # QuantizeLinear (w_QL_node set to this in first if condition) + # | + # Clip + # | + # DequantizeLinear (for weight) + # (0) / (1) + # | / + # Conv + # -------------------------------------------------------------------------- + if QCDQ_model_detected and helper.is_parent_exist(w_DQL_node, 0, 0) and helper.is_parent_exist(w_DQL_node.i(0), 0, 0): + w_QL_node = w_DQL_node.i().i() + quantized_weight_tensor = w_QL_node.inputs[0] + #if is_weight_tensor_quantized and QCDQ_model_detected: + # quantized_weight_tensor = w_DQL_node.inputs[1].values + if is_weight_tensor_quantized and not QCDQ_model_detected: + quantized_weight_tensor = w_DQL_node.inputs[0].values + elif helper.is_constant_tensor(w_QL_node): + quantized_weight_tensor = w_QL_node.values + quantized_weight_tensor = np.clip(quantized_weight_tensor, S8_MIN, S8_MAX) + quantized_weight_tensor = np.round(quantized_weight_tensor) + quantized_weight_tensor = quantized_weight_tensor.astype(np.int8) + elif not helper.is_constant_tensor(w_QL_node): + weight_tensor = w_QL_node.inputs[0] + weight_scale_tensor = w_QL_node.inputs[1] + weight_zp_tensor = w_QL_node.inputs[2] + + weight_scaled_tensor = weight_scale_tensor.values * np.ones(weight_tensor.shape) + if QCDQ_model_detected: + weight_scaled_tensor = np.ones(weight_tensor.shape) * weight_scale_tensor.values[:, np.newaxis, np.newaxis, np.newaxis] + b = weight_tensor.values / weight_scaled_tensor + c = weight_zp_tensor.values * np.ones(weight_tensor.shape) + if QCDQ_model_detected: + c = weight_zp_tensor.values[:, np.newaxis, np.newaxis, np.newaxis] * np.ones(weight_tensor.shape) + quantized_weight_tensor = b + c + if weight_zp_tensor.dtype == "int8": + quantized_weight_tensor = np.clip(quantized_weight_tensor, S8_MIN, S8_MAX) + elif 
weight_zp_tensor.dtype == "uint8": + quantized_weight_tensor = np.clip(quantized_weight_tensor, U8_MIN, U8_MAX) + quantized_weight_tensor = np.round(quantized_weight_tensor) + quantized_weight_tensor = quantized_weight_tensor.astype(np.int8) + if QCDQ_model_detected: + clip_node = w_DQL_node.i() + clip_node.inputs.clear() + clip_node.outputs.clear() + + if has_bias and is_bias_tensor_quantized: + quantized_bias_tensor = b_DQL_node.inputs[0].values + elif is_fp32_bias_embedded and has_bias: + bias_tensor = b_QL_tensor + bias_scale_tensor1 = w_QL_node.inputs[1] + bias_zp_tensor = w_QL_node.inputs[2] + + # satutration after QL node + a = x_scale * bias_scale_tensor1.values + b = bias_tensor.values / a + # Zero point is set to 0 for quantizing bias + d = b + d = np.round(d) + quantized_bias_tensor = d + quantized_bias_tensor = np.clip(quantized_bias_tensor, S32_MIN, S32_MAX) + quantized_bias_tensor = np.round(quantized_bias_tensor) + quantized_bias_tensor = quantized_bias_tensor.astype(np.int32) + elif has_bias: + bias_tensor = b_QL_node.inputs[0] + bias_scale_tensor1 = b_QL_node.inputs[1] + bias_zp_tensor = b_QL_node.inputs[2] + + # satutration after QL node + a = bias_scale_tensor1.values * np.ones(bias_tensor.shape) + b = bias_tensor.values / a + c = bias_zp_tensor.values * np.ones(bias_tensor.shape) + d = b + c + if bias_zp_tensor.dtype == "int8": + d = np.clip(d, S8_MIN, S8_MAX) + elif bias_zp_tensor.dtype == "uint8": + d = np.clip(d, U8_MIN, U8_MAX) + d = np.round(d) + + # now again dequantize it + e = d * a + f = e - c + # f is now fp32 tensor + + bias_scale = x_scale * w_scale + bias_scale_tensor = bias_scale * np.ones(bias_tensor.shape) + quantized_bias_tensor = (f / bias_scale_tensor) + quantized_bias_tensor = np.clip(quantized_bias_tensor, S32_MIN, S32_MAX) + quantized_bias_tensor = np.round(quantized_bias_tensor) + quantized_bias_tensor = quantized_bias_tensor.astype(np.int32) + + x_QL_node = x_DQL_node.i() + is_x_QL_maxpool = False + is_X_QL_transpose = True if x_QL_node.op == "Transpose" else False + maxpool_input_s8 = False # True means s8 False means u8 + if x_QL_node.op == "MaxPool": + is_x_QL_maxpool = True + + if helper.is_parent_exist(x_QL_node, 0, 0): + if x_QL_node.i().op == "Relu": + if remove_relu: + # if this flag is enabled, then relu will not be added thus x_name will be x_QL's output tensor name + x_name = x_QL_node.outputs[0].name + else: + if (x_QL_node.i().i().op == "Conv") or (x_QL_node.i().i().op == "Add" and x_QL_node.i().i().i().inputs[2].values.dtype == np.int8): + + """ + these are 2 condtions + one in resnet50v1 + + DQL DQL + | | + | | + V | + Add<------------- + | + | + V + Relu------------------------------ + | + | + QL (x_QL_node) + | + | + DQL DQL DQL + | | | + | | | + Conv<------------ + + if Add input is s8 + x_relu_node = Relu + relu will be maintained due to s8 data type thus + x_name = relu's output + + other case is in Resnet50v1.5 + + Conv + | + | + Relu + | + | + QL + | + | + DQL DQL DQL + | | | + | | | + Conv<------------ + + we maintain relu node here thus x_name = relu's output + + """ + x_relu_node = x_QL_node.i() + x_name = x_relu_node.outputs[0].name + else: + x_name = x_QL_node.outputs[0].name + elif x_QL_node.op == "MaxPool": + """ + this is resnet50v1 case + + QL + | + | + V + Maxpool + | + | + V + DQL DQL DQL + | | | + | | | + V | | + Conv<------------ + + """ + x_name = x_QL_node.outputs[0].name + if x_QL_node.i().op == "QuantizeLinear": + if (x_QL_node.i()).inputs[2].dtype == np.int8: + maxpool_input_s8 = True + elif 
(x_QL_node.i()).inputs[2].dtype == np.uint8: + maxpool_input_s8 = False + else: + x_name = x_QL_node.outputs[0].name + if x_QL_node.op == "Clip": + x_name = str(int(x_QL_node.o().outputs[0].name)-3) + else: + x_name = x_QL_node.outputs[0].name + + if is_relu_present and not(remove_relu): + y_name = conv_node.outputs[0].name + else: + y_name = y_QL_node.outputs[0].name + + x_scale_name = conv_node.name + "_X_SCALE" + x_scale_value = x_scale + x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, + tensor_array=x_scale_value, + data_type=onnx.TensorProto.FLOAT) + + x_zp_name = conv_node.name + "_X_ZERO_POINT" + x_zp_value = x_zp_tensor.values + + if aecg_zendnn_opt and conv_count > 0: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + else: + if is_x_QL_maxpool: + if maxpool_input_s8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + else: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + elif is_X_QL_transpose: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + else: + if (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.int8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + elif (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.uint8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + elif x_QL_node.op == "Relu" or x_QL_node.op == "Clip": + if (x_QL_node.i().op == "QuantizeLinear" and x_QL_node.i().inputs[2].dtype == np.int8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + elif (x_QL_node.i().op == "QuantizeLinear" and x_QL_node.i().inputs[2].dtype == np.uint8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + else: + print("ERROR Please check x_zp_tensor of ", conv_node.name) + + w_name = conv_node.inputs[1].name + w_value = quantized_weight_tensor + w_tensor = helper.create_initializer_tensor(name=w_name, + tensor_array=w_value, + data_type=onnx.TensorProto.INT8) + + w_scale_name = conv_node.name + "_W_SCALE" + w_scale_value = w_scale + w_scale_tensor = helper.create_initializer_tensor(name=w_scale_name, + tensor_array=w_scale_value, + data_type=onnx.TensorProto.FLOAT) + + w_zp_name = conv_node.name + "_W_ZERO_POINT" + w_zp_value = w_zp_tensor.values + w_zp_tensor = helper.create_initializer_tensor(name=w_zp_name, + tensor_array=w_zp_value, + data_type=onnx.TensorProto.INT8) + + y_scale_name = conv_node.name + "_Y_SCALE" + y_scale_value = y_scale + y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, + tensor_array=y_scale_value, + data_type=onnx.TensorProto.FLOAT) + + y_zp_name = conv_node.name + "_Y_ZERO_POINT" + y_zp_value = y_zp_tensor.values + + if aecg_zendnn_opt: + # if this opt is enabled then y_zp has be to set to u8 type + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.UINT8) + else: + if y_zp_tensor.dtype == np.int8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + 
diff --git a/src/qonnx/custom_op/qop/quantizelinear_op.py b/src/qonnx/custom_op/qop/quantizelinear_op.py
new file mode 100644
index 00000000..d35b21b0
--- /dev/null
+++ b/src/qonnx/custom_op/qop/quantizelinear_op.py
@@ -0,0 +1,78 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class QuantizeLinear: + + def __init__(self, node): + ql_node = node + + x_name = ql_node.inputs[0].name + flag = False + if helper.is_child_present(node, 0, 0) and node.o().op == "DequantizeLinear": + if helper.is_child_present(node.o(), 0, 0) and node.o().o().op == "Conv": + if helper.is_child_present(node.o().o(), 0, 0) and node.o().o().o().op == "Reshape": + flag = True + x_tensor = helper.create_initializer_tensor(name = x_name,tensor_array = ql_node.inputs[0].values, data_type = onnx.TensorProto.FLOAT) + elif helper.is_child_present(node.o(), 0, 0) and node.o().o().op == "Gemm": + flag = True + x_tensor = helper.create_initializer_tensor(name = x_name,tensor_array = ql_node.inputs[0].values, data_type = onnx.TensorProto.FLOAT) + + y_scale_name = ql_node.inputs[1].name + y_scale_value = ql_node.inputs[1].values + y_scale_tensor = helper.create_initializer_tensor(name = y_scale_name,tensor_array = y_scale_value, data_type = onnx.TensorProto.FLOAT) + + y_zp_name = ql_node.inputs[2].name + y_zp_value = ql_node.inputs[2].values + if ql_node.inputs[2].dtype == np.int8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type = onnx.TensorProto.INT8) + elif ql_node.inputs[2].dtype == np.uint8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type = onnx.TensorProto.UINT8) + + y_name = ql_node.outputs[0].name + + quantizelinear_node = onnx.helper.make_node(name = ql_node.name, op_type = "QuantizeLinear", inputs = [x_name, y_scale_name, y_zp_name], outputs = [y_name]) + + self.node = quantizelinear_node + + intializer_list = [] + if flag: + intializer_list.append(x_tensor) + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/relu_op.py b/src/qonnx/custom_op/qop/relu_op.py new file mode 100644 index 00000000..58cc23cd --- /dev/null +++ b/src/qonnx/custom_op/qop/relu_op.py @@ -0,0 +1,44 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class Relu: + + def __init__(self, node): + + relu_node = node + + x_name = relu_node.inputs[0].name + y_name = relu_node.outputs[0].name + + new_relu_node = onnx.helper.make_node(name = relu_node.name, op_type = "Relu", + inputs = [x_name], + outputs = [y_name]) + + self.node = new_relu_node + + def get_node(self): + return self.node diff --git a/src/qonnx/custom_op/qop/reshape_op.py b/src/qonnx/custom_op/qop/reshape_op.py new file mode 100644 index 00000000..424cd38f --- /dev/null +++ b/src/qonnx/custom_op/qop/reshape_op.py @@ -0,0 +1,65 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+#
+#########################################################################
+
+import onnx
+from .helper import helper
+
+class Reshape:
+
+ def __init__(self, node):
+
+ reshape_node = node
+
+ x_name = reshape_node.inputs[0].name
+
+ x2_name = reshape_node.inputs[1].name
+ if helper.is_constant_tensor(reshape_node.inputs[1]):
+ x2_value = reshape_node.inputs[1].values
+ x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.INT64)
+
+ y_name = reshape_node.outputs[0].name
+
+ try:
+ new_reshape_node = onnx.helper.make_node(name = reshape_node.name, op_type = "Reshape",
+ inputs = [x_name, x2_name],
+ outputs = [y_name],
+ allowzero = reshape_node.attrs["allowzero"])
+ except Exception:
+ new_reshape_node = onnx.helper.make_node(name = reshape_node.name, op_type = "Reshape",
+ inputs = [x_name, x2_name],
+ outputs = [y_name])
+
+ self.node = new_reshape_node
+
+ intializer_list = []
+ if helper.is_constant_tensor(reshape_node.inputs[1]):
+ intializer_list.append(x2_tensor)
+ self.intializer_list = intializer_list
+
+ def get_node(self):
+ return self.node
+
+ def get_intializers(self):
+ return self.intializer_list
\ No newline at end of file
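Each wrapper class in this series exposes the same small surface: get_node() returns the rebuilt ONNX node, and get_intializers() returns any constant inputs materialized as initializers. A hypothetical driver loop is sketched below; rebuild_graph and its arguments are assumptions for illustration, not code from this patch:

    import onnx

    def rebuild_graph(gs_nodes, graph_inputs, graph_outputs):
        # gs_nodes: GraphSurgeon-style nodes, as used throughout this patch;
        # graph_inputs/graph_outputs: lists of ValueInfoProto.
        new_nodes, new_initializers = [], []
        for gs_node in gs_nodes:
            if gs_node.op == "Reshape":
                wrapper = Reshape(gs_node)  # class defined above
                new_nodes.append(wrapper.get_node())
                new_initializers.extend(wrapper.get_intializers())
        graph = onnx.helper.make_graph(new_nodes, "qop_graph",
                                       graph_inputs, graph_outputs,
                                       initializer=new_initializers)
        return onnx.helper.make_model(graph)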
diff --git a/src/qonnx/custom_op/qop/resize_op.py b/src/qonnx/custom_op/qop/resize_op.py
new file mode 100644
index 00000000..15b81f8d
--- /dev/null
+++ b/src/qonnx/custom_op/qop/resize_op.py
@@ -0,0 +1,66 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#########################################################################
+
+import onnx
+from .helper import helper
+
+class Resize:
+
+ def __init__(self, node):
+
+ resize_node = node
+
+ x1_name = resize_node.inputs[0].name
+ x2_name = resize_node.inputs[1].name
+ x3_name = resize_node.inputs[2].name
+ x4_name = ""
+ if len(resize_node.inputs) > 3:
+ x4_name = resize_node.inputs[3].name
+
+ y_name = resize_node.outputs[0].name
+
+ # Resize has 4 inputs: x, roi, scales, sizes, with the latter 3 optional.
+ # In the model (retinanet) there are 2 inputs, X and sizes, so the 2nd input is obtained at the 3rd index.
+ # The 1st and 2nd indices, i.e. x2_name and x3_name, come out to be empty.
+ print("WARNING check inputs of resize node")
+
+ if len(resize_node.inputs) > 3:
+ new_resize_node = onnx.helper.make_node(name = resize_node.name, op_type = "Resize", inputs = [x1_name, x2_name, x3_name, x4_name], outputs = [y_name], coordinate_transformation_mode = resize_node.attrs["coordinate_transformation_mode"], cubic_coeff_a = resize_node.attrs["cubic_coeff_a"], mode = resize_node.attrs["mode"], nearest_mode = resize_node.attrs["nearest_mode"])
+ else:
+ new_resize_node = onnx.helper.make_node(name = resize_node.name, op_type = "Resize", inputs = [x1_name, x2_name, x3_name], outputs = [y_name], coordinate_transformation_mode = resize_node.attrs["coordinate_transformation_mode"], cubic_coeff_a = resize_node.attrs["cubic_coeff_a"], mode = resize_node.attrs["mode"], nearest_mode = resize_node.attrs["nearest_mode"])
+
+ self.node = new_resize_node
+
+ intializer_list = []
+ if len(resize_node.inputs) == 3:
+ x3_value = resize_node.inputs[2].values
+ x3_tensor = helper.create_initializer_tensor(x3_name,x3_value,onnx.TensorProto.FLOAT)
+ intializer_list.append(x3_tensor)
+ self.intializer_list = intializer_list
+
+ def get_node(self):
+ return self.node
+
+ def get_intializers(self):
+ return self.intializer_list
diff --git a/src/qonnx/custom_op/qop/shape_op.py b/src/qonnx/custom_op/qop/shape_op.py
new file mode 100644
index 00000000..aadc1179
--- /dev/null
+++ b/src/qonnx/custom_op/qop/shape_op.py
@@ -0,0 +1,44 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#########################################################################
+
+import onnx
+
+class Shape:
+
+ def __init__(self, node):
+
+ shape_node = node
+
+ x_name = shape_node.inputs[0].name
+ y_name = shape_node.outputs[0].name
+
+ new_shape_node = onnx.helper.make_node(name = shape_node.name, op_type = "Shape",
+ inputs = [x_name],
+ outputs = [y_name])
+
+ self.node = new_shape_node
+
+ def get_node(self):
+ return self.node
diff --git a/src/qonnx/custom_op/qop/slice_op.py b/src/qonnx/custom_op/qop/slice_op.py
new file mode 100644
index 00000000..ae06e86b
--- /dev/null
+++ b/src/qonnx/custom_op/qop/slice_op.py
@@ -0,0 +1,77 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#########################################################################
+
+import onnx
+from .helper import helper
+
+class Slice:
+
+ def __init__(self, node):
+
+ slice_node = node
+ x1_name = slice_node.inputs[0].name
+
+ x2_name = slice_node.inputs[1].name
+ x2_value = slice_node.inputs[1].values
+ x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.INT64)
+
+ x3_name = slice_node.inputs[2].name
+ x3_value = slice_node.inputs[2].values
+ x3_tensor = helper.create_initializer_tensor(x3_name,x3_value,onnx.TensorProto.INT64)
+
+ x4_name = slice_node.inputs[3].name
+ x4_value = slice_node.inputs[3].values
+ x4_tensor = helper.create_initializer_tensor(x4_name,x4_value,onnx.TensorProto.INT64)
+
+ # x5_name = slice_node.inputs[4].name
+ # x5_value = slice_node.inputs[4].values
+ # x5_tensor = helper.create_initializer_tensor(x5_name,x5_value,onnx.TensorProto.INT64)
+
+ y_name = slice_node.outputs[0].name
+
+ # new_slice_node = onnx.helper.make_node(name = slice_node.name,
+ # op_type = "Slice",
+ # inputs = [x1_name, x2_name, x3_name, x4_name, x5_name],
+ # outputs = [y_name])
+
+ new_slice_node = onnx.helper.make_node(name = slice_node.name,
+ op_type = "Slice",
+ inputs = [x1_name, x2_name, x3_name, x4_name],
+ outputs = [y_name])
+
+ self.node = new_slice_node
+
+ intializer_list = []
+ intializer_list.append(x2_tensor)
+ intializer_list.append(x3_tensor)
+ intializer_list.append(x4_tensor)
+ # intializer_list.append(x5_tensor)
+ self.intializer_list = intializer_list
+
+ def get_node(self):
+ return self.node
+
+ def get_intializers(self):
+ return self.intializer_list
\ No newline at end of file
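Since opset 10, Slice takes starts/ends/axes (and optionally steps) as tensor inputs rather than attributes, which is why the wrapper above materializes them as INT64 initializers. A minimal standalone illustration of the same pattern, using only stock onnx helpers with made-up tensor names:

    import onnx

    # starts/ends/axes become graph initializers that feed the Slice node.
    starts = onnx.helper.make_tensor("starts", onnx.TensorProto.INT64, [1], [0])
    ends = onnx.helper.make_tensor("ends", onnx.TensorProto.INT64, [1], [2])
    axes = onnx.helper.make_tensor("axes", onnx.TensorProto.INT64, [1], [0])
    node = onnx.helper.make_node("Slice",
                                 inputs=["X", "starts", "ends", "axes"],
                                 outputs=["Y"])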
diff --git a/src/qonnx/custom_op/qop/softmax_op.py b/src/qonnx/custom_op/qop/softmax_op.py
new file mode 100644
index 00000000..4e7f9786
--- /dev/null
+++ b/src/qonnx/custom_op/qop/softmax_op.py
@@ -0,0 +1,45 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#########################################################################
+
+import onnx
+
+class Softmax:
+
+ def __init__(self, node):
+
+ softmax_node = node
+
+ x_name = softmax_node.inputs[0].name
+ y_name = softmax_node.outputs[0].name
+
+ new_sftmx_node = onnx.helper.make_node(name = softmax_node.name, op_type = "Softmax",
+ inputs = [x_name],
+ outputs = [y_name],
+ axis = softmax_node.attrs["axis"])
+
+ self.node = new_sftmx_node
+
+ def get_node(self):
+ return self.node
\ No newline at end of file
diff --git a/src/qonnx/custom_op/qop/squeeze_op.py b/src/qonnx/custom_op/qop/squeeze_op.py
new file mode 100644
index 00000000..bdfbae0d
--- /dev/null
+++ b/src/qonnx/custom_op/qop/squeeze_op.py
@@ -0,0 +1,57 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+# +######################################################################### + +import onnx +from .helper import helper + +class Squeeze: + + def __init__(self, node): + + squeeze_node = node + x1_name = squeeze_node.inputs[0].name + + x2_name = squeeze_node.inputs[1].name + x2_value = squeeze_node.inputs[1].values + x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.INT64) + + y_name = squeeze_node.outputs[0].name + + new_squeeze_node = onnx.helper.make_node(name = squeeze_node.name, + op_type = "Squeeze", + inputs = [x1_name, x2_name], + outputs = [y_name]) + + self.node = new_squeeze_node + + intializer_list = [] + intializer_list.append(x2_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/transpose_op.py b/src/qonnx/custom_op/qop/transpose_op.py new file mode 100644 index 00000000..4607a600 --- /dev/null +++ b/src/qonnx/custom_op/qop/transpose_op.py @@ -0,0 +1,45 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class Transpose: + + def __init__(self, node): + + transpose_node = node + + x_name = transpose_node.inputs[0].name + y_name = transpose_node.outputs[0].name + + new_transpose_node = onnx.helper.make_node(name = transpose_node.name, op_type = "Transpose", + inputs = [x_name], + outputs = [y_name], + perm = transpose_node.attrs["perm"]) + + self.node = new_transpose_node + + def get_node(self): + return self.node diff --git a/src/qonnx/custom_op/qop/unsqueeze_op.py b/src/qonnx/custom_op/qop/unsqueeze_op.py new file mode 100644 index 00000000..b59d8d52 --- /dev/null +++ b/src/qonnx/custom_op/qop/unsqueeze_op.py @@ -0,0 +1,62 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper + +class Unsqueeze: + + def __init__(self, node): + + unsq_node = node + + x1_name = unsq_node.inputs[0].name + y_name = unsq_node.outputs[0].name + + if helper.is_constant_tensor(unsq_node.inputs[1]): + if unsq_node.inputs[1].dtype == "int64": + axes_tensor = helper.create_initializer_tensor(name=unsq_node.inputs[1].name, + tensor_array=unsq_node.inputs[1].values, + data_type=onnx.TensorProto.INT64) + else: + print("ERROR please check axes data type for Unsqueeze Node ", unsq_node.name) + + + new_unsq_node = onnx.helper.make_node(name = unsq_node.name, op_type = "Unsqueeze", + inputs = [x1_name, axes_tensor.name], + outputs = [y_name]) + + intializer_list = [] + if helper.is_constant_tensor(unsq_node.inputs[1]): + intializer_list.append(axes_tensor) + self.intializer_list = intializer_list + + self.node = new_unsq_node + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list From 3ffc786319b51d1f6620b8471e8c8488a9930d3d Mon Sep 17 00:00:00 2001 From: Aditya Chatterjee Date: Fri, 10 Nov 2023 04:41:33 +0000 Subject: [PATCH 16/20] Removed add op [qop] --- src/qonnx/custom_op/qop/add_op.py | 519 ------------------------------ 1 file changed, 519 deletions(-) delete mode 100644 src/qonnx/custom_op/qop/add_op.py diff --git a/src/qonnx/custom_op/qop/add_op.py b/src/qonnx/custom_op/qop/add_op.py deleted file mode 100644 index cd7da025..00000000 --- a/src/qonnx/custom_op/qop/add_op.py +++ /dev/null @@ -1,519 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper -import numpy as np - -class QLinearAdd: - - def __init__(self, node, aecg_zendnn_opt, remove_relu): - - add_node = node - - if len(add_node.inputs[1].inputs)==0: - # if Add node has only 1 input node and other input is constant tensor we cannot change it to QLinearAdd node hence keeping it as is - x_name = add_node.inputs[0].name - y_name = add_node.outputs[0].name - - const_val = add_node.inputs[1].values - - const_name = add_node.name + "_const_add_tensor" - y_scale_tensor = helper.create_initializer_tensor(name=const_name, - tensor_array=const_val, - data_type=onnx.TensorProto.FLOAT) - - new_add_node = onnx.helper.make_node(name = add_node.name, - op_type = "Add", - inputs = [x_name, const_name], - outputs = [y_name]) - self.node = new_add_node - - if helper.is_child_present(add_node, 0, 0) and add_node.o().op == "Relu": - relu_node = add_node.o() - relu_node1 = onnx.helper.make_node(name = relu_node.name, op_type = "Relu", inputs = [add_node.outputs[0].name], outputs = [relu_node.outputs[0].name]) - self.relu_node = relu_node1 - - intializer_list = [] - intializer_list.append(y_scale_tensor) - self.intializer_list = intializer_list - - else: - input_node1 = add_node.inputs[0].inputs[0] - input_node2 = add_node.inputs[1].inputs[0] - output_node = add_node.o() - - is_relu_present = False - if output_node.op == "Relu": - is_relu_present = True - relu_node = output_node - # relu_node gets updated in later conditions thus keeping relu_node_name and relu_node_output_tensor to make it simple to keep their track - relu_node_name = relu_node.name - relu_node_output_tensor = relu_node.outputs[0].name - if relu_node.o().op == "QuantizeLinear": - output_node = relu_node.o() - else: - print("*********************** ERROR output of Relu node ", relu_node.name, " is not QuantizeLinear ***********************") - elif not(output_node.op == "QuantizeLinear"): - print("*********************** ERROR output of Add node ", add_node.name, " is not QuantizeLinear ***********************") - - - # in order to get scale and zp for the 2 inputs to Add node, we need 2 DQL nodes. - if not (input_node1.op == "DequantizeLinear" and input_node2.op == "DequantizeLinear"): - - """ - case observed in Resnet50v1 - Add1 - | - | - V - Relu-------------------- - | | - | | - V | - QL | - | | - | | - | | - DQL DQL DQL2 | - | | | | - | | | | - ----------------------------Conv | - | | - | | - V | - QL | - | | - | | - V | - DQL DQL DQL | - | | | | - | | | | - | | V | - ---------------------------Conv | - | | - | | - V | - QL | - | | - | | - V | - DQL DQL DQL | - | | | | - | | | | - | | V | - ----------------------------Conv | - | | - | | - V | - QL | - | | - | | - V | - DQL1 | - | | - | | - V | - Add<--------------------- - - - here Add doesn't have 1 of the DQL node, so we take DQL2 as the other DQL node. 
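The traversal in the removed code below exists because QLinearAdd (a com.microsoft contrib op) consumes a scale and zero point for each input and for the output, so the converter must recover them from the surviving QuantizeLinear/DequantizeLinear nodes. Roughly, the op dequantizes both inputs, adds in real arithmetic, and requantizes; a small numpy model of that computation, illustrative only and not code from this patch:

    import numpy as np

    def qlinear_add(a, a_scale, a_zp, b, b_scale, b_zp, y_scale, y_zp,
                    dtype=np.uint8):
        # dequantize both inputs, add in real arithmetic, requantize
        info = np.iinfo(dtype)
        real = ((a.astype(np.int32) - a_zp) * a_scale
                + (b.astype(np.int32) - b_zp) * b_scale)
        q = np.round(real / y_scale) + y_zp
        return np.clip(q, info.min, info.max).astype(dtype)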
- - in case both inputs are missing DQL node, haven't encountered this case to this is flagged for now, if needed will be handled later depending on the case - """ - if not (input_node1.op == "DequantizeLinear") and not (input_node2.op == "DequantizeLinear"): - print("***************************** ERROR No input of Add node is DequantizeLinear ***********************************") - elif not (input_node1.op == "DequantizeLinear"): - # if input_node1 is not DQL - if input_node1.op == "Relu": - relu_node = input_node1 - if relu_node.i().op == "Add" and relu_node.o().op == "QuantizeLinear": - if (relu_node.o()).o().op == "DequantizeLinear": - input_node1 = (relu_node.o()).o() - # in the example case, shown input_node1 is now DQL2 - elif input_node1.op == "MaxPool": - # when resnet strides has been implemented there will be a maxpool node between the shown Relu and Add node. - maxpool_node = input_node1 - relu_node = maxpool_node.i() - if relu_node.i().op == "Add" and (relu_node.o().op == "QuantizeLinear" or relu_node.output[0].outputs[1].op == "QuantizeLinear"): - if (relu_node.o()).o().op == "DequantizeLinear": - input_node1 = (relu_node.o()).o() - # input_node1 is now DQL2 - elif (relu_node.outputs[0].outputs[1]).o().op == "DequantizeLinear": - input_node2 = (relu_node.outputs[0].outputs[1]).o() - # input_node2 is now DQL2 - elif input_node1.op == "Add": - - """ - this case is observed in mobilenetv2-12-qdq.onnx - - - Add2------------------------- - | | - | | - | V - | QL1 - | | - | | - | V - | DQL1 DQL DQL - | | | | - | | | | - | V | | - | Conv<-------------------- - | | - | | - | V - | QL - | | - | | - | V - | DQL DQL DQL - | | | | - | | | | - | V | | - | Conv<-------------------- - | | - | | - | V - | QL - | | - | | - | V - | DQL DQL DQL - | | | | - | | | | - | V | | - | Conv<------------------- - | | - | | - | V - | QL - | | - | | - | V - | DQL - | | - | | - Add1------------------------- - - Add2 = parent_add_node - QL1 = parent_add_node_ql_node - input_node1 = DQL1 - - """ - parent_add_node = input_node1 - parent_add_node_ql_node = parent_add_node.o() - input_node1 = parent_add_node_ql_node.o() - elif not (input_node2.op == "DequantizeLinear"): - # if input_node2 is not DQL - if input_node2.op == "Relu": - relu_node = input_node2 - if relu_node.i().op == "Add" and relu_node.o().op == "QuantizeLinear": - if (relu_node.o()).o().op == "DequantizeLinear": - input_node2 = (relu_node.o()).o() - # input_node2 is now the DQL node from which we need to take scale and zp - - elif input_node2.op == "MaxPool": - maxpool_node = input_node2 - if maxpool_node.i().op == "Relu": - relu_node = maxpool_node.i() - elif maxpool_node.i().op == "DequantizeLinear": - if maxpool_node.i().i().op == "QuantizeLinear": - if maxpool_node.i().i().i().op == "Relu": - relu_node = maxpool_node.i().i().i() - if relu_node.i().op == "Add" and (relu_node.o().op == "QuantizeLinear" or (len(relu_node.outputs[0].outputs)>1 and relu_node.output[0].outputs[1].op == "QuantizeLinear")): - if (relu_node.o()).o().op == "DequantizeLinear": - input_node2 = (relu_node.o()).o() - elif len(relu_node.outputs[0].outputs)>1 and (relu_node.outputs[0].outputs[1]).o().op == "DequantizeLinear": - input_node2 = (relu_node.outputs[0].outputs[1]).o() - # input_node2 is now the DQL node from which we need to take scale and zp - - if input_node1.op == "DequantizeLinear" and input_node2.op == "DequantizeLinear" and output_node.op == "QuantizeLinear": - # now we have input_node1 = input_node2 = DQL and output as QL node - if 
add_node.inputs[0].inputs[0].op == "MaxPool": - # this is strides case now if Maxpool is parent to Add node, maxpool = node1 - node1 = add_node.i() - elif add_node.inputs[0].inputs[0].op == "Add": - # this is for mobilenet case, so Add2 = node1 - node1 = add_node.i() - else: - """ - if above 2 cases not there lets assume following case now from Resnet50v1 model - - | DQL DQL | DQL DQL - | | | | | | - | | | | | | - Conv<--------------- Conv--------------------- - | | - | | - QL1 QL2 - | | - | | - DQL DQL - | | - | | - Add<----------------------------------------- - - now node1 is QL1/QL2 - - """ - node1 = add_node.inputs[0].inputs[0].i() - - if add_node.inputs[1].inputs[0].op == "MaxPool": - # same as above but for other input, node2 = maxpool node - node2 = add_node.inputs[1].inputs[0] - else: - # same as the above general case discussed, node2 = QL1/QL2 - node2 =input_node2.i() - - if node1.op == "Add": - # this is mobilenet case explained abaove, node1 will be converted to QLinearAdd node and it wiil act as input to current add node - # this a_name = QL1 output tensor name (please refer above mobilenet case) - a_name = node1.o().outputs[0].name - else: - # refering to general case taken above from resnet50v1 model, a_name = QL1/QL2's output tensor name - a_name = node1.outputs[0].name - - a_scale_name = add_node.name + "_A_SCALE" - a_scale_value = input_node1.inputs[1].values - a_scale_tensor = helper.create_initializer_tensor(name=a_scale_name, - tensor_array=a_scale_value, - data_type=onnx.TensorProto.FLOAT) - - a_zp_name = add_node.name + "_A_ZP" - a_zp_value = input_node1.inputs[2].values - - if aecg_zendnn_opt: - a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name, - tensor_array=a_zp_value, - data_type=onnx.TensorProto.UINT8) - else: - if node1.i().op == "QuantizeLinear" and node1.i().i() == "Relu": - a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name, - tensor_array=a_zp_value, - data_type=onnx.TensorProto.UINT8) - else: - if input_node1.inputs[2].dtype == np.int8: - a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name, - tensor_array=a_zp_value, - data_type=onnx.TensorProto.INT8) - elif input_node1.inputs[2].dtype == np.uint8: - a_zp_tensor = helper.create_initializer_tensor(name=a_zp_name, - tensor_array=a_zp_value, - data_type=onnx.TensorProto.UINT8) - - # TODO: Only 1 condition is handled here that Add Node's 1st parent is DQL<--QL and 2nd parent can be Relu. Vice Versa and other cases are not encountered yet thus not handled. 
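The helper.create_initializer_tensor call used for every scale and zero-point tensor in this series is defined elsewhere in the tree and not shown in this patch. Assuming it is a thin wrapper over onnx.helper.make_tensor, a minimal equivalent might look like the sketch below (a guess at its behaviour, not the actual helper):

    import onnx
    import numpy as np

    def create_initializer_tensor(name, tensor_array, data_type):
        # Presumed behaviour: package a numpy array (or scalar) as a
        # TensorProto suitable for a graph's initializer list.
        arr = np.asarray(tensor_array)
        return onnx.helper.make_tensor(name=name, data_type=data_type,
                                       dims=arr.shape,
                                       vals=arr.flatten().tolist())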
- if helper.is_parent_exist(node2, 0, 0): - if remove_relu: - # b_name = the QL's output tensor - b_name = node2.outputs[0].name - else: - # check Relu and input of Add node is s8, any 1 input can be checked, thus we check for node1 - if node2.i().op == "Relu" and node1.inputs[2].values.dtype == np.int8: - """ - this case is observed in renset50v1.5 - - DQL DQL - | | - | | - V | - Add<----------------- - | - | - V - Relu1 - | - | - V - QL1 - | - | - V - ------------------------------------DQL1 DQL DQL - | | | | - | | | | - | V | | - | Conv4<-------------------- - | | - | | - | V - | Relu - | | - | | - | V - | QL - | | - | | - | V - | DQL DQL DQL - | | | | - | | | | - | V | | - | Conv3<-------------------- - | | - | | - | V - | Relu - | | - | | - | V - | QL - | | - | | - | V - | DQL DQL DQL - | | | | - | | | | - | V | | - | Conv2<-------------------- - | | - | | - | V - | QL - | | - | | - | V - | DQL - | | - | | - | V - ---------------------------------->Add1 - - - in this case node2 is QL1 - node2_relu_node = Relu1 - thus b_name = Relu1's output as abotve top Add node is converted as follows- - - QLinearAdd - | - | - V - Relu1 - - thus relu1 output is set to b_name - - - """ - node2_relu_node = node2.i() - if node2_relu_node.i().op == "Conv" or node2_relu_node.i().op == "Add": - b_name = node2_relu_node.outputs[0].name - else: - b_name = node2.outputs[0].name - else: - b_name = node2.outputs[0].name - else: - print("************* ERROR ****************** Please check parent of Add Node's parent, ", node2.name) - - b_scale_name = add_node.name + "_B_SCALE" - b_scale_value = input_node2.inputs[1].values - b_scale_tensor = helper.create_initializer_tensor(name=b_scale_name, - tensor_array=b_scale_value, - data_type=onnx.TensorProto.FLOAT) - - b_zp_name = add_node.name + "_B_ZP" - b_zp_value = input_node2.inputs[2].values - - if aecg_zendnn_opt: - b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name, - tensor_array=b_zp_value, - data_type=onnx.TensorProto.UINT8) - else: - if node2.i().op == "QuantizeLinear" and node2.i().i().op == "Relu": - b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name, - tensor_array=b_zp_value, - data_type=onnx.TensorProto.UINT8) - else: - if input_node2.inputs[2].dtype == np.int8: - b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name, - tensor_array=b_zp_value, - data_type=onnx.TensorProto.INT8) - elif input_node2.inputs[2].dtype == np.uint8: - b_zp_tensor = helper.create_initializer_tensor(name=b_zp_name, - tensor_array=b_zp_value, - data_type=onnx.TensorProto.UINT8) - - y_scale_name = add_node.name + "_Y_SCALE" - y_scale_value = output_node.inputs[1].values - y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, - tensor_array=y_scale_value, - data_type=onnx.TensorProto.FLOAT) - - y_zp_name = add_node.name + "_Y_ZP" - y_zp_value = output_node.inputs[2].values - - if aecg_zendnn_opt: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.UINT8) - y_name = output_node.outputs[0].name - else: - if output_node.inputs[2].dtype == np.int8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.INT8) - elif output_node.inputs[2].dtype == np.uint8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.UINT8) - - if is_relu_present and not remove_relu and node1.inputs[2].values.dtype == np.int8: - y_name = add_node.outputs[0].name - 
else: - y_name = output_node.outputs[0].name - - kwargs = {} - kwargs["domain"] = 'com.microsoft' - - - new_add_node = onnx.helper.make_node(name = add_node.name, - op_type = "QLinearAdd", - inputs = [a_name, a_scale_name, a_zp_name, b_name, b_scale_name, b_zp_name, y_scale_name, y_zp_name], - outputs = [y_name], - **kwargs) - - self.node = new_add_node - - if is_relu_present: - relu_node = onnx.helper.make_node(name = relu_node_name, op_type = "Relu", inputs = [add_node.outputs[0].name], outputs = [relu_node_output_tensor]) - self.relu_node = relu_node - - intializer_list = [] - intializer_list.append(a_scale_tensor) - intializer_list.append(a_zp_tensor) - intializer_list.append(b_scale_tensor) - intializer_list.append(b_zp_tensor) - intializer_list.append(y_scale_tensor) - intializer_list.append(y_zp_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list - - def get_relu_node(self): - return self.relu_node From dae4ebbbca4d0b524291534820a08869820d019b Mon Sep 17 00:00:00 2001 From: Aditya Chatterjee Date: Fri, 10 Nov 2023 04:49:11 +0000 Subject: [PATCH 17/20] [Code Cleanup - QCDQ Model Converter] * Support for only 2 specific QCDQ models * Removed support for other models --- .../custom_op/qop/globalAveragePool_op.py | 143 ---- src/qonnx/custom_op/qop/identity_op.py | 56 -- src/qonnx/custom_op/qop/lrn_op.py | 48 -- src/qonnx/custom_op/qop/matmul_op.py | 157 ----- src/qonnx/custom_op/qop/matmul_retained_op.py | 154 ----- src/qonnx/custom_op/qop/reshape_op.py | 65 -- src/qonnx/custom_op/qop/resize_op.py | 66 -- src/qonnx/custom_op/qop/shape_op.py | 44 -- src/qonnx/custom_op/qop/unsqueeze_op.py | 62 -- src/qonnx/transformation/qcdq_to_qop.py | 631 +----------------- 10 files changed, 8 insertions(+), 1418 deletions(-) delete mode 100644 src/qonnx/custom_op/qop/globalAveragePool_op.py delete mode 100644 src/qonnx/custom_op/qop/identity_op.py delete mode 100644 src/qonnx/custom_op/qop/lrn_op.py delete mode 100644 src/qonnx/custom_op/qop/matmul_op.py delete mode 100644 src/qonnx/custom_op/qop/matmul_retained_op.py delete mode 100644 src/qonnx/custom_op/qop/reshape_op.py delete mode 100644 src/qonnx/custom_op/qop/resize_op.py delete mode 100644 src/qonnx/custom_op/qop/shape_op.py delete mode 100644 src/qonnx/custom_op/qop/unsqueeze_op.py diff --git a/src/qonnx/custom_op/qop/globalAveragePool_op.py b/src/qonnx/custom_op/qop/globalAveragePool_op.py deleted file mode 100644 index 17f8cec6..00000000 --- a/src/qonnx/custom_op/qop/globalAveragePool_op.py +++ /dev/null @@ -1,143 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper -import numpy as np - -class GlobalAveragePool: - - def __init__(self, node, aecg_zendnn_opt, remove_relu): - - golbal_average_pool_node = node - x_name = golbal_average_pool_node.inputs[0].name - y_name = golbal_average_pool_node.outputs[0].name - - if helper.is_parent_exist(golbal_average_pool_node, 0, 0) and golbal_average_pool_node.i().op == "DequantizeLinear": - if helper.is_parent_exist(golbal_average_pool_node, 0, 0): - parent_dql_node = golbal_average_pool_node.i() - else: - print("************* ERROR ****************** Please check 1st parent of GlobalAveragePool, ", golbal_average_pool_node.name, " parent DNE") - - x_scale_name = node.name + "x_scale" - x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, - tensor_array=parent_dql_node.inputs[1].values, - data_type=onnx.TensorProto.FLOAT) - x_zp_name = node.name + "x_zp" - - is_input_s8 = True - - if helper.is_parent_exist(parent_dql_node, 0, 0): - if aecg_zendnn_opt: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=parent_dql_node.inputs[2].values, - data_type=onnx.TensorProto.UINT8) - else: - second_parent = parent_dql_node.i() - if second_parent.op == "Relu": - if helper.is_parent_exist(second_parent, 0, 0) and second_parent.i().op == "QuantizeLinear": - third_parent = second_parent.i() - if third_parent.inputs[2].values.dtype == np.int8: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=third_parent.inputs[2].values, - data_type=onnx.TensorProto.INT8) - is_input_s8 = True - else: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=third_parent.inputs[2].values, - data_type=onnx.TensorProto.UINT8) - is_input_s8 = False - else: - if parent_dql_node.i().inputs[2].values.dtype == np.int8: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=parent_dql_node.inputs[2].values, - data_type=onnx.TensorProto.INT8) - is_input_s8 = True - else: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=parent_dql_node.inputs[2].values, - data_type=onnx.TensorProto.UINT8) - is_input_s8 = False - else: - print("************* ERROR ****************** Please check 2nd parent of GlobalAveragePool, ", golbal_average_pool_node.name, " 1st parent of ", parent_dql_node, " parent DNE") - - if parent_dql_node.i().i().op == "Relu" and parent_dql_node.i().i().i().i().inputs[2].values.dtype == np.int8: - if remove_relu: - x_name = parent_dql_node.inputs[0].name - else: - third_parent_relu = parent_dql_node.i().i() - if third_parent_relu.i().op == "Conv" or third_parent_relu.i().op == "Add": - x_name = third_parent_relu.outputs[0].name - else: - x_name = (third_parent_relu.o()).outputs[0].name - else: - x_name = parent_dql_node.inputs[0].name - - if helper.is_child_present(node, 0, 0) and golbal_average_pool_node.o().op == "QuantizeLinear": - child_ql_node = golbal_average_pool_node.o() - - y_scale_name = 
node.name + "y_scale" - y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, - tensor_array=child_ql_node.inputs[1].values, - data_type=onnx.TensorProto.FLOAT) - y_zp_name = node.name + "y_zp" - - if aecg_zendnn_opt: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=child_ql_node.inputs[2].values, - data_type=onnx.TensorProto.UINT8) - else: - if is_input_s8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=child_ql_node.inputs[2].values, - data_type=onnx.TensorProto.INT8) - else: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=child_ql_node.inputs[2].values, - data_type=onnx.TensorProto.UINT8) - - y_name = child_ql_node.outputs[0].name - - kwargs = {} - kwargs["domain"] = 'com.microsoft' - new_average_pool_node = onnx.helper.make_node(name = golbal_average_pool_node.name, op_type = "QLinearGlobalAveragePool", - inputs = [x_name, x_scale_name, x_zp_name, y_scale_name, y_zp_name], - outputs = [y_name], - channels_last = 0,**kwargs) - - intializer_list = [] - intializer_list.append(x_scale_tensor) - intializer_list.append(x_zp_tensor) - intializer_list.append(y_scale_tensor) - intializer_list.append(y_zp_tensor) - self.intializer_list = intializer_list - - self.node = new_average_pool_node - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/identity_op.py b/src/qonnx/custom_op/qop/identity_op.py deleted file mode 100644 index e9019659..00000000 --- a/src/qonnx/custom_op/qop/identity_op.py +++ /dev/null @@ -1,56 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-# -######################################################################### - -import onnx -from .helper import helper - -class Identity: - - def __init__(self, node): - - identity_node = node - - x1_name = identity_node.inputs[0].name - x1_value = identity_node.inputs[0].values - x1_tensor = helper.create_initializer_tensor(x1_name,x1_value,onnx.TensorProto.FLOAT) - - y_name = identity_node.outputs[0].name - - new_identity_node = onnx.helper.make_node(name = identity_node.name, - op_type = "Identity", - inputs = [x1_name], - outputs = [y_name]) - - self.node = new_identity_node - - intializer_list = [] - intializer_list.append(x1_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/lrn_op.py b/src/qonnx/custom_op/qop/lrn_op.py deleted file mode 100644 index f8dcbf22..00000000 --- a/src/qonnx/custom_op/qop/lrn_op.py +++ /dev/null @@ -1,48 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx - -class LRN: - - def __init__(self, node): - - lrn_node = node - - x_name = lrn_node.inputs[0].name - y_name = lrn_node.outputs[0].name - - new_lrn_node = onnx.helper.make_node(name = lrn_node.name, op_type = "LRN", - inputs = [x_name], - outputs = [y_name], - alpha = lrn_node.attrs["alpha"], - beta = lrn_node.attrs["beta"], - bias = lrn_node.attrs["bias"], - size = lrn_node.attrs["size"]) - - self.node = new_lrn_node - - def get_node(self): - return self.node diff --git a/src/qonnx/custom_op/qop/matmul_op.py b/src/qonnx/custom_op/qop/matmul_op.py deleted file mode 100644 index 1cb1842d..00000000 --- a/src/qonnx/custom_op/qop/matmul_op.py +++ /dev/null @@ -1,157 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
-# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper -import numpy as np - -class MatMul: - - def __init__(self, node): - matlmul_node = node - - if helper.is_parent_exist(matlmul_node, 0, 0): - x_DQL_node = matlmul_node.i() - else: - print("**************** ERROR ******************* Matmul node ", matlmul_node.name, " input(0,0) DNE") - - if helper.is_parent_exist(matlmul_node, 1, 0): - w_DQL_node = matlmul_node.inputs[1].inputs[0] - else: - print("************* ERROR ************************ Please check the Matmul node ", matlmul_node.name, " the input(1,0) DNE") - - if helper.is_parent_exist(x_DQL_node, 0, 0): - x_QL_node = x_DQL_node.i() - else: - print("**************** ERROR ******************* ", x_DQL_node.name, " input(0,0) DNE Please check") - - x_scale_tensor = x_DQL_node.inputs[1] - x_scale = x_scale_tensor.values - x_zp_tensor = x_DQL_node.inputs[2] - - w_scale_tensor = w_DQL_node.inputs[1] - w_scale = w_scale_tensor.values - w_zp_tensor = w_DQL_node.inputs[2] - - if helper.is_child_present(matlmul_node, 0, 0): - if (matlmul_node.o().op == "QuantizeLinear"): - y_QL_node = matlmul_node.o() - y_scale_tensor = y_QL_node.inputs[1] - y_scale = y_scale_tensor.values - y_zp_tensor = y_QL_node.inputs[2] - else: - print("*********************** ERROR output of Matmul node ", matlmul_node.name, " is not QuantizeLinear ***********************") - else: - print(matlmul_node.name, " output(0,0) DNE") - - quantized_weight_tensor_original = w_DQL_node.inputs[0].values - new_shape = quantized_weight_tensor_original.shape + (1,1) - a1 = np.reshape(quantized_weight_tensor_original, new_shape) - quantized_weight_tensor = np.transpose(a1, (1,0,2,3)) - - if x_QL_node.i().op == "DequantizeLinear" and x_QL_node.i().i().op == "QuantizeLinear": - x_name = x_QL_node.i().i().outputs[0].name - else: - x_name = x_QL_node.outputs[0].name - - y_name = matlmul_node.o().outputs[0].name - - x_scale_name = matlmul_node.name + "_X_SCALE" - x_scale_value = x_scale - x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, - tensor_array=x_scale_value, - data_type=onnx.TensorProto.FLOAT) - - x_zp_name = matlmul_node.name + "_X_ZERO_POINT" - x_zp_value = x_zp_tensor.values - - if (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.int8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - 
data_type=onnx.TensorProto.INT8) - elif (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.uint8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - - w_name = matlmul_node.inputs[1].name - w_value = quantized_weight_tensor - w_tensor = helper.create_initializer_tensor(name=w_name, - tensor_array=w_value, - data_type=onnx.TensorProto.INT8) - - w_scale_name = matlmul_node.name + "_W_SCALE" - w_scale_value = w_scale - w_scale_tensor = helper.create_initializer_tensor(name=w_scale_name, - tensor_array=w_scale_value, - data_type=onnx.TensorProto.FLOAT) - - w_zp_name = matlmul_node.name + "_W_ZERO_POINT" - w_zp_value = w_zp_tensor.values - w_zp_tensor = helper.create_initializer_tensor(name=w_zp_name, - tensor_array=w_zp_value, - data_type=onnx.TensorProto.INT8) - - y_scale_name = matlmul_node.name + "_Y_SCALE" - y_scale_value = y_scale - y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, - tensor_array=y_scale_value, - data_type=onnx.TensorProto.FLOAT) - - y_zp_name = matlmul_node.name + "_Y_ZERO_POINT" - y_zp_value = y_zp_tensor.values - - if y_zp_tensor.dtype == np.int8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.INT8) - elif y_zp_tensor.dtype == np.uint8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.UINT8) - - qlinearconv_node = onnx.helper.make_node(name = matlmul_node.name, op_type = "QLinearConv", - inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name], - outputs = [y_name], dilations = [1,1], group = 1, - kernel_shape = [1,1], pads = [0,0,0,0], strides = [1,1]) - - - self.node = qlinearconv_node - - intializer_list = [] - intializer_list.append(x_scale_tensor) - intializer_list.append(x_zp_tensor) - intializer_list.append(w_tensor) - intializer_list.append(w_scale_tensor) - intializer_list.append(w_zp_tensor) - intializer_list.append(y_scale_tensor) - intializer_list.append(y_zp_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/matmul_retained_op.py b/src/qonnx/custom_op/qop/matmul_retained_op.py deleted file mode 100644 index ba410bc9..00000000 --- a/src/qonnx/custom_op/qop/matmul_retained_op.py +++ /dev/null @@ -1,154 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper -import numpy as np - -class MatMul_Retained: - - def __init__(self, node): - matlmul_node = node - - if helper.is_parent_exist(matlmul_node, 0, 0): - x_DQL_node = matlmul_node.i() - else: - print("**************** ERROR ******************* Matmul node ", matlmul_node.name, " input(0,0) DNE") - - if helper.is_parent_exist(matlmul_node, 1, 0): - w_DQL_node = matlmul_node.inputs[1].inputs[0] - else: - print("************* ERROR ************************ Please check the Matmul node ", matlmul_node.name, " the input(1,0) DNE") - - if helper.is_parent_exist(x_DQL_node, 0, 0): - x_QL_node = x_DQL_node.i() - else: - print("**************** ERROR ******************* ", x_DQL_node.name, " input(0,0) DNE Please check") - - x_scale_tensor = x_DQL_node.inputs[1] - x_scale = x_scale_tensor.values - x_zp_tensor = x_DQL_node.inputs[2] - - w_scale_tensor = w_DQL_node.inputs[1] - w_scale = w_scale_tensor.values - w_zp_tensor = w_DQL_node.inputs[2] - - if helper.is_child_present(matlmul_node, 0, 0): - if (matlmul_node.o().op == "QuantizeLinear"): - y_QL_node = matlmul_node.o() - y_scale_tensor = y_QL_node.inputs[1] - y_scale = y_scale_tensor.values - y_zp_tensor = y_QL_node.inputs[2] - else: - print("*********************** ERROR output of Matmul node ", matlmul_node.name, " is not QuantizeLinear ***********************") - else: - print(matlmul_node.name, " output(0,0) DNE") - - if x_QL_node.op == "QuantizeLinear" or x_QL_node.op == "MaxPool": - x_name = x_QL_node.outputs[0].name - else: - print("please check x_QL_node of Matmul node ", matlmul_node.name) - - y_name = y_QL_node.outputs[0].name - - x_scale_name = matlmul_node.name + "_X_SCALE" - x_scale_value = x_scale - x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, - tensor_array=x_scale_value, - data_type=onnx.TensorProto.FLOAT) - - x_zp_name = matlmul_node.name + "_X_ZERO_POINT" - x_zp_value = x_zp_tensor.values - - if (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.int8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT8) - elif (x_QL_node.op == "MaxPool"): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - elif (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.uint8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - - w_name = matlmul_node.inputs[1].name - w_value = w_DQL_node.inputs[0].values - w_tensor = helper.create_initializer_tensor(name=w_name, - tensor_array=w_value, - data_type=onnx.TensorProto.INT8) - - w_scale_name = matlmul_node.name + "_W_SCALE" - w_scale_value = w_scale - w_scale_tensor = helper.create_initializer_tensor(name=w_scale_name, - tensor_array=w_scale_value, - data_type=onnx.TensorProto.FLOAT) - - w_zp_name = matlmul_node.name + "_W_ZERO_POINT" - w_zp_value = w_zp_tensor.values - w_zp_tensor = helper.create_initializer_tensor(name=w_zp_name, - tensor_array=w_zp_value, - data_type=onnx.TensorProto.INT8) - - y_scale_name = matlmul_node.name + "_Y_SCALE" 
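[Note] Both MatMul wrappers lean on the same QDQ contract: a QuantizeLinear or DequantizeLinear node carries its scale at inputs[1] and its zero point at inputs[2], and DequantizeLinear computes (q - zero_point) * scale. That relation is all the x_scale/x_zp extraction in these files relies on; a short numpy check of it, with illustrative values:

    import numpy as np

    # DequantizeLinear semantics: x = (q - zero_point) * scale.
    q = np.array([0, 64, 128, 255], dtype=np.uint8)
    scale = np.float32(0.02)
    zero_point = np.uint8(128)

    x = (q.astype(np.int32) - np.int32(zero_point)) * scale
    print(x)  # [-2.56 -1.28  0.    2.54]
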
- y_scale_value = y_scale - y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, - tensor_array=y_scale_value, - data_type=onnx.TensorProto.FLOAT) - - y_zp_name = matlmul_node.name + "_Y_ZERO_POINT" - y_zp_value = y_zp_tensor.values - - if y_zp_tensor.dtype == np.int8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.INT8) - elif y_zp_tensor.dtype == np.uint8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.UINT8) - - qlinear_matmul = onnx.helper.make_node(name = matlmul_node.name, op_type = "QLinearMatMul", - inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name], - outputs = [y_name]) - - self.node = qlinear_matmul - - intializer_list = [] - intializer_list.append(x_scale_tensor) - intializer_list.append(x_zp_tensor) - intializer_list.append(w_tensor) - intializer_list.append(w_scale_tensor) - intializer_list.append(w_zp_tensor) - intializer_list.append(y_scale_tensor) - intializer_list.append(y_zp_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/reshape_op.py b/src/qonnx/custom_op/qop/reshape_op.py deleted file mode 100644 index 424cd38f..00000000 --- a/src/qonnx/custom_op/qop/reshape_op.py +++ /dev/null @@ -1,65 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
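[Note] The retained path keeps the operator as QLinearMatMul with the eight-input layout built above: (a, a_scale, a_zero_point, b, b_scale, b_zero_point, y_scale, y_zero_point). A hedged numpy emulation of what a runtime computes from those inputs, using toy values with a uint8 activation and an int8 weight:

    import numpy as np

    # QLinearMatMul: dequantize both operands, matmul in float, then
    # requantize the product with the output scale/zero-point pair.
    a = np.array([[10, 20]], dtype=np.uint8);  a_scale, a_zp = 0.1, 10
    b = np.array([[5], [5]], dtype=np.int8);   b_scale, b_zp = 0.2, 0
    y_scale, y_zp = 0.5, 128

    a_f = (a.astype(np.int32) - a_zp) * a_scale   # [[0.0  1.0]]
    b_f = (b.astype(np.int32) - b_zp) * b_scale   # [[1.0] [1.0]]
    y_f = a_f @ b_f                               # [[1.0]]

    y_q = np.clip(np.round(y_f / y_scale) + y_zp, 0, 255).astype(np.uint8)
    print(y_q)  # [[130]]
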
-# -######################################################################### - -import onnx -from .helper import helper - -class Reshape: - - def __init__(self, node): - - reshape_node = node - - x_name = reshape_node.inputs[0].name - - x2_name = reshape_node.inputs[1].name - if helper.is_constant_tensor(reshape_node.inputs[1]): - x2_value = reshape_node.inputs[1].values - x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.INT64) - - y_name = reshape_node.outputs[0].name - - try: - new_reshape_node = onnx.helper.make_node(name = reshape_node.name, op_type = "Reshape", - inputs = [x_name, x2_name], - outputs = [y_name], - allowzero = reshape_node.attrs["allowzero"]) - except: - new_reshape_node = onnx.helper.make_node(name = reshape_node.name, op_type = "Reshape", - inputs = [x_name, x2_name], - outputs = [y_name]) - - self.node = new_reshape_node - - intializer_list = [] - if helper.is_constant_tensor(reshape_node.inputs[1]): - intializer_list.append(x2_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/resize_op.py b/src/qonnx/custom_op/qop/resize_op.py deleted file mode 100644 index 15b81f8d..00000000 --- a/src/qonnx/custom_op/qop/resize_op.py +++ /dev/null @@ -1,66 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper - -class Resize: - - def __init__(self, node): - - resize_node = node - - x1_name = resize_node.inputs[0].name - x2_name = resize_node.inputs[1].name - x3_name = resize_node.inputs[2].name - x4_name = resize_node - if len(resize_node.inputs) > 3: - x4_name = resize_node.inputs[3].name - - y_name = resize_node.outputs[0].name - - # Resize has 4 inputs, x, roi, scales, sizes. With later 3 as optional. - # In the model (retinanet) there are 2 inputs X and sizes thus 2nd input is obtained at 3rd index. 
- # 1st and 2nd index i.e x2_name and x3_name come out to be empty - print("WARNING check inputs of resize node") - - new_resize_node = onnx.helper.make_node(name = resize_node.name, op_type = "Resize", inputs = [x1_name, x2_name, x3_name], outputs = [y_name], coordinate_transformation_mode = resize_node.attrs["coordinate_transformation_mode"], cubic_coeff_a = resize_node.attrs["cubic_coeff_a"], mode = resize_node.attrs["mode"], nearest_mode = resize_node.attrs["nearest_mode"]) - if len(resize_node.inputs) > 3: - new_resize_node = onnx.helper.make_node(name = resize_node.name, op_type = "Resize", inputs = [x1_name, x2_name, x3_name, x4_name], outputs = [y_name], coordinate_transformation_mode = resize_node.attrs["coordinate_transformation_mode"], cubic_coeff_a = resize_node.attrs["cubic_coeff_a"], mode = resize_node.attrs["mode"], nearest_mode = resize_node.attrs["nearest_mode"]) - - self.node = new_resize_node - - if len(resize_node.inputs) == 3: - x3_value = resize_node.inputs[2].values - x3_tensor = helper.create_initializer_tensor(x3_name,x3_value,onnx.TensorProto.FLOAT) - intializer_list = [] - intializer_list.append(x3_tensor) - self.intializer_list = intializer_list - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list diff --git a/src/qonnx/custom_op/qop/shape_op.py b/src/qonnx/custom_op/qop/shape_op.py deleted file mode 100644 index aadc1179..00000000 --- a/src/qonnx/custom_op/qop/shape_op.py +++ /dev/null @@ -1,44 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx - -class Shape: - - def __init__(self, node): - - shape_node = node - - x_name = shape_node.inputs[0].name - y_name = shape_node.outputs[0].name - - new_shape_node = onnx.helper.make_node(name = shape_node.name, op_type = "Shape", - inputs = [x_name], - outputs = [y_name]) - - self.node = new_shape_node - - def get_node(self): - return self.node diff --git a/src/qonnx/custom_op/qop/unsqueeze_op.py b/src/qonnx/custom_op/qop/unsqueeze_op.py deleted file mode 100644 index b59d8d52..00000000 --- a/src/qonnx/custom_op/qop/unsqueeze_op.py +++ /dev/null @@ -1,62 +0,0 @@ -######################################################################## -# -# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
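[Note] The Resize wrapper above forwards whatever names sit at inputs 1 and 2 even though, as its comment says, they come out empty. That matches the ONNX convention that an empty input name means "optional input not supplied", so a sizes-driven Resize keeps roi and scales as "" placeholders to hold the fourth slot. A small sketch of that convention; the attribute values here are illustrative:

    import onnx
    from onnx import helper

    # Resize inputs are (X, roi, scales, sizes); roi and scales are
    # skipped with "" so that sizes still lands in the fourth slot.
    resize = helper.make_node(
        "Resize",
        inputs=["x", "", "", "sizes"],
        outputs=["y"],
        name="resize_0",
        mode="nearest",
        coordinate_transformation_mode="asymmetric",
        nearest_mode="floor",
    )
    print(resize.input)  # ['x', '', '', 'sizes']
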
-# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# -######################################################################### - -import onnx -from .helper import helper - -class Unsqueeze: - - def __init__(self, node): - - unsq_node = node - - x1_name = unsq_node.inputs[0].name - y_name = unsq_node.outputs[0].name - - if helper.is_constant_tensor(unsq_node.inputs[1]): - if unsq_node.inputs[1].dtype == "int64": - axes_tensor = helper.create_initializer_tensor(name=unsq_node.inputs[1].name, - tensor_array=unsq_node.inputs[1].values, - data_type=onnx.TensorProto.INT64) - else: - print("ERROR please check axes data type for Unsqueeze Node ", unsq_node.name) - - - new_unsq_node = onnx.helper.make_node(name = unsq_node.name, op_type = "Unsqueeze", - inputs = [x1_name, axes_tensor.name], - outputs = [y_name]) - - intializer_list = [] - if helper.is_constant_tensor(unsq_node.inputs[1]): - intializer_list.append(axes_tensor) - self.intializer_list = intializer_list - - self.node = new_unsq_node - - def get_node(self): - return self.node - - def get_intializers(self): - return self.intializer_list diff --git a/src/qonnx/transformation/qcdq_to_qop.py b/src/qonnx/transformation/qcdq_to_qop.py index 56902b44..35d50a50 100644 --- a/src/qonnx/transformation/qcdq_to_qop.py +++ b/src/qonnx/transformation/qcdq_to_qop.py @@ -30,7 +30,6 @@ import argparse from onnx import TensorProto import sys - import math import onnx.numpy_helper from typing import Tuple @@ -42,16 +41,10 @@ from qonnx.custom_op.qop.quantizelinear_op import * from qonnx.custom_op.qop.dequantizelinear_op import * from qonnx.custom_op.qop.maxpool_op import * -from qonnx.custom_op.qop.add_op import * -from qonnx.custom_op.qop.averagepool_op import * from qonnx.custom_op.qop.squeeze_op import * -from qonnx.custom_op.qop.globalAveragePool_op import * from qonnx.custom_op.qop.flatten_op import * -from qonnx.custom_op.qop.matmul_op import * -from qonnx.custom_op.qop.lrn_op import * from qonnx.custom_op.qop.concat_op import * from qonnx.custom_op.qop.softmax_op import * -from qonnx.custom_op.qop.matmul_retained_op import * from qonnx.custom_op.qop.cast_op import * from qonnx.custom_op.qop.gather_op import * from qonnx.custom_op.qop.gemm_op import * @@ -61,23 +54,10 @@ from qonnx.custom_op.qop.slice_op import * from qonnx.custom_op.qop.transpose_op import * from qonnx.custom_op.qop.relu_op import * -from qonnx.custom_op.qop.reshape_op import * -from qonnx.custom_op.qop.identity_op import * -from qonnx.custom_op.qop.shape_op import * -from 
qonnx.custom_op.qop.resize_op import * -from qonnx.custom_op.qop.unsqueeze_op import * from qonnx.custom_op.qop.clip_op import * class CustomEnv(): - imp_strides_opt=False - save_opt_qdq=False - change_avgpool=False - aecg_zendnn_opt=False remove_relu=True - retain_matmul=False - is_ryzenai_model=False - is_retinanet=False - def __init__(self): pass @@ -92,37 +72,7 @@ def apply(self, model: ModelWrapper) -> Tuple[ModelWrapper, bool]: graph.fold_constants() - aecg_zendnn_opt = args.aecg_zendnn_opt - retain_matmul = args.retain_matmul - - def is_parent_conv(index, add_node): - if len(add_node.inputs[index].inputs)==1 and add_node.inputs[index].inputs[0].op == "DequantizeLinear": - dql_node = add_node.inputs[index].inputs[0] - if len(dql_node.inputs)>0 and len(dql_node.inputs[0].inputs)==1 and dql_node.i().op == "QuantizeLinear": - ql_node = dql_node.i() - if len(ql_node.inputs)>0 and len(ql_node.inputs[0].inputs)==1 and ql_node.i().op == "Conv": - return True - return False - - def is_relu_input_s8_or_fp32(node): - if node.op == "Add" and (len(node.inputs[1].inputs)==0): - return True - elif helper.is_parent_exist(node, 0, 0) and node.i().op == "DequantizeLinear": - if helper.is_parent_exist(node.i(), 0, 0): - add_node_ql_parent = node.i().i() - if add_node_ql_parent.inputs[2].values.dtype == np.int8: - return True - else: - print("Please check Add node, ", add_node.name) - elif helper.is_parent_exist(node, 1, 0) and node.inputs[1].inputs[0].op == "DequantizeLinear": - if helper.is_parent_exist(node.inputs[1].inputs[0], 1, 0): - add_node_ql_parent = (node.inputs[1].inputs[0]).i() - if add_node_ql_parent.inputs[2].values.dtype == np.int8: - return True - else: - print("Please check Add node, ", add_node.name) - else: - return False + aecg_zendnn_opt = False def is_any_output_tensor_graph_output(node): for i in range(len(graph.outputs)): @@ -173,241 +123,17 @@ def get_child_conv(node): c3 = c2.o() return c3 - supported_op = ["Conv", "QuantizeLinear", "DequantizeLinear", "MaxPool", "Add", "AveragePool", "Squeeze", "GlobalAveragePool", "Flatten", "MatMul", "LRN", "Concat", "Softmax", "Cast", "Gather", "Gemm", "Greater", "Less", "Slice", "Transpose", "Relu", "Reshape", "Shape", "Resize", "Unsqueeze", "Clip"] - - ''' - for node in graph.nodes: - if not node.op in supported_op: - print(node.op, " op is currently not supported in the converter. 
Exiting model converter") - sys.exit() - ''' + supported_op = ["Conv", "QuantizeLinear", "DequantizeLinear", "MaxPool", "Squeeze", "Flatten", "Concat", "Softmax", "Cast", "Gather", "Gemm", "Greater", "Less", "Slice", "Transpose", "Relu", "Clip"] maxpool_count = 0 ctr = 0 cast_count = 0 clip_num = 0 - retinanet_end_pattern_found = False squeeze_output = False for node in graph.nodes: if node.op == "Flatten": squeeze_output = True - # Resnet strides optimization for Resnet50v1 - - """ - |--------->Relu2 - | | - V | - QL | - | | - | | - V | - DQL DQL DQL | - | | | | - | | | | - | | V | - ---------------------->Conv7 | - | | - | | - V | - QL | - | | - | | - V | - DQL DQL DQL | - | | | | - | | | | - | | V | - --------------------->Conv6 | - | | - | | - V | - QL | - | | - | | - V | - DQL DQL DQL | - | | | | - | | | | - | | V | - --------------------->Conv5 | - | | - | | - V | - QL | - | | - | | - V | - DQL | - | | - | V - |--------->Add2 - | - | - V - Relu1 - | - | - V - QL - | - | - V - ------------------------------------DQL1 DQL DQL - | | | | - | | | | - | V | | - | Conv4<-------------------- - | | - | | - | V - | QL - | | - | | - | V - | DQL DQL DQL - | | | | - | | | | - | V | | - | Conv3<-------------------- - | | - | | - | V - | QL - | | - | | - | V - | DQL DQL DQL DQL DQL - | | | | | | - | | | | | | - V | | V | | - Conv1<-------------------- Conv2<-------------------- - | | - | | - V V - QL QL - | | - | | - V V - DQL DQL - | | - | | - V V - ------------------------------------->Add1 - - - Add1 = add_node - Relu1 = relu_node - DQL1 = relu_dql_node - - Conv1 = conv_node1 - Conv2 = conv_node2 - make sure conv_node1 has strides [2,2] and conv_node2 has strides [1,1] and conv_node1 will be the shortcut path - - Conv4 and Conv1 are child1_node and child2_node (not necessary conv4 is child1_node and conv1 is child2_node) - but we are sure conv4's 6th child is conv2 thus get_child_name() gives 6th child name of child1_node and child2_name and check if the 6th child name = conv2's name when it is found - make child1_node = conv_node1 that is Conv1 = conv_node1 and child1_node and conv4 = child2_node - conv1 and conv4 should have strides = [2,2] - - Add2 = upper_add_node - Conv5 = upper_conv_node it should have strides = [1,1] - Relu2 = upper_relu_node - - Now add Maxpool between Relu2 and Add2 - """ - if args.imp_strides_opt and node.op == "Add" and len(node.inputs)==2: - add_node = node - if is_parent_conv(0, add_node) and is_parent_conv(1, add_node): - conv_node1 = add_node.inputs[0].inputs[0].i().i() - conv_node2 = add_node.inputs[1].inputs[0].i().i() - - strides1 = conv_node1.attrs["strides"] - strides2 = conv_node2.attrs["strides"] - - if (strides1==[1,1] and strides2==[2,2]) or (strides1==[2,2] and strides2==[1,1]): - if strides1==[1,1] and strides2==[2,2]: - temp_node = conv_node1 - conv_node1 = conv_node2 - conv_node2 = temp_node - # conv_node1 has stride [2,2] - relu_node = conv_node1.i().i().i() - # due to retinanet cases discussed below, instead of taking dql_node at relu_node.o().o() we take QL node at relu_node.o(), please check below case for more clarity - relu_ql_node = relu_node.o() - - if (len(relu_ql_node.outputs[0].outputs)==2): - - child1_node = relu_ql_node.outputs[0].outputs[0].o() - child2_node = relu_ql_node.outputs[0].outputs[1].o() - - if child1_node.op == "Conv" and child2_node.op == "Conv": - - if (child1_node.name == conv_node1.name and get_child_name(child2_node) == conv_node2.name) or (child2_node.name == conv_node1.name and get_child_name(child1_node) == 
conv_node2.name): - - if not(child1_node.name == conv_node1.name): - tem = child1_node - child1_node = child2_node - child2_node = tem - - if child1_node.attrs["strides"] == [2,2] and child2_node.attrs["strides"] == [2,2]: - - upper_add_node = relu_node.i() - - if upper_add_node.inputs[0].inputs[0].op == "Relu": - - upper_conv_node = upper_add_node.inputs[1].inputs[0].i().i() - upper_relu_node = upper_add_node.inputs[0].inputs[0] - - elif upper_add_node.inputs[1].inputs[0].op == "Relu": - - upper_conv_node = upper_add_node.inputs[0].inputs[0].i().i() - upper_relu_node = upper_add_node.inputs[1].inputs[0] - - else: - continue - if not (upper_conv_node.attrs["strides"] == [1,1]): - continue - else: - #all conditions satisfied - child1_node.attrs["strides"] = [1,1] - child2_node.attrs["strides"] = [1,1] - upper_conv_node.attrs["strides"] = [2,2] - - #now add maxpool between upper_relu and upper add - maxpool_attrs = { - "strides":[2,2], - "kernel_shape":[1,1] - } - maxpool_output = gs.Variable(name = "maxpool_output_" + child1_node.name, dtype = np.uint8) - - if len(upper_relu_node.outputs[0].outputs) == 1: - maxpool_node = gs.Node(op="MaxPool", name = "maxpool_" + child1_node.name, attrs=maxpool_attrs, inputs = [upper_relu_node.o().o().outputs[0]], outputs = [maxpool_output]) - else: - maxpool_node = gs.Node(op="MaxPool", name = "maxpool_" + child1_node.name, attrs=maxpool_attrs, inputs = [upper_relu_node.outputs[0]], outputs = [maxpool_output]) - - # conv_x_dql_node = child1_node.i() - list2 = [upper_add_node.inputs[0], upper_add_node.inputs[1]] - - if upper_relu_node.outputs[0].name == upper_add_node.inputs[0].name: - list2 = [upper_add_node.inputs[1]] - upper_add_node.inputs.clear() - upper_add_node.inputs = [maxpool_output, list2[0]] - elif upper_relu_node.outputs[0].name == upper_add_node.inputs[1].name: - list2 = [upper_add_node.inputs[0]] - upper_add_node.inputs.clear() - upper_add_node.inputs = [list2[0], maxpool_output] - else: - if upper_relu_node.o().o().outputs[0].name == upper_add_node.inputs[0].name: - list2 = [upper_add_node.inputs[1]] - upper_add_node.inputs.clear() - upper_add_node.inputs = [maxpool_output, list2[0]] - elif upper_relu_node.o().o().outputs[0].name == upper_add_node.inputs[1].name: - list2 = [upper_add_node.inputs[0]] - upper_add_node.inputs.clear() - upper_add_node.inputs = [list2[0], maxpool_output] - else: - print("ERROR in strides optimization") - graph.nodes.append(maxpool_node) - if node.op == "Gemm": gemm_node = node if helper.is_child_present(gemm_node, 0, 0) and gemm_node.o().op == "Softmax": @@ -489,126 +215,9 @@ def get_child_conv(node): graph.nodes.append(squeeze_node) - if node.op == "Reshape": - reshape_node = node - reshape_child_node = reshape_node.o() - - if reshape_child_node.op == "Gemm": - - """ - Removing a pattern in Resent50v1.5 model - - DQL-------------- - | | - | | - | V - | Shape - | | - | | - | V - | Gather - | | - | | - | V - | Unsqueeze - | | - | | - | V - | Concat - | | - | | - | V - Reshape<-------- - | - | DQL DQL - | | | - | | | - V | | - Gemm<------------------------------------------------ - | - | - | - QL - - - Connect DQL directly to Gemm and change Gemm to Conv node - - """ - gemm_node = reshape_child_node - DQL_node = reshape_node.i() - - DQL_node.outputs = reshape_node.outputs - reshape_node.outputs.clear() - - gemm_DQL_node = gemm_node.inputs[1].inputs[0] - gemm_QL_node = gemm_DQL_node.i() - - w_tensor = gemm_QL_node.inputs[0] - original = w_tensor.values - new_shape = original.shape + (1,1) - new = 
np.reshape(original, new_shape) - gemm_QL_node.inputs[0] = gs.Constant(name= gemm_QL_node.inputs[0].name , values=new.astype(np.float32)) - - new_attrs = { - "dilations":[1,1], - "group":1, - "kernel_shape":[1,1], - "pads":[0,0,0,0], - "strides":[1,1] - } - gemm_node.attrs = new_attrs - gemm_node.op = "Conv" - - elif reshape_child_node.op == "QuantizeLinear": - reshape_parent_node = reshape_node.i() - if reshape_parent_node.op == "DequantizeLinear": - if len(reshape_parent_node.inputs[0].inputs) == 1 and len(reshape_child_node.outputs[0].outputs) == 1: # 1 parent and 1 child - - """ - Node1-------->QL------>Reshape----->DQL---------->Node2 - - is changed to - - Node1------>Node2 - """ - pp = reshape_parent_node.i() - cc = reshape_child_node.o() - pp.outputs = reshape_child_node.outputs - reshape_child_node.outputs.clear() - else: - # if there is any other connection to the QL or DQL node, remove only the reshape node, let QL and DQL as is - reshape_parent_node.outputs = reshape_node.outputs - reshape_node.outputs.clear() - - elif reshape_child_node.op == "Transpose": - if helper.is_parent_exist(reshape_node, 0, 0) and reshape_node.i().op == "DequantizeLinear": - if helper.is_parent_exist(reshape_node.i(), 0, 0) and reshape_node.i().i().op == "QuantizeLinear": - new_shape = reshape_node.inputs[1].values - p1 = reshape_node.i() - p2 = p1.i() - if helper.is_constant_tensor(p2.inputs[0]): - p2.inputs[0].values = np.reshape(p2.inputs[0].values, new_shape) - p1.outputs = reshape_node.outputs - reshape_node.outputs.clear() - - elif reshape_child_node.op == "Add": - if reshape_child_node.i().op == "Conv": - conv_node = reshape_child_node.i() - conv_node.inputs = [conv_node.inputs[0], conv_node.inputs[1], reshape_node.inputs[0]] - reshape_node.inputs.clear - - conv_node.outputs = reshape_child_node.outputs - reshape_child_node.outputs.clear() - elif reshape_child_node.op == "Conv": - reshape_node.i().outputs = reshape_node.outputs - reshape_node.outputs.clear() - elif reshape_node.i().i().op == "Conv": - reshape_node.i().outputs = reshape_node.outputs - reshape_node.outputs.clear() - if node.op == "Clip": clip_num = clip_num + 1 - if helper.is_parent_exist(node, 0, 0) and (node.i().op == "Conv" or node.i().op == "Add"): + if helper.is_parent_exist(node, 0, 0) and (node.i().op == "Conv"): if helper.is_child_present(node, 0, 0) and node.o().op == "QuantizeLinear": clip_node = node clip_max = clip_node.inputs[2].values @@ -625,7 +234,7 @@ def get_child_conv(node): # p1---->Clip ------>c1----->c2 # becomes # p1---->c1-----> Clip----->c2 - # p1 = conv/add, c1 = QL, c2 = anything + # p1 = conv, c1 = QL, c2 = anything if helper.is_child_present(c1, 0, 0): c2 = c1.o() c1.inputs = [p1.outputs[0], c1.inputs[1], c1.inputs[2]] @@ -655,61 +264,12 @@ def get_child_conv(node): td.outputs = tranpose_node.outputs tranpose_node.outputs.clear() - if node.op == "Squeeze": - if helper.is_parent_exist(node,0,0) and node.i().op == "GlobalAveragePool": - squeeze_node = node - p1 = squeeze_node.i() - - if helper.is_child_present(squeeze_node, 0, 0) and squeeze_node.o().op == "Mul": - mul_node = squeeze_node.o() - - if helper.is_child_present(mul_node, 0, 0) and mul_node.o().op == "QuantizeLinear": - ql_node = mul_node.o() - - if helper.is_child_present(ql_node, 0, 0) and ql_node.o().op == "DequantizeLinear": - dql_node = ql_node.o() - - # GlobalAveragPool ---> Squeeze ---> Mul ---> QL ---> DQL - # becomes - # GlobalAveragPool --->QL ---> DQL - ql_node.inputs[0] = p1.outputs[0] - mul_node.outputs.clear() - - if node.op == 
"Mul": - # Remove Mul node - mul_node = node - if helper.is_parent_exist(mul_node, 0, 0) and helper.is_child_present(mul_node, 0, 0): - average_pool_node = mul_node.i() - average_pool_node.outputs = mul_node.outputs - mul_node.outputs.clear() - - if node.op == "Pad": - # Remove Pad node - pad_node = node - if len(pad_node.inputs) == 2: - nl,cl,hl,wl,nr,cr,hr,wr = pad_node.inputs[1].values - - if helper.is_child_present(pad_node, 0 ,0) and pad_node.o().op == "Conv": - conv_child_node = pad_node.o() - conv_child_node.attrs['pads'] = hl,wl,hr,wr - - DQL_node = pad_node.i() - DQL_node.outputs = pad_node.outputs - pad_node.outputs.clear() - - # TODO: Add a condition, if input size == Averagepool's kernel shape, only then change it to GlobalAveragePool not all, for time being adding a flag for it - if node.op == "AveragePool" and args.change_avgpool: - # Change AveragePool node toGlobalAveragePool Node. - node.op = "GlobalAveragePool" - if node.op == "Flatten": flatten_node = node if helper.is_parent_exist(flatten_node, 0, 0) and flatten_node.i().op == "DequantizeLinear": dql_node = flatten_node.i() if helper.is_child_present(flatten_node, 0, 0) and flatten_node.o().op == "QuantizeLinear": ql_node = flatten_node.o() - # don't remove this Flatten node in VGG model. as input is Maxpool (producing 4d tensor) and Child is Matmul expecting 2d tensor. - # this child matmul is retained (not converted to Conv) due to Add node after this Matmul which has 2nd input as 2d tensor. thus two 2d tensors will get added. if helper.is_child_present(dql_node, 0, 0) and dql_node.i().op == "MaxPool": continue # node1--->DQL--->Flatten---->QL----->node2 @@ -733,7 +293,7 @@ def get_child_conv(node): ql_node = relu_node.o() node1 = dql_node.i() node2 = ql_node.o() - if node1.op == "QuantizeLinear" and (not (node1.i()).op == "Add"): + if node1.op == "QuantizeLinear": #if node1 produces u8 output then Relu can also be removed, but if it creates s8 output then Relu should be retained if node1.inputs[2].values.dtype == np.uint8: # node1--->DQL--->Relu---->QL----->node2 @@ -753,17 +313,6 @@ def get_child_conv(node): relu_node.outputs = ql_node.outputs ql_node.outputs.clear() - if node1.op == "QuantizeLinear" and (node1.i()).op == "Add": - - # Add -----> node1 ------> DQL -----> Relu ------> QL -------> node2 - # becomes - # Add ----> node1 ------> Relu ------> node2 - node1.outputs = dql_node.outputs - dql_node.outputs.clear() - - relu_node.outputs = ql_node.outputs - ql_node.outputs.clear() - if node.op == "MaxPool": if helper.is_parent_exist(node, 0, 0) and helper.is_child_present(node, 0, 0): parent_node = node.i() @@ -787,13 +336,7 @@ def get_child_conv(node): conv_node1.inputs[0] = dql_node.outputs[0] # add Squeeze as input to last DequantizeLinear node - if squeeze_output and (not args.is_retinanet) and node.op == "DequantizeLinear" and ((len(node.outputs[0].outputs) == 0) or (len(node.outputs[0].outputs)==1 and (node.o().op == "Add" or node.o().op == "Softmax") and len(node.o().outputs[0].outputs)==0)): - - # no need to add Squeeze node if DQL is already getting 2d tensor - # TODO: add a check if input is 2d then don't add Squeeze node - # retain_matmul condition is sufficient to ensure Matmul will be present (not converted to conv) and it will give 2d tensor - if (retain_matmul): - continue + if squeeze_output and node.op == "DequantizeLinear" and ((len(node.outputs[0].outputs) == 0) or (len(node.outputs[0].outputs)==1 and (node.o().op == "Softmax") and len(node.o().outputs[0].outputs)==0)): squeeze_dim = [2, 3] @@ 
-815,7 +358,7 @@ def get_child_conv(node): if len(node.outputs) > 0 and len(node.outputs[0].outputs) > 1: for i in range(len(node.outputs[0].outputs)): # node.outputs[0].outputs[0].op is used instead of node.outputs[0].outputs[i].op because in each pass 1 child is removed - if node.outputs[0].outputs[0].op == "Shape" or node.outputs[0].outputs[0].op == "Add" or is_any_output_tensor_graph_output(node) or node.outputs[0].outputs[0].op == "Conv" or node.outputs[0].outputs[0].op == "Relu" or node.outputs[0].outputs[0].op == "Resize": + if is_any_output_tensor_graph_output(node) or node.outputs[0].outputs[0].op == "Conv" or node.outputs[0].outputs[0].op == "Relu": child_node = node.outputs[0].outputs[0] s = gs.Constant(name=node.inputs[1].name + "_" + str(i), values=(node.inputs[1].values).astype(np.float32)) zp = gs.Constant(name=node.inputs[2].name + "_" + str(i), values=(node.inputs[2].values).astype(node.inputs[2].dtype)) @@ -892,71 +435,9 @@ def get_child_conv(node): if node.op == "Conv": ctr = ctr + 1 conv_node = node - if len(conv_node.outputs[0].outputs) == 4: - if not (conv_node.outputs[0].outputs[0].op == "Shape" and conv_node.outputs[0].outputs[1].op == "Shape" and conv_node.outputs[0].outputs[2].op == "Shape" and conv_node.outputs[0].outputs[3].op == "Reshape"): - continue - shape_node1 = conv_node.outputs[0].outputs[0] - shape_node2 = conv_node.outputs[0].outputs[1] - shape_node3 = conv_node.outputs[0].outputs[2] - reshape_node = conv_node.outputs[0].outputs[3] - if helper.is_child_present(reshape_node, 0, 0) and reshape_node.o().op == "Transpose": - if helper.is_child_present(reshape_node.o(), 0, 0) and reshape_node.o().o().op == "Reshape": - if helper.is_child_present(reshape_node.o().o(), 0, 0) and reshape_node.o().o().o().op == "Concat": - if helper.is_child_present(reshape_node.o().o().o(), 0, 0) and reshape_node.o().o().o().o().op == "QuantizeLinear": - ret_ql_node = reshape_node.o().o().o().o() - # this is retinaNet pattern at the end - # Conv--->Reshape--->Transpose--->Reshape--->Concat--->QL---->node2 - # will be made as Conv--->QL---->Rehsape---->Transpose---->Reshape---->Concat---->QL----->node2 - # later QL at end will also be removed - s_ql = gs.Constant(name=ret_ql_node.inputs[1].name + "_" + str(ctr), values=(ret_ql_node.inputs[1].values).astype(np.float32)) - zp_ql = gs.Constant(name=ret_ql_node.inputs[2].name + "_" + str(ctr), values=(ret_ql_node.inputs[2].values).astype(np.int8)) - y_ql = gs.Variable(name=ret_ql_node.outputs[0].name + "_" + str(ctr), dtype=np.int8) - new_ql_node = gs.Node(op = "QuantizeLinear", name = ret_ql_node.name + "_" + str(ctr), inputs = [conv_node.outputs[0], s_ql, zp_ql], outputs = [y_ql]) - reshape_node.inputs[0] = new_ql_node.outputs[0] - shape_node1.inputs[0] = new_ql_node.outputs[0] - shape_node2.inputs[0] = new_ql_node.outputs[0] - shape_node3.inputs[0] = new_ql_node.outputs[0] - graph.nodes.append(new_ql_node) - retinanet_end_pattern_found = True - - if node.op == "QuantizeLinear" and retinanet_end_pattern_found: - if helper.is_parent_exist(node, 0, 0) and node.i().op == "Concat": - if helper.is_child_present(node, 0, 0) and node.o().op == "DequantizeLinear": - # remove the QL node as mentioned in the above condition. 
(Part of retinaNet model) - # Concat------>QL -------> DQL is changed to - # Concat------>DQL - node.i().outputs = node.outputs - node.outputs.clear() - - - if node.op == "Unsqueeze": - unsqueeze_node = node - if helper.is_parent_exist(unsqueeze_node, 0, 0) and unsqueeze_node.i().op == "Gather": - if helper.is_parent_exist(unsqueeze_node.i(), 0, 0) and unsqueeze_node.i().i().op == "Shape": - if helper.is_parent_exist(unsqueeze_node.i().i(), 0, 0) and unsqueeze_node.i().i().i().op == "QuantizeLinear": - if helper.is_child_present(unsqueeze_node, 0, 0) and unsqueeze_node.o().op == "Concat": - - # QL-------> Shape------> Gather ------>Unsqueeze------> Concat is changed to - # QL-------> Shape------> Gather ------>Unsqueeze------> Cast----->Concat - - concat_node = unsqueeze_node.o() - cast_count += 1 - cast_node_name = node.name + "_" + str(cast_count) - cast_output_tensor = gs.Variable(name=cast_node_name + "_output", dtype=np.int64) - new_cast_node = gs.Node(op = "Cast", name = cast_node_name, attrs = {"to":getattr(TensorProto, "INT64")}, inputs = [unsqueeze_node.outputs[0]], outputs = [cast_output_tensor]) - - for i in range(len(concat_node.inputs)): - if concat_node.inputs[i].name == node.outputs[0].name: - concat_node.inputs[i] = new_cast_node.outputs[0] - break - graph.nodes.append(new_cast_node) graph.cleanup() - if args.save_opt_qdq: - onnx.save(gs.export_onnx(graph), "optimized_qdq_" + onnx_model_name) - print("Optimized QDQ model has been saved") - node_list = [] initializer_list = [] node_count = 0 @@ -980,28 +461,14 @@ def all_dql_conditions_satisfy(node): has_output_ternsor = len(node.outputs) > 0 has_no_child = has_output_ternsor and len(node.outputs[0].outputs)==0 has_child = helper.is_child_present(node, 0, 0) - child_is_add_node = False child_has_no_child = False - child_is_averagepool_node = False - child_add_node_has_no_2nd_input = False if has_child: - child_is_add_node = node.o().op == "Add" child_is_softmax_node = node.o().op == "Softmax" child_has_no_child = len(node.o().outputs[0].outputs)==0 - child_is_averagepool_node = node.o().op == "AveragePool" - child_is_lrn_node = node.o().op == "LRN" child_is_gemm_node = node.o().op == "Gemm" child_is_relu_node = node.o().op == "Relu" - child_is_shape_node = node.o().op == "Shape" child_is_slice_node = node.o().op == "Slice" - child_is_resize_node = node.o().op == "Resize" - child_is_reshape_node = node.o().op == "Reshape" - - if child_is_add_node: - child_add_node = node.o() - if len(child_add_node.inputs[1].inputs) == 0: - child_add_node_has_no_2nd_input = True if not has_output_ternsor: return False @@ -1012,85 +479,41 @@ def all_dql_conditions_satisfy(node): if has_no_child: return True - if child_is_add_node and child_add_node_has_no_2nd_input: - return True - if child_is_softmax_node and child_has_no_child: return True - if child_is_averagepool_node: - return True - - if child_is_lrn_node: - return True - if child_is_gemm_node: return True if child_is_relu_node: return True - if child_is_shape_node or child_is_slice_node or child_is_resize_node: + if child_is_slice_node: return True - if helper.is_child_present(node, 0, 1): - c2 = node.outputs[0].outputs[1] - if c2.op == "Shape" or c2.op == "Resize": - return True - - if helper.is_child_present(node, 0, 2): - c2 = node.outputs[0].outputs[2] - if c2.op == "Shape" or c2.op == "Resize": - return True - - if child_is_reshape_node: - if helper.is_child_present(node.o(), 0, 0) and node.o().o().op == "Softmax": - return True - return False def all_ql_conditions_satify(count, 
node): - if args.is_ryzenai_model and count == 2: - return True if helper.is_child_present(node, 0, 0): if node.o().op == "Gather": return False if helper.is_child_present(node.o(), 0, 0) and node.o().o().op == "Gemm" and len(node.inputs[0].inputs) == 0: return True if count == 0: - if args.is_ryzenai_model and helper.is_child_present(node, 0, 0) and node.o().op == "DequantizeLinear": - if helper.is_child_present(node.o(), 0,0) and node.o().o().op == "Conv": - if helper.is_child_present(node.o().o(), 0, 0) and node.o().o().o().op == "QuantizeLinear": - return False return True has_parent = helper.is_parent_exist(node, 0, 0) if has_parent: - is_parent_averagepool = node.i().op == "AveragePool" - is_parent_lrn_node = node.i().op == "LRN" is_parent_maxpool_node = node.i().op == "MaxPool" is_parent_relu_node = node.i().op == "Relu" - is_parent_resize_node = node.i().op == "Resize" is_parent_concat = node.i().op == "Concat" - if is_parent_averagepool or is_parent_lrn_node: - return True - if is_parent_maxpool_node: # (Non DQL)--->MaxPool----->QL (keep this QL) if not (node.i().i().op == "DequantizeLinear"): return True if is_parent_relu_node: parent_relu_node = node.i() - if parent_relu_node.i().op == "Add": - parent_add_node = parent_relu_node.i() - if len(parent_add_node.inputs[1].inputs)==0: - return True - - if is_parent_resize_node: - return True - #if is_parent_concat: - # return True if helper.is_child_present(node, 0, 0): if helper.is_parent_exist(node, 0, 0): @@ -1118,36 +541,13 @@ def all_ql_conditions_satify(count, node): maxpool_node = MaxPool(node, maxpool_count, args.remove_relu) node_list.append(maxpool_node.get_node()) maxpool_count = maxpool_count + 1 - elif node.op == "Add": - add_node = QLinearAdd(node, aecg_zendnn_opt, args.remove_relu) - node_list.append(add_node.get_node()) - initializer_list.append(add_node.get_intializers()) - elif node.op == "AveragePool": - average_pool_node = AveragePool(node) - node_list.append(average_pool_node.get_node()) elif node.op == "Squeeze": squeeze_node = Squeeze(node) node_list.append(squeeze_node.get_node()) initializer_list.append(squeeze_node.get_intializers()) - elif node.op == "GlobalAveragePool": - global_average_pool_node = GlobalAveragePool(node, aecg_zendnn_opt, args.remove_relu) - node_list.append(global_average_pool_node.get_node()) - initializer_list.append(global_average_pool_node.get_intializers()) elif node.op == "Flatten": flatten_node = Flatten(node) node_list.append(flatten_node.get_node()) - elif node.op == "MatMul": - if retain_matmul: - matmul_node = MatMul_Retained(node) - node_list.append(matmul_node.get_node()) - initializer_list.append(matmul_node.get_intializers()) - else: - matmul_node = MatMul(node) - node_list.append(matmul_node.get_node()) - initializer_list.append(matmul_node.get_intializers()) - elif node.op == "LRN": - lrn_node = LRN(node) - node_list.append(lrn_node.get_node()) elif node.op == "Concat": concat_node = Concat(node, is_all_concat_input_dql(node)) node_list.append(concat_node.get_node()) @@ -1201,21 +601,6 @@ def all_ql_conditions_satify(count, node): if not args.remove_relu: relu_node = Relu(node) node_list.append(relu_node.get_node()) - elif node.op == "Reshape": - reshape_node = Reshape(node) - node_list.append(reshape_node.get_node()) - initializer_list.append(reshape_node.get_intializers()) - elif node.op == "Shape": - shape_node = Shape(node) - node_list.append(shape_node.get_node()) - elif node.op == "Resize": - resize_node = Resize(node) - node_list.append(resize_node.get_node()) - 
initializer_list.append(resize_node.get_intializers())
-        elif node.op == "Unsqueeze":
-            unsq_node = Unsqueeze(node)
-            node_list.append(unsq_node.get_node())
-            initializer_list.append(unsq_node.get_intializers())
-
         elif node.op == "Clip":
             found = False
             for node_current in node_list:

From 9b66aaa16b3b019dcc0ca7065350a520bf1012f3 Mon Sep 17 00:00:00 2001
From: Aditya Chatterjee
Date: Wed, 15 Nov 2023 10:24:26 +0000
Subject: [PATCH 18/20] [Support for channel-wise scaling for QDQ model]

---
 src/qonnx/custom_op/qop/qlinearconv_op.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/qonnx/custom_op/qop/qlinearconv_op.py b/src/qonnx/custom_op/qop/qlinearconv_op.py
index f1960df1..f4d3d08a 100644
--- a/src/qonnx/custom_op/qop/qlinearconv_op.py
+++ b/src/qonnx/custom_op/qop/qlinearconv_op.py
@@ -155,12 +155,16 @@ def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count):
         weight_scale_tensor = w_QL_node.inputs[1]
         weight_zp_tensor = w_QL_node.inputs[2]
 
+        weight_scale_channel_detected = 0
+        if (weight_scale_tensor.shape):
+            if (weight_scale_tensor.shape[0] > 1):
+                weight_scale_channel_detected = 1
         weight_scaled_tensor = weight_scale_tensor.values * np.ones(weight_tensor.shape)
-        if QCDQ_model_detected:
+        if QCDQ_model_detected or weight_scale_channel_detected:
             weight_scaled_tensor = np.ones(weight_tensor.shape) * weight_scale_tensor.values[:, np.newaxis, np.newaxis, np.newaxis]
         b = weight_tensor.values / weight_scaled_tensor
         c = weight_zp_tensor.values * np.ones(weight_tensor.shape)
-        if QCDQ_model_detected:
+        if QCDQ_model_detected or weight_scale_channel_detected:
             c = weight_zp_tensor.values[:, np.newaxis, np.newaxis, np.newaxis] * np.ones(weight_tensor.shape)
         quantized_weight_tensor = b + c
         if weight_zp_tensor.dtype == "int8":

From 89074f4a709e56f1b47ad18ac57be4cee61e58f2 Mon Sep 17 00:00:00 2001
From: Aditya Chatterjee
Date: Tue, 21 Nov 2023 04:31:08 +0000
Subject: [PATCH 19/20] [Code Cleanup] QCDQ to QOp converter

* Removed support for multiple features and models
---
 src/qonnx/custom_op/qop/concat_op.py      |  81 ++--------
 .../custom_op/qop/dequantizelinear_op.py  |  29 ++--
 src/qonnx/custom_op/qop/qlinearconv_op.py |  75 ++++-----
 src/qonnx/transformation/qcdq_to_qop.py   | 149 +-----------------
 4 files changed, 65 insertions(+), 269 deletions(-)

diff --git a/src/qonnx/custom_op/qop/concat_op.py b/src/qonnx/custom_op/qop/concat_op.py
index 4f5e5f6e..4eb3c5d8 100644
--- a/src/qonnx/custom_op/qop/concat_op.py
+++ b/src/qonnx/custom_op/qop/concat_op.py
@@ -28,7 +28,7 @@ class Concat:
 
-    def __init__(self, node, is_all_concat_input_dql):
+    def __init__(self, node):
 
         concat_node = node
 
@@ -44,58 +44,15 @@ def __init__(self, node, is_all_concat_input_dql):
         input_names = []
 
         for i in range(number_of_inputs):
-            if is_all_concat_input_dql:
-                parent_dql_node = concat_node.inputs[i].inputs[0]
-                scale_values_list.append(parent_dql_node.inputs[1].values)
-                scale_name_list.append(parent_dql_node.inputs[1].name)
-                zp_value_list.append(parent_dql_node.inputs[2].values)
-                zp_name_list.append(parent_dql_node.inputs[2].name)
-                input_tensor_names.append(parent_dql_node.inputs[0].name)
-            else:
-                input_tensor_names.append(concat_node.inputs[i].name)
-                if
len(concat_node.inputs[i].inputs) == 0: + c_input = helper.create_initializer_tensor(name=concat_node.inputs[i].name, + tensor_array=concat_node.inputs[i].values, + data_type=onnx.TensorProto.INT64) + intializer_list.append(c_input) + self.intializer_list = intializer_list - if is_all_concat_input_dql: - for i in range(number_of_inputs): - scale_tesnor = helper.create_initializer_tensor(name=scale_name_list[i], - tensor_array=scale_values_list[i], - data_type=onnx.TensorProto.FLOAT) - zp_tensor = helper.create_initializer_tensor(name=zp_name_list[i], - tensor_array=zp_value_list[i], - data_type=onnx.TensorProto.UINT8) - intializer_list.append(scale_tesnor) - intializer_list.append(zp_tensor) - - if helper.is_child_present(concat_node, 0, 0) and concat_node.o().op == "DequantizeLinear" and is_all_concat_input_dql: - y_ql_node = concat_node.o() - y_name = y_ql_node.outputs[0].name - else: - y_name = concat_node.outputs[0].name - - if helper.is_child_present(concat_node, 0, 0) and concat_node.o().op == "DequantizeLinear" and is_all_concat_input_dql: - y_scale_name = y_ql_node.inputs[1].name - y_scale_value = y_ql_node.inputs[1].values - y_zp_name = y_ql_node.inputs[2].name - y_zp_value = y_ql_node.inputs[2].values - - y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, - tensor_array=y_scale_value, - data_type=onnx.TensorProto.FLOAT) - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.UINT8) - - intializer_list.append(y_scale_tensor) - intializer_list.append(y_zp_tensor) - self.intializer_list = intializer_list - - input_names.append(y_scale_tensor.name) - input_names.append(y_zp_tensor.name) + y_name = concat_node.outputs[0].name for i in range(number_of_inputs): input_names.append(input_tensor_names[i]) @@ -106,19 +63,11 @@ def __init__(self, node, is_all_concat_input_dql): kwargs = {} kwargs["domain"] = 'com.microsoft' - if is_all_concat_input_dql: - new_concat_node = onnx.helper.make_node(name = concat_node.name, - op_type = "QLinearConcat", - inputs = input_names, - outputs = [y_name], - axis = concat_node.attrs["axis"], - **kwargs) - else: - new_concat_node = onnx.helper.make_node(name = concat_node.name, - op_type = "Concat", - inputs = input_names, - outputs = [y_name], - axis = concat_node.attrs["axis"]) + new_concat_node = onnx.helper.make_node(name = concat_node.name, + op_type = "Concat", + inputs = input_names, + outputs = [y_name], + axis = concat_node.attrs["axis"]) self.node = new_concat_node @@ -126,4 +75,4 @@ def get_node(self): return self.node def get_intializers(self): - return self.intializer_list \ No newline at end of file + return self.intializer_list diff --git a/src/qonnx/custom_op/qop/dequantizelinear_op.py b/src/qonnx/custom_op/qop/dequantizelinear_op.py index 309d6564..28b994f4 100644 --- a/src/qonnx/custom_op/qop/dequantizelinear_op.py +++ b/src/qonnx/custom_op/qop/dequantizelinear_op.py @@ -29,7 +29,7 @@ class DequantizeLinear: - def __init__(self, node, aecg_zendnn_opt, remove_relu): + def __init__(self, node, remove_relu): dql_node = node @@ -73,23 +73,18 @@ def __init__(self, node, aecg_zendnn_opt, remove_relu): x_zp_name = dql_node.inputs[2].name x_zp_value = dql_node.inputs[2].values - if aecg_zendnn_opt: + if dql_node.inputs[2].dtype == np.uint8: x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - else: - if dql_node.inputs[2].dtype == np.uint8: - x_zp_tensor = 
helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - if dql_node.inputs[2].dtype == np.int32: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT32) - elif dql_node.inputs[2].dtype == np.int8: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT8) + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + if dql_node.inputs[2].dtype == np.int32: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT32) + elif dql_node.inputs[2].dtype == np.int8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) y_name = dql_node.outputs[0].name diff --git a/src/qonnx/custom_op/qop/qlinearconv_op.py b/src/qonnx/custom_op/qop/qlinearconv_op.py index f4d3d08a..bcaad1de 100644 --- a/src/qonnx/custom_op/qop/qlinearconv_op.py +++ b/src/qonnx/custom_op/qop/qlinearconv_op.py @@ -29,7 +29,7 @@ class QLinearConv: - def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count): + def __init__(self, node, remove_relu, conv_count): x_DQL_node = node.i() conv_node = node @@ -335,44 +335,39 @@ def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count): x_zp_name = conv_node.name + "_X_ZERO_POINT" x_zp_value = x_zp_tensor.values - if aecg_zendnn_opt and conv_count > 0: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + if is_x_QL_maxpool: + if maxpool_input_s8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + else: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, tensor_array=x_zp_value, data_type=onnx.TensorProto.UINT8) + elif is_X_QL_transpose: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) else: - if is_x_QL_maxpool: - if maxpool_input_s8: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT8) - else: - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - elif is_X_QL_transpose: + if (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.int8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + elif (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.uint8): x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT8) - else: - if (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.int8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT8) - elif (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.uint8): + data_type=onnx.TensorProto.UINT8) + elif x_QL_node.op == "Relu" or x_QL_node.op == "Clip": + if (x_QL_node.i().op == "QuantizeLinear" and x_QL_node.i().inputs[2].dtype == np.int8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + elif (x_QL_node.i().op == "QuantizeLinear" and x_QL_node.i().inputs[2].dtype == np.uint8): x_zp_tensor = 
helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - elif x_QL_node.op == "Relu" or x_QL_node.op == "Clip": - if (x_QL_node.i().op == "QuantizeLinear" and x_QL_node.i().inputs[2].dtype == np.int8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, tensor_array=x_zp_value, - data_type=onnx.TensorProto.INT8) - elif (x_QL_node.i().op == "QuantizeLinear" and x_QL_node.i().inputs[2].dtype == np.uint8): - x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, - tensor_array=x_zp_value, - data_type=onnx.TensorProto.UINT8) - else: - print("ERROR Please check x_zp_tensor of ", conv_node.name) + data_type=onnx.TensorProto.UINT8) + else: + print("ERROR Please check x_zp_tensor of ", conv_node.name) w_name = conv_node.inputs[1].name w_value = quantized_weight_tensor @@ -401,20 +396,14 @@ def __init__(self, node, aecg_zendnn_opt, remove_relu, conv_count): y_zp_name = conv_node.name + "_Y_ZERO_POINT" y_zp_value = y_zp_tensor.values - if aecg_zendnn_opt: - # if this opt is enabled then y_zp has be to set to u8 type + if y_zp_tensor.dtype == np.int8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.INT8) + elif y_zp_tensor.dtype == np.uint8: y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, tensor_array=y_zp_value, data_type=onnx.TensorProto.UINT8) - else: - if y_zp_tensor.dtype == np.int8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.INT8) - elif y_zp_tensor.dtype == np.uint8: - y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, - tensor_array=y_zp_value, - data_type=onnx.TensorProto.UINT8) if has_bias: b_name = conv_node.inputs[2].name diff --git a/src/qonnx/transformation/qcdq_to_qop.py b/src/qonnx/transformation/qcdq_to_qop.py index 35d50a50..b0259dff 100644 --- a/src/qonnx/transformation/qcdq_to_qop.py +++ b/src/qonnx/transformation/qcdq_to_qop.py @@ -72,8 +72,6 @@ def apply(self, model: ModelWrapper) -> Tuple[ModelWrapper, bool]: graph.fold_constants() - aecg_zendnn_opt = False - def is_any_output_tensor_graph_output(node): for i in range(len(graph.outputs)): output_tensor_name = graph.outputs[i].name @@ -81,48 +79,6 @@ def is_any_output_tensor_graph_output(node): return True return False - # return 6/7/8th child name depending on Relu is present or not - def get_child_name(node): - if helper.is_child_present(node, 0, 0): - c1 = node.o() - if c1.op == "Relu": #C1 is relu - if helper.is_child_present(c1, 0, 0): - c1 = c1.o() # c1 is QL node - # c1 is QL now - if helper.is_child_present(c1, 0, 0): - c2 = c1.o() # c2 is DQL - if helper.is_child_present(c2, 0, 0): - c3 = c2.o() # c3 is Conv - - if helper.is_child_present(c3, 0, 0): - c4 = c3.o() - - if c4.op == "Relu": - if helper.is_child_present(c4, 0, 0): - c4 = c4.o() - # c4 is QL now - - if helper.is_child_present(c4, 0, 0): - c5 = c4.o() # c5 is DQL - if helper.is_child_present(c5, 0, 0): - c6 = c5.o() # c6 is conv - - return c6.name - - print("************************* ERROR ************************* get_child_name() returned empty string") - return "" - - def get_child_conv(node): - if helper.is_child_present(node, 0, 0): - c1 = node.o() - if c1.op == "Relu" and helper.is_child_present(c1, 0, 0): - c1 = c1.o() - if helper.is_child_present(c1, 0, 0): - c2 = c1.o() - if helper.is_child_present(c2, 0, 0): - c3 = c2.o() - return c3 - supported_op = ["Conv", "QuantizeLinear", "DequantizeLinear", 
"MaxPool", "Squeeze", "Flatten", "Concat", "Softmax", "Cast", "Gather", "Gemm", "Greater", "Less", "Slice", "Transpose", "Relu", "Clip"] maxpool_count = 0 @@ -139,34 +95,7 @@ def get_child_conv(node): if helper.is_child_present(gemm_node, 0, 0) and gemm_node.o().op == "Softmax": continue gemm_input_node = gemm_node.i() - if gemm_input_node.op == "DequantizeLinear": - - if gemm_node.inputs[1].inputs[0].op == "DequantizeLinear": - w_dql_node = gemm_node.inputs[1].inputs[0] - is_weight_quantized = True if len(w_dql_node.inputs[0].inputs) == 0 else False - if is_weight_quantized: - wt_tensor = w_dql_node.inputs[0] - else: - w_ql_node = w_dql_node.i() - wt_tensor = w_ql_node.inputs[0] - org = wt_tensor.values - new_shape = org.shape + (1,1) - new = np.reshape(org, new_shape) - if is_weight_quantized: - w_dql_node.inputs[0] = gs.Constant(name=w_dql_node.inputs[0].name, values = new.astype(np.int8)) - else: - w_ql_node.inputs[0] = gs.Constant(name=w_ql_node.inputs[0].name, values = new.astype(np.float32)) - - gemm_node.op = "Conv" - new_attrs = { - "dilations":[1,1], - "group":1, - "kernel_shape":[1,1], - "pads":[0,0,0,0], - "strides":[1,1] - } - gemm_node.attrs = new_attrs - elif gemm_input_node.op == "Flatten": + if gemm_input_node.op == "Flatten": flatten_node = gemm_input_node flatten_dql_node = flatten_node.i() flatten_dql_node.outputs = flatten_node.outputs @@ -317,16 +246,7 @@ def get_child_conv(node): if helper.is_parent_exist(node, 0, 0) and helper.is_child_present(node, 0, 0): parent_node = node.i() child_node = node.o() - if len(parent_node.outputs[0].outputs) == 1 and parent_node.op == "DequantizeLinear" and child_node.op == "QuantizeLinear": - dql_node = parent_node - dql_parent = dql_node.i() - dql_parent.outputs = dql_node.outputs - dql_node.outputs.clear() - - ql_node = child_node - node.outputs = ql_node.outputs - ql_node.outputs.clear() - elif len(parent_node.outputs[0].outputs) == 1 and parent_node.op == "DequantizeLinear" and child_node.op == "Conv": + if len(parent_node.outputs[0].outputs) == 1 and parent_node.op == "DequantizeLinear" and child_node.op == "Conv": dql_node = parent_node dql_parent = dql_node.i() node.inputs[0] = dql_parent.outputs[0] @@ -335,56 +255,6 @@ def get_child_conv(node): dql_node.inputs[0] = node.outputs[0] conv_node1.inputs[0] = dql_node.outputs[0] - # add Squeeze as input to last DequantizeLinear node - if squeeze_output and node.op == "DequantizeLinear" and ((len(node.outputs[0].outputs) == 0) or (len(node.outputs[0].outputs)==1 and (node.o().op == "Softmax") and len(node.o().outputs[0].outputs)==0)): - - squeeze_dim = [2, 3] - - Y1 = gs.Variable(name="sq_output" + node.name, dtype=np.int8) - parent_node = node.i() - - X1 = parent_node.outputs[0] - X2 = gs.Constant(name="axes" + node.name, values=(np.array(squeeze_dim)).astype(np.int64)) - - squeeze_node = gs.Node(op="Squeeze", name="squeeze_node" + node.name, inputs=[X1, X2], outputs=[Y1]) - - node.inputs[0] = squeeze_node.outputs[0] - graph.nodes.append(squeeze_node) - - # Retinanet case - if node.op == "DequantizeLinear": - if helper.is_parent_exist(node, 0, 0): - dql_parent = node.i() - if len(node.outputs) > 0 and len(node.outputs[0].outputs) > 1: - for i in range(len(node.outputs[0].outputs)): - # node.outputs[0].outputs[0].op is used instead of node.outputs[0].outputs[i].op because in each pass 1 child is removed - if is_any_output_tensor_graph_output(node) or node.outputs[0].outputs[0].op == "Conv" or node.outputs[0].outputs[0].op == "Relu": - child_node = node.outputs[0].outputs[0] - s = 
gs.Constant(name=node.inputs[1].name + "_" + str(i), values=(node.inputs[1].values).astype(np.float32)) - zp = gs.Constant(name=node.inputs[2].name + "_" + str(i), values=(node.inputs[2].values).astype(node.inputs[2].dtype)) - y = gs.Variable(name=node.outputs[0].name + "_" + str(i), dtype=node.inputs[2].dtype) - new_dql_node = gs.Node(op = "DequantizeLinear", name = node.name + "_" + str(i), inputs = [node.i().outputs[0], s, zp], outputs = [y]) - - for j in range(len(child_node.inputs)): - if child_node.inputs[j].name == node.outputs[0].name: - child_node.inputs[j] = new_dql_node.outputs[0] - graph.nodes.append(new_dql_node) - - # QL QL-------DQL-------Conv - # | | \ - # | | \ - # DQL---------conv gets converted to DQL \ - # | | DQL - # | | - # Conv Conv - # this extra DQL needs to be removed, when later we do graph.cleanup() this node gets removed but before cleanup if any case needs QL childs it will reflect 3 childs - - for i in range(len(dql_parent.outputs[0].outputs)): - child_node = dql_parent.outputs[0].outputs[i] - if not helper.is_child_present(child_node, 0, 0) and not is_any_output_tensor_graph_output(child_node): - child_node.inputs.clear() - break - if node.op == "Gather" and node.o().op == "Transpose": gather_node = node transpose_node = gather_node.o() @@ -444,19 +314,12 @@ def get_child_conv(node): maxpool_count = 0 conv_count = 0 - def is_all_concat_input_dql(node): - for i in range(len(node.inputs)): - if helper.is_parent_exist(node, i, 0) and node.inputs[i].inputs[0].op != "DequantizeLinear": - return False - return True - def concat_input_not_constant(node): for i in range(len(node.inputs)): if len(node.inputs[i].inputs) == 0: return True return False - def all_dql_conditions_satisfy(node): has_output_ternsor = len(node.outputs) > 0 has_no_child = has_output_ternsor and len(node.outputs[0].outputs)==0 @@ -525,7 +388,7 @@ def all_ql_conditions_satify(count, node): for node in graph.nodes: if node.op == "Conv": - QLinearConv_node = QLinearConv(node, aecg_zendnn_opt, args.remove_relu, conv_count) + QLinearConv_node = QLinearConv(node, args.remove_relu, conv_count) node_list.append(QLinearConv_node.get_node()) initializer_list.append(QLinearConv_node.get_intializers()) conv_count = conv_count + 1 @@ -534,7 +397,7 @@ def all_ql_conditions_satify(count, node): node_list.append(QuantizeLinear_node.get_node()) initializer_list.append(QuantizeLinear_node.get_intializers()) elif node.op == "DequantizeLinear" and all_dql_conditions_satisfy(node): - DequantizeLinear_node = DequantizeLinear(node, aecg_zendnn_opt, args.remove_relu) + DequantizeLinear_node = DequantizeLinear(node, args.remove_relu) node_list.append(DequantizeLinear_node.get_node()) initializer_list.append(DequantizeLinear_node.get_intializers()) elif node.op == "MaxPool": @@ -549,9 +412,9 @@ def all_ql_conditions_satify(count, node): flatten_node = Flatten(node) node_list.append(flatten_node.get_node()) elif node.op == "Concat": - concat_node = Concat(node, is_all_concat_input_dql(node)) + concat_node = Concat(node) node_list.append(concat_node.get_node()) - if (is_all_concat_input_dql(node) or concat_input_not_constant(node)): + if (concat_input_not_constant(node)): initializer_list.append(concat_node.get_intializers()) elif node.op == "Softmax": softmax_node = Softmax(node) From 0a77123c5e56584cc94407064ea07a64d83e4012 Mon Sep 17 00:00:00 2001 From: Aditya Chatterjee Date: Mon, 4 Dec 2023 09:18:56 +0000 Subject: [PATCH 20/20] [Test code for QCDQ to QOp conversion] * Added test code for validating QCDQ to QOp 
converter. --- tests/transformation/test_qcdq_to_qop.py | 92 ++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 tests/transformation/test_qcdq_to_qop.py diff --git a/tests/transformation/test_qcdq_to_qop.py b/tests/transformation/test_qcdq_to_qop.py new file mode 100644 index 00000000..46148e1a --- /dev/null +++ b/tests/transformation/test_qcdq_to_qop.py @@ -0,0 +1,92 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of qonnx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
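+#
+# Validates the QCDQ-to-QOp conversion: each sample model is converted with
+# the QCDQToQOp transformation, then both the pre-converted reference model
+# ("<name>_qop_ref.onnx") and the freshly converted model are run through
+# ONNX Runtime and their outputs are compared.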
+
+import numpy as np
+import os
+import pytest
+
+import onnxruntime as ort
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.qcdq_to_qop import QCDQToQOp
+
+model_details = {
+    "model1.onnx": {
+        "path": "src/qonnx/data/onnx/qop-sub-graph/model1.onnx",
+    },
+    "model2.onnx": {
+        "path": "src/qonnx/data/onnx/qop-sub-graph/model2.onnx",
+    },
+}
+
+
+def load_graph_session(model_file, execution_provider):
+    # Map the requested engine name to the matching ONNX Runtime execution provider.
+    EP_List = []
+    if "CPU" in execution_provider:
+        EP_List.append("CPUExecutionProvider")
+    elif "Zendnn" in execution_provider:
+        EP_List.append("ZendnnExecutionProvider")
+    elif "Dnnl" in execution_provider:
+        EP_List.append("DnnlExecutionProvider")
+    sess = ort.InferenceSession(model_file, providers=EP_List)
+    return sess
+
+
+def get_output(model_file, engine):
+    # Run the model on random inputs and return its outputs. Symbolic or
+    # unknown input dimensions are replaced by a fixed batch size.
+    infer_sess = load_graph_session(model_file, engine)
+    inputNames = []
+    inputTensors = []
+    batch_size = 32
+    for inp in infer_sess.get_inputs():
+        inputNames.append(inp.name)
+        x_shape = [batch_size if isinstance(s, str) or s is None else s for s in inp.shape]
+        if not x_shape:
+            x_shape.append(1)
+        # Random input data; the sample models are assumed to take int64 inputs.
+        inputX = np.random.uniform(low=-10, high=100, size=np.prod(x_shape)).astype(np.int64)
+        inputX = np.reshape(inputX, x_shape)
+        inputTensors.append(inputX)
+    feed_dict = dict(zip(inputNames, inputTensors))
+    preds = infer_sess.run([], feed_dict)
+    return preds
+
+
+@pytest.mark.parametrize("test_model", model_details.keys())
+def test_qcdq_to_qop(test_model):
+    dl_file = model_details[test_model]["path"]
+    dl_file_reference = dl_file.replace(".onnx", "_qop_ref.onnx")
+    assert os.path.isfile(dl_file)
+    assert os.path.isfile(dl_file_reference)
+    output_reference = get_output(dl_file_reference, "Zendnn")
+    model = ModelWrapper(dl_file)
+    model = model.transform(QCDQToQOp())
+    output_file = dl_file.replace(".onnx", "_qop.onnx")
+    model.save(output_file)
+    assert os.path.isfile(output_file)
+    output_converted_model = get_output(output_file, "Zendnn")
+    print("QCDQ to QOp Validation [", test_model, "] = ", np.isclose(output_reference, output_converted_model).all())
+    assert np.isclose(output_reference, output_converted_model).all()
+
+
+if __name__ == "__main__":
+    test_qcdq_to_qop("model1.onnx")
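For a quick manual check outside the pytest harness, the same conversion path the test exercises can be driven directly. A minimal sketch, assuming the sample models under src/qonnx/data/onnx/qop-sub-graph/ are present; the output filename is illustrative:

    from qonnx.core.modelwrapper import ModelWrapper
    from qonnx.transformation.qcdq_to_qop import QCDQToQOp

    # Load a QCDQ-form model, convert it to QOp form, and save the result.
    model = ModelWrapper("src/qonnx/data/onnx/qop-sub-graph/model1.onnx")
    model = model.transform(QCDQToQOp())
    model.save("src/qonnx/data/onnx/qop-sub-graph/model1_qop.onnx")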