diff --git a/src/qonnx/custom_op/qop/averagepool_op.py b/src/qonnx/custom_op/qop/averagepool_op.py new file mode 100644 index 00000000..db385be5 --- /dev/null +++ b/src/qonnx/custom_op/qop/averagepool_op.py @@ -0,0 +1,48 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class AveragePool: + + def __init__(self, node): + + average_pool_node = node + a_name = average_pool_node.inputs[0].name + + y_name = average_pool_node.outputs[0].name + + new_average_pool_node = onnx.helper.make_node(name = average_pool_node.name, op_type = "AveragePool", + inputs = [a_name], + outputs = [y_name], + ceil_mode = average_pool_node.attrs["ceil_mode"], + kernel_shape = average_pool_node.attrs["kernel_shape"], + pads = average_pool_node.attrs["pads"], + strides = average_pool_node.attrs["strides"]) + + self.node = new_average_pool_node + + def get_node(self): + return self.node \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/cast_op.py b/src/qonnx/custom_op/qop/cast_op.py new file mode 100644 index 00000000..578329d9 --- /dev/null +++ b/src/qonnx/custom_op/qop/cast_op.py @@ -0,0 +1,44 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class Cast: + + def __init__(self, node): + + cast_node = node + + x_name = cast_node.inputs[0].name + y_name = cast_node.outputs[0].name + + new_cast_node = onnx.helper.make_node(name = cast_node.name, op_type = "Cast", + inputs = [x_name], + outputs = [y_name], + to = cast_node.attrs["to"]) + self.node = new_cast_node + + def get_node(self): + return self.node diff --git a/src/qonnx/custom_op/qop/clip_op.py b/src/qonnx/custom_op/qop/clip_op.py new file mode 100644 index 00000000..f672bfde --- /dev/null +++ b/src/qonnx/custom_op/qop/clip_op.py @@ -0,0 +1,61 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper + +class Clip: + + def __init__(self, node): + + clip_node = node + + x_name = clip_node.inputs[0].name + + x2_name = clip_node.inputs[1].name + x2_value = clip_node.inputs[1].values + x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.INT8) + + x3_name = clip_node.inputs[2].name + x3_value = clip_node.inputs[2].values + x3_tensor = helper.create_initializer_tensor(x3_name,x3_value,onnx.TensorProto.INT8) + + new_clip_node = onnx.helper.make_node(name = clip_node.name, op_type = "Clip", + inputs= [x_name, x2_name, x3_name], + outputs = [clip_node.outputs[0].name]) + + self.node = new_clip_node + + intializer_list = [] + intializer_list.append(x2_tensor) + intializer_list.append(x3_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list + diff --git a/src/qonnx/custom_op/qop/concat_op.py b/src/qonnx/custom_op/qop/concat_op.py new file mode 100644 index 00000000..4eb3c5d8 --- /dev/null +++ b/src/qonnx/custom_op/qop/concat_op.py @@ -0,0 +1,78 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#########################################################################
+
+import onnx
+from .helper import helper
+
+class Concat:
+
+    def __init__(self, node):
+
+        concat_node = node
+        number_of_inputs = len(concat_node.inputs)
+
+        input_names = []
+        intializer_list = []
+
+        # Constant inputs (tensors with no producer) become initializers
+        # of the rewritten graph.
+        for i in range(number_of_inputs):
+            input_names.append(concat_node.inputs[i].name)
+            if len(concat_node.inputs[i].inputs) == 0:
+                c_input = helper.create_initializer_tensor(name=concat_node.inputs[i].name,
+                                                           tensor_array=concat_node.inputs[i].values,
+                                                           data_type=onnx.TensorProto.INT64)
+                intializer_list.append(c_input)
+        self.intializer_list = intializer_list
+
+        y_name = concat_node.outputs[0].name
+
+        new_concat_node = onnx.helper.make_node(name = concat_node.name,
+                                                op_type = "Concat",
+                                                inputs = input_names,
+                                                outputs = [y_name],
+                                                axis = concat_node.attrs["axis"])
+
+        self.node = new_concat_node
+
+    def get_node(self):
+        return self.node
+
+    def get_intializers(self):
+        return self.intializer_list
diff --git a/src/qonnx/custom_op/qop/dequantizelinear_op.py b/src/qonnx/custom_op/qop/dequantizelinear_op.py
new file mode 100644
index 00000000..28b994f4
--- /dev/null
+++ b/src/qonnx/custom_op/qop/dequantizelinear_op.py
@@ -0,0 +1,105 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class DequantizeLinear: + + def __init__(self, node, remove_relu): + + dql_node = node + + x_name = dql_node.inputs[0].name + + if helper.is_parent_exist(dql_node, 0, 0): + if dql_node.i().op == "QuantizeLinear": + ql_node = dql_node.i() + if helper.is_parent_exist(ql_node,0, 0): + if ql_node.i().op == "Relu": + relu_node = ql_node.i() + if remove_relu: + x_name = ql_node.outputs[0].name + else: + x_name = relu_node.outputs[0].name + else: + print("*************** WARNING *********************** Please check parent of QL node", ql_node.name, " ignore if pattern is correct") + else: + print("*************** WARNING *********************** Please check parent of DQL node", dql_node.name, " ignore if pattern is correct") + self.initializers = [] + + if len(dql_node.inputs[0].inputs) == 0: + if dql_node.inputs[0].dtype == np.uint8: + input_tensor = helper.create_initializer_tensor(name= dql_node.inputs[0].name, + tensor_array=dql_node.inputs[0].values, + data_type=onnx.TensorProto.UINT8) + elif dql_node.inputs[0].dtype == np.int8: + input_tensor = helper.create_initializer_tensor(name= dql_node.inputs[0].name, + tensor_array=dql_node.inputs[0].values, + data_type=onnx.TensorProto.INT8) + elif dql_node.inputs[0].dtype == np.int32: + input_tensor = helper.create_initializer_tensor(name= dql_node.inputs[0].name, + tensor_array=dql_node.inputs[0].values, + data_type=onnx.TensorProto.INT32) + self.initializers.append(input_tensor) + + x_scale_name = dql_node.inputs[1].name + x_scale_value = dql_node.inputs[1].values + x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name,tensor_array=x_scale_value,data_type=onnx.TensorProto.FLOAT) + + x_zp_name = dql_node.inputs[2].name + x_zp_value = dql_node.inputs[2].values + + if dql_node.inputs[2].dtype == np.uint8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + if dql_node.inputs[2].dtype == np.int32: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT32) + elif dql_node.inputs[2].dtype == np.int8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + + y_name = dql_node.outputs[0].name + + dequantizelinear_node = onnx.helper.make_node(name = dql_node.name, + op_type = "DequantizeLinear", + inputs = [x_name, x_scale_name, x_zp_name], + outputs = [y_name]) + + self.node = dequantizelinear_node + + self.initializers.append(x_scale_tensor) + self.initializers.append(x_zp_tensor) + + def get_node(self): + return self.node + + def get_intializers(self): + return self.initializers diff --git a/src/qonnx/custom_op/qop/flatten_op.py b/src/qonnx/custom_op/qop/flatten_op.py new file mode 100644 index 00000000..62831558 --- /dev/null +++ b/src/qonnx/custom_op/qop/flatten_op.py @@ -0,0 +1,53 @@ 
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#########################################################################
+
+import onnx
+from .helper import helper
+
+class Flatten:
+
+    def __init__(self, node):
+
+        flatten_node = node
+        x_name = flatten_node.inputs[0].name
+        y_name = flatten_node.outputs[0].name
+
+        # Bypass a surrounding DequantizeLinear/QuantizeLinear pair, if present.
+        if helper.is_parent_exist(flatten_node, 0, 0) and flatten_node.i().op == "DequantizeLinear":
+            node1 = flatten_node.i()
+            x_name = node1.inputs[0].name
+
+        if helper.is_child_present(flatten_node, 0, 0) and flatten_node.o().op == "QuantizeLinear":
+            node2 = flatten_node.o()
+            y_name = node2.outputs[0].name
+
+        new_flatten_node = onnx.helper.make_node(name = flatten_node.name, op_type = "Flatten",
+                                                 inputs = [x_name],
+                                                 outputs = [y_name])
+
+        self.node = new_flatten_node
+
+    def get_node(self):
+        return self.node
diff --git a/src/qonnx/custom_op/qop/gather_op.py b/src/qonnx/custom_op/qop/gather_op.py
new file mode 100644
index 00000000..5fd01faa
--- /dev/null
+++ b/src/qonnx/custom_op/qop/gather_op.py
@@ -0,0 +1,112 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class Gather: + + def __init__(self, node): + + gather_node = node + # -------------------------------- + # For QCDQ / QDQ model, this case: + # QuantizeLinear + # | (0) + # Gather ---------- (1) Input + # | + # -------------------------------- + gather_parent_node = node + quantized_data_tensor = node + if helper.is_parent_exist(gather_node, 0, 0): + gather_parent_node = node.i(0) + if len(gather_parent_node.inputs) > 1 and helper.is_constant_tensor(gather_parent_node.inputs[1]): + quantized_data_tensor = gather_parent_node.inputs[1].values + + if helper.is_constant_tensor(gather_parent_node.inputs[0]): + if gather_parent_node.op == "QuantizeLinear": + X_DQL_node = gather_parent_node + dequantized_data_tensor = X_DQL_node.inputs[0] + data_scale_tensor = X_DQL_node.inputs[1] + data_zero_point_tensor = X_DQL_node.inputs[2] + + data_scale_tensor = data_scale_tensor.values * np.ones(dequantized_data_tensor.shape) + a = dequantized_data_tensor.values / data_scale_tensor + b = data_zero_point_tensor.values * np.ones(dequantized_data_tensor.shape) + quantized_data_tensor = a + b + quantized_data_tensor = quantized_data_tensor.astype(np.int8) + + else: + if gather_parent_node.op == "QuantizeLinear": + X_QL_node = gather_parent_node + quantized_data_tensor = X_QL_node.inputs[1].values + + data_tensor = helper.create_initializer_tensor(name=gather_node.inputs[0].name, + tensor_array=quantized_data_tensor, + data_type=onnx.TensorProto.INT8) + + if helper.is_constant_tensor(gather_parent_node.inputs[0]): + data_tensor = helper.create_initializer_tensor(name=gather_node.inputs[0].name, + tensor_array=quantized_data_tensor, + data_type=onnx.TensorProto.INT8) + if helper.is_constant_tensor(gather_node.inputs[1]): + if gather_node.inputs[1].dtype == "int64": + indices_tensor = helper.create_initializer_tensor(name=gather_node.inputs[1].name, + tensor_array=gather_node.inputs[1].values, + data_type=onnx.TensorProto.INT64) + else: + print("ERROR check data type in Gather node ", gather_node.name) + + new_gather_node = onnx.helper.make_node(name = gather_node.name, op_type = "Gather", + inputs= [data_tensor.name, gather_node.inputs[1].name], + outputs = [gather_node.outputs[0].name], + axis = 0) + if helper.is_constant_tensor(gather_parent_node.inputs[0]): + new_gather_node = onnx.helper.make_node(name = gather_node.name, op_type = "Gather", + inputs= [data_tensor.name, gather_node.inputs[1].name], + outputs = [gather_node.outputs[0].name], + axis = 0) + elif helper.is_constant_tensor(gather_node.inputs[1]): + new_gather_node = onnx.helper.make_node(name = gather_node.name, op_type = "Gather", + inputs= [gather_node.inputs[0].name,indices_tensor.name], + outputs = [gather_node.outputs[0].name], + axis = gather_node.attrs['axis']) + + self.node = new_gather_node + + intializer_list = [] + if helper.is_constant_tensor(gather_parent_node.inputs[0]): + intializer_list.append(data_tensor) + elif helper.is_constant_tensor(gather_node.inputs[1]): + intializer_list.append(indices_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list + diff --git a/src/qonnx/custom_op/qop/gemm_op.py b/src/qonnx/custom_op/qop/gemm_op.py new file mode 100644 index 00000000..30a9a904 --- /dev/null +++ b/src/qonnx/custom_op/qop/gemm_op.py @@ -0,0 +1,51 @@ 
+######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class Gemm: + + def __init__(self, node): + + gemm_node = node + + x1 = gemm_node.inputs[0] + x2 = gemm_node.inputs[1] + x3 = gemm_node.inputs[2] + y = gemm_node.outputs[0] + + new_gemm_node = onnx.helper.make_node(name = gemm_node.name, op_type = "Gemm", + inputs= [x1.name, x2.name, x3.name], + outputs = [y.name], + alpha = gemm_node.attrs["alpha"], + beta = gemm_node.attrs["beta"], + transB = gemm_node.attrs["transB"]) + + self.node = new_gemm_node + + def get_node(self): + return self.node \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/gemm_op_optimized.py b/src/qonnx/custom_op/qop/gemm_op_optimized.py new file mode 100644 index 00000000..aff0526b --- /dev/null +++ b/src/qonnx/custom_op/qop/gemm_op_optimized.py @@ -0,0 +1,98 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
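+#
+#########################################################################

The class below folds a Gemm's quantized weight and bias chains back into fp32 initializers. For reference, here is a minimal sketch of the canonical ONNX QuantizeLinear/DequantizeLinear arithmetic that such folds build on; all values are illustrative rather than taken from this patch, and note that Gemm_optimized applies its own variant of these formulas:

    import numpy as np

    # Illustrative quantization parameters, not from this patch.
    scale, zero_point = np.float32(0.05), np.int8(3)

    def quantize(x):
        # ONNX QuantizeLinear: saturate(round(x / scale) + zero_point)
        q = np.round(x / scale) + zero_point
        return np.clip(q, -128, 127).astype(np.int8)

    def dequantize(q):
        # ONNX DequantizeLinear: (q - zero_point) * scale
        return (q.astype(np.float32) - np.float32(zero_point)) * scale

    x = np.array([0.10, -0.20, 0.40], dtype=np.float32)
    print(dequantize(quantize(x)))  # approximately recovers x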
+
+import onnx
+from .helper import helper
+import numpy as np
+
+class Gemm_optimized:
+
+    def __init__(self, node):
+
+        gemm_node = node
+
+        x1 = gemm_node.inputs[0]
+        x2 = gemm_node.inputs[1]
+        x3 = gemm_node.inputs[2]
+        y = gemm_node.outputs[0]
+
+        # Fold the bias dequantization chain into a single fp32 initializer.
+        bias_node = gemm_node.i(2)
+        bias_tensor = bias_node.inputs[0]
+        bias_scale_tensor = bias_node.inputs[1]
+        bias_zero_point = bias_node.inputs[2]
+        bias_scale_tensor = bias_scale_tensor.values * np.ones(bias_tensor.shape)
+        a = bias_tensor.values * bias_scale_tensor
+        b = bias_zero_point.values * np.ones(bias_tensor.shape)
+        fp32_bias_tensor = a + b
+        fp32_bias_tensor = fp32_bias_tensor.astype(np.float32)
+
+        # Locate the weight QuantizeLinear, skipping an optional Clip (QCDQ pattern).
+        weight_node = gemm_node.i(1).i()
+        if gemm_node.i(1).i().op == "Clip":
+            weight_node = gemm_node.i(1).i().i()
+        weight_tensor = weight_node.inputs[0]
+        weight_scale_tensor = weight_node.inputs[1]
+        weight_zero_point = weight_node.inputs[2]
+        weight_scale_tensor = weight_scale_tensor.values * np.ones(weight_tensor.shape)
+        a = weight_tensor.values * weight_scale_tensor
+        b = weight_zero_point.values * np.ones(weight_tensor.shape)
+        int8_weight = a + b
+        int8_weight = np.clip(int8_weight, -127, 127)
+        # Re-express the clipped integer weights in fp32 using the
+        # DequantizeLinear scale and zero-point feeding the Gemm.
+        dq_weight_scale_tensor = gemm_node.i(1).inputs[1]
+        dq_weight_zero_point = gemm_node.i(1).inputs[2]
+        fp32_weight = (int8_weight / (dq_weight_scale_tensor.values * np.ones(int8_weight.shape)) + dq_weight_zero_point.values * np.ones(int8_weight.shape))
+
+        bias_name = x1.name + ".1"
+        weight_name = x1.name + ".2"
+        bias_tensor_1 = helper.create_initializer_tensor(name=bias_name,
+                                                         tensor_array=fp32_bias_tensor,
+                                                         data_type=onnx.TensorProto.FLOAT)
+        weight_tensor_1 = helper.create_initializer_tensor(name=weight_name,
+                                                           tensor_array=fp32_weight,
+                                                           data_type=onnx.TensorProto.FLOAT)
+
+        new_gemm_node = onnx.helper.make_node(name = gemm_node.name, op_type = "Gemm",
+                                              inputs= [x1.name, weight_name, bias_name],
+                                              outputs = [y.name],
+                                              alpha = gemm_node.attrs["alpha"],
+                                              beta = gemm_node.attrs["beta"],
+                                              transB = gemm_node.attrs["transB"])
+
+        # Detach the folded weight Q/DQ chain from the graph.
+        node.i(1).i(0).inputs.clear()
+        node.i(1).i(0).outputs.clear()
+        node.i(1).inputs.clear()
+        node.i(1).outputs.clear()
+
+        self.node = new_gemm_node
+        intializer_list = []
+        intializer_list.append(weight_tensor_1)
+        intializer_list.append(bias_tensor_1)
+        self.intializer_list = intializer_list
+
+    def get_node(self):
+        return self.node
+
+    def get_intializers(self):
+        return self.intializer_list
diff --git a/src/qonnx/custom_op/qop/greater_op.py b/src/qonnx/custom_op/qop/greater_op.py
new file mode 100644
index 00000000..fc54c6e5
--- /dev/null
+++ b/src/qonnx/custom_op/qop/greater_op.py
@@ -0,0 +1,57 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper + +class Greater: + + def __init__(self, node): + + greater_node = node + x1_name = greater_node.inputs[0].name + + x2_name = greater_node.inputs[1].name + x2_value = greater_node.inputs[1].values + x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.FLOAT) + + y_name = greater_node.outputs[0].name + + new_greater_node = onnx.helper.make_node(name = greater_node.name, + op_type = "Greater", + inputs = [x1_name, x2_name], + outputs = [y_name]) + + self.node = new_greater_node + + intializer_list = [] + intializer_list.append(x2_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/helper.py b/src/qonnx/custom_op/qop/helper.py new file mode 100644 index 00000000..c070a6a4 --- /dev/null +++ b/src/qonnx/custom_op/qop/helper.py @@ -0,0 +1,65 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
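+#
+#########################################################################

The helper class defined below is the shared utility layer for every wrapper in this patch. A small usage sketch under the patch's own conventions (the tensor values and names are hypothetical):

    import numpy as np
    import onnx
    from qonnx.custom_op.qop.helper import helper  # sibling files use `from .helper import helper`

    # Hypothetical fp32 weights to be embedded as a graph initializer.
    weights = np.zeros((8, 4), dtype=np.float32)
    w_init = helper.create_initializer_tensor(name="weights",
                                              tensor_array=weights,
                                              data_type=onnx.TensorProto.FLOAT)

    # Guard graph traversals before dereferencing node.i() / node.o():
    # if helper.is_parent_exist(node, 0, 0):
    #     parent = node.i()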
+
+import onnx
+import numpy as np
+
+class helper:
+
+    def __init__(self) -> None:
+        pass
+
+    @staticmethod
+    def create_initializer_tensor(name: str, tensor_array: np.ndarray,
+                                  data_type: int = onnx.TensorProto.FLOAT) -> onnx.TensorProto:
+        initializer_tensor = onnx.helper.make_tensor(name=name,
+                                                     data_type=data_type,
+                                                     dims=tensor_array.shape,
+                                                     vals=tensor_array.flatten().tolist())
+        return initializer_tensor
+
+    # To check node.i() exists pass tensor_idx=0, node_idx=0;
+    # to check node.inputs[1].inputs[0] exists pass tensor_idx=1, node_idx=0.
+    @staticmethod
+    def is_parent_exist(node, tensor_idx, node_idx):
+        if len(node.inputs) > tensor_idx and len(node.inputs[tensor_idx].inputs) > node_idx:
+            return True
+        return False
+
+    @staticmethod
+    def is_child_present(node, tensor_idx, node_idx):
+        if len(node.outputs) > tensor_idx and len(node.outputs[tensor_idx].outputs) > node_idx:
+            return True
+        return False
+
+    @staticmethod
+    def is_attr_exist(node, attr_name):
+        try:
+            node.attrs[attr_name]
+            return True
+        except Exception:
+            return False
+
+    @staticmethod
+    def is_constant_tensor(tensor):
+        try:
+            tensor.values
+            return True
+        except Exception:
+            return False
diff --git a/src/qonnx/custom_op/qop/less_op.py b/src/qonnx/custom_op/qop/less_op.py
new file mode 100644
index 00000000..9d54216f
--- /dev/null
+++ b/src/qonnx/custom_op/qop/less_op.py
@@ -0,0 +1,57 @@
+########################################################################
+#
+# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining
+# a copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sublicense, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+# +######################################################################### + +import onnx +from .helper import helper + +class Less: + + def __init__(self, node): + + less_node = node + x1_name = less_node.inputs[0].name + + x2_name = less_node.inputs[1].name + x2_value = less_node.inputs[1].values + x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.FLOAT) + + y_name = less_node.outputs[0].name + + new_less_node = onnx.helper.make_node(name = less_node.name, + op_type = "Less", + inputs = [x1_name, x2_name], + outputs = [y_name]) + + self.node = new_less_node + + intializer_list = [] + intializer_list.append(x2_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/maxpool_op.py b/src/qonnx/custom_op/qop/maxpool_op.py new file mode 100644 index 00000000..345393f8 --- /dev/null +++ b/src/qonnx/custom_op/qop/maxpool_op.py @@ -0,0 +1,125 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +######################################################################### + +import onnx +from .helper import helper + +class MaxPool: + + def __init__(self, node, maxpool_count, remove_relu): + + maxpool_node = node + x_name = maxpool_node.inputs[0].name + y_name = maxpool_node.outputs[0].name + + if helper.is_child_present(maxpool_node, 0, 0) and maxpool_node.o().op == "QuantizeLinear": + if helper.is_parent_exist(maxpool_node, 0, 0) and maxpool_node.i().op == "DequantizeLinear": + q_node = maxpool_node.o() + y_name = q_node.outputs[0].name + + if helper.is_parent_exist(maxpool_node, 0, 0): + found_relu = False + if maxpool_node.i().op == "Relu": + relu_node = maxpool_node.i() + found_relu = True + elif maxpool_node.i().op == "DequantizeLinear": + if maxpool_node.i().i().i().op == "Relu": + relu_node = maxpool_node.i().i().i() + found_relu = True + elif maxpool_node.i().i().i().op == "Concat": + x_name = maxpool_node.i().i().outputs[0].name + if maxpool_node.o().op == "QuantizeLinear": + y_name = maxpool_node.o().outputs[0].name + elif maxpool_node.i().i().op == "MaxPool": + x_name = maxpool_node.i().i().outputs[0].name + + if found_relu: + if helper.is_child_present(relu_node, 0, 0) and relu_node.outputs[0].outputs[0].op == "MaxPool": + ql_node = relu_node.outputs[0].outputs[0] + x_name = ql_node.outputs[0].name + elif helper.is_child_present(relu_node, 0, 1) and relu_node.outputs[0].outputs[1].op == "MaxPool": + ql_node = relu_node.outputs[0].outputs[0] + x_name = ql_node.outputs[0].name + elif helper.is_child_present(relu_node, 0, 0) and relu_node.o().o().outputs[0].outputs[0].op == "MaxPool": + x_name = relu_node.outputs[0].name + elif helper.is_child_present(relu_node, 0, 0) and relu_node.o().o().outputs[0].outputs[1].op == "MaxPool": + x_name = relu_node.outputs[0].name + + + if maxpool_node.i().op == "QuantizeLinear": + x_ql_node = maxpool_node.i() + if remove_relu: + x_name = x_ql_node.outputs[0].name + else: + if helper.is_parent_exist(x_ql_node, 0, 0) and x_ql_node.i().op == "Relu" and x_ql_node.i().i().op == "Conv": + relu_node = x_ql_node.i() + x_name = relu_node.outputs[0].name + + if helper.is_attr_exist(maxpool_node, 'auto_pad'): + auto_pad_attr = maxpool_node.attrs["auto_pad"] + else: + auto_pad_attr = "NOTSET" + + if helper.is_attr_exist(maxpool_node, 'ceil_mode'): + ceil_mode_attr = maxpool_node.attrs["ceil_mode"] + else: + ceil_mode_attr = 0 + + if helper.is_attr_exist(maxpool_node, 'dilations'): + dilations_attr = maxpool_node.attrs["dilations"] + else: + dilations_attr =[1,1] + + if helper.is_attr_exist(maxpool_node, 'pads'): + pads_attr = maxpool_node.attrs["pads"] + else: + pads_attr = [0,0,0,0] + + if helper.is_attr_exist(maxpool_node, 'storage_order'): + storage_order_attr = maxpool_node.attrs["storage_order"] + else: + storage_order_attr = 0 + + if helper.is_attr_exist(maxpool_node, 'strides'): + strides_attr = maxpool_node.attrs["strides"] + else: + strides_attr = [1,1] + + new_mapool_node = onnx.helper.make_node(name = maxpool_node.name, + op_type = "MaxPool", + inputs = [x_name], + outputs = [y_name], + auto_pad = auto_pad_attr, + ceil_mode = ceil_mode_attr, + dilations = dilations_attr, + pads = pads_attr, + storage_order = storage_order_attr, + strides = strides_attr, + kernel_shape = maxpool_node.attrs["kernel_shape"]) + + self.node = new_mapool_node + + def get_node(self): + return self.node \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/qlinearconv_op.py b/src/qonnx/custom_op/qop/qlinearconv_op.py new file mode 100644 index 
00000000..bcaad1de --- /dev/null +++ b/src/qonnx/custom_op/qop/qlinearconv_op.py @@ -0,0 +1,469 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +import numpy as np +from .helper import helper + +class QLinearConv: + + def __init__(self, node, remove_relu, conv_count): + x_DQL_node = node.i() + + conv_node = node + + has_bias = True if len(conv_node.inputs) == 3 else False + + w_DQL_node = conv_node.inputs[1].inputs[0] + QCDQ_model_detected=False + clip_max = np.iinfo(np.int8).min + clip_min = np.iinfo(np.int8).max + if (helper.is_constant_tensor(w_DQL_node.i())==False and w_DQL_node.i().op == "Clip"): + QCDQ_model_detected=True + clip_min = w_DQL_node.i().inputs[1].values + clip_max = w_DQL_node.i().inputs[2].values + + # b_DQL_node = (3) + # ------------------------------------------------------------------------ + # (1) (2) DequantizeLinear (1) (2) + # \ | / (3) for bias OR \ / + # \ | / \ / + # Conv (QDQ model) Conv (3 - FP32 bias embedded) (QCDQ model) + # | | + # ------------------------------------------------------------------------ + # Initialization + b_DQL_node = conv_node + b_DQL_tensor = conv_node + if has_bias: + b_DQL_node = conv_node.inputs[2] # For QDQ + b_DQL_tensor = conv_node.inputs[2] # For QCDQ + if has_bias and QCDQ_model_detected==False: + b_DQL_node = conv_node.inputs[2].inputs[0] + is_fp32_bias_embedded = False + if QCDQ_model_detected: + if helper.is_constant_tensor(b_DQL_tensor) and b_DQL_tensor.dtype == "float32": + is_fp32_bias_embedded = True + b_QL_tensor = b_DQL_tensor + if is_fp32_bias_embedded: + if not helper.is_parent_exist(b_DQL_tensor, 0, 0): + b_QL_tensor = b_DQL_tensor + + is_weight_tensor_quantized = False + if len(w_DQL_node.inputs[0].inputs) == 0: + is_weight_tensor_quantized = True + is_bias_tensor_quantized = False + if QCDQ_model_detected and has_bias and not is_fp32_bias_embedded and not helper.is_parent_exist(b_DQL_tensor, 0, 0) and b_DQL_tensor.dtype == "int32": + is_bias_tensor_quantized = True + elif QCDQ_model_detected==False and has_bias and len(b_DQL_node.inputs[0].inputs) == 0: + is_bias_tensor_quantized = True + + if not is_weight_tensor_quantized: + w_QL_node = w_DQL_node.i() + + if QCDQ_model_detected==False and has_bias and (not is_bias_tensor_quantized): + b_QL_node = 
b_DQL_node.i() + + x_scale_tensor = x_DQL_node.inputs[1] + x_scale = x_scale_tensor.values + x_zp_tensor = x_DQL_node.inputs[2] + + w_scale_tensor = w_DQL_node.inputs[1] + w_scale = w_scale_tensor.values + w_zp_tensor = w_DQL_node.inputs[2] + + is_relu_present = False + if conv_node.o().op == "Relu": + relu_node = conv_node.o() + is_relu_present = True + if relu_node.o().op == "QuantizeLinear": + y_QL_node = relu_node.o() + y_scale_tensor = y_QL_node.inputs[1] + y_scale = y_scale_tensor.values + y_zp_tensor = y_QL_node.inputs[2] + else: + print("*********************** ERROR output of Relu node ", relu_node.name, " is not QuantizeLinear ***********************") + elif (conv_node.o().op == "QuantizeLinear"): + y_QL_node = conv_node.o() + y_scale_tensor = y_QL_node.inputs[1] + y_scale = y_scale_tensor.values + y_zp_tensor = y_QL_node.inputs[2] + else: + print("*********************** ERROR output of Conv node ", conv_node.name, " is not QuantizeLinear ***********************") + + S8_MIN = np.iinfo(np.int8).min + S8_MAX = np.iinfo(np.int8).max + if clip_min != np.iinfo(np.int8).max and clip_max != np.iinfo(np.int8).min: + S8_MIN = clip_min + S8_MAX = clip_max + U8_MIN = np.iinfo(np.uint8).min + U8_MAX = np.iinfo(np.uint8).max + S32_MIN = np.iinfo(np.int32).min + S32_MAX = np.iinfo(np.int32).max + + if (QCDQ_model_detected and helper.is_parent_exist(w_DQL_node, 0, 0) and helper.is_parent_exist(w_DQL_node.i(0), 0, 0) and w_DQL_node.i(0).i(0).op == "QuantizeLinear"): + w_QL_node = w_DQL_node.i(0).i(0) + + if QCDQ_model_detected==False and has_bias and (not is_bias_tensor_quantized) and helper.is_parent_exist(b_DQL_node, 0, 0): + b_QL_node = b_DQL_node.i() + + # -------------------------------------------------------------------------- + # QuantizeLinear (w_QL_node set to this in first if condition) + # | + # Clip + # | + # DequantizeLinear (for weight) + # (0) / (1) + # | / + # Conv + # -------------------------------------------------------------------------- + if QCDQ_model_detected and helper.is_parent_exist(w_DQL_node, 0, 0) and helper.is_parent_exist(w_DQL_node.i(0), 0, 0): + w_QL_node = w_DQL_node.i().i() + quantized_weight_tensor = w_QL_node.inputs[0] + #if is_weight_tensor_quantized and QCDQ_model_detected: + # quantized_weight_tensor = w_DQL_node.inputs[1].values + if is_weight_tensor_quantized and not QCDQ_model_detected: + quantized_weight_tensor = w_DQL_node.inputs[0].values + elif helper.is_constant_tensor(w_QL_node): + quantized_weight_tensor = w_QL_node.values + quantized_weight_tensor = np.clip(quantized_weight_tensor, S8_MIN, S8_MAX) + quantized_weight_tensor = np.round(quantized_weight_tensor) + quantized_weight_tensor = quantized_weight_tensor.astype(np.int8) + elif not helper.is_constant_tensor(w_QL_node): + weight_tensor = w_QL_node.inputs[0] + weight_scale_tensor = w_QL_node.inputs[1] + weight_zp_tensor = w_QL_node.inputs[2] + + weight_scale_channel_detected = 0 + if (weight_scale_tensor.shape): + if (weight_scale_tensor.shape[0] > 1): + weight_scale_channel_detected = 1 + weight_scaled_tensor = weight_scale_tensor.values * np.ones(weight_tensor.shape) + if QCDQ_model_detected or weight_scale_channel_detected: + weight_scaled_tensor = np.ones(weight_tensor.shape) * weight_scale_tensor.values[:, np.newaxis, np.newaxis, np.newaxis] + b = weight_tensor.values / weight_scaled_tensor + c = weight_zp_tensor.values * np.ones(weight_tensor.shape) + if QCDQ_model_detected or weight_scale_channel_detected: + c = weight_zp_tensor.values[:, np.newaxis, np.newaxis, np.newaxis] * 
np.ones(weight_tensor.shape) + quantized_weight_tensor = b + c + if weight_zp_tensor.dtype == "int8": + quantized_weight_tensor = np.clip(quantized_weight_tensor, S8_MIN, S8_MAX) + elif weight_zp_tensor.dtype == "uint8": + quantized_weight_tensor = np.clip(quantized_weight_tensor, U8_MIN, U8_MAX) + quantized_weight_tensor = np.round(quantized_weight_tensor) + quantized_weight_tensor = quantized_weight_tensor.astype(np.int8) + if QCDQ_model_detected: + clip_node = w_DQL_node.i() + clip_node.inputs.clear() + clip_node.outputs.clear() + + if has_bias and is_bias_tensor_quantized: + quantized_bias_tensor = b_DQL_node.inputs[0].values + elif is_fp32_bias_embedded and has_bias: + bias_tensor = b_QL_tensor + bias_scale_tensor1 = w_QL_node.inputs[1] + bias_zp_tensor = w_QL_node.inputs[2] + + # satutration after QL node + a = x_scale * bias_scale_tensor1.values + b = bias_tensor.values / a + # Zero point is set to 0 for quantizing bias + d = b + d = np.round(d) + quantized_bias_tensor = d + quantized_bias_tensor = np.clip(quantized_bias_tensor, S32_MIN, S32_MAX) + quantized_bias_tensor = np.round(quantized_bias_tensor) + quantized_bias_tensor = quantized_bias_tensor.astype(np.int32) + elif has_bias: + bias_tensor = b_QL_node.inputs[0] + bias_scale_tensor1 = b_QL_node.inputs[1] + bias_zp_tensor = b_QL_node.inputs[2] + + # satutration after QL node + a = bias_scale_tensor1.values * np.ones(bias_tensor.shape) + b = bias_tensor.values / a + c = bias_zp_tensor.values * np.ones(bias_tensor.shape) + d = b + c + if bias_zp_tensor.dtype == "int8": + d = np.clip(d, S8_MIN, S8_MAX) + elif bias_zp_tensor.dtype == "uint8": + d = np.clip(d, U8_MIN, U8_MAX) + d = np.round(d) + + # now again dequantize it + e = d * a + f = e - c + # f is now fp32 tensor + + bias_scale = x_scale * w_scale + bias_scale_tensor = bias_scale * np.ones(bias_tensor.shape) + quantized_bias_tensor = (f / bias_scale_tensor) + quantized_bias_tensor = np.clip(quantized_bias_tensor, S32_MIN, S32_MAX) + quantized_bias_tensor = np.round(quantized_bias_tensor) + quantized_bias_tensor = quantized_bias_tensor.astype(np.int32) + + x_QL_node = x_DQL_node.i() + is_x_QL_maxpool = False + is_X_QL_transpose = True if x_QL_node.op == "Transpose" else False + maxpool_input_s8 = False # True means s8 False means u8 + if x_QL_node.op == "MaxPool": + is_x_QL_maxpool = True + + if helper.is_parent_exist(x_QL_node, 0, 0): + if x_QL_node.i().op == "Relu": + if remove_relu: + # if this flag is enabled, then relu will not be added thus x_name will be x_QL's output tensor name + x_name = x_QL_node.outputs[0].name + else: + if (x_QL_node.i().i().op == "Conv") or (x_QL_node.i().i().op == "Add" and x_QL_node.i().i().i().inputs[2].values.dtype == np.int8): + + """ + these are 2 condtions + one in resnet50v1 + + DQL DQL + | | + | | + V | + Add<------------- + | + | + V + Relu------------------------------ + | + | + QL (x_QL_node) + | + | + DQL DQL DQL + | | | + | | | + Conv<------------ + + if Add input is s8 + x_relu_node = Relu + relu will be maintained due to s8 data type thus + x_name = relu's output + + other case is in Resnet50v1.5 + + Conv + | + | + Relu + | + | + QL + | + | + DQL DQL DQL + | | | + | | | + Conv<------------ + + we maintain relu node here thus x_name = relu's output + + """ + x_relu_node = x_QL_node.i() + x_name = x_relu_node.outputs[0].name + else: + x_name = x_QL_node.outputs[0].name + elif x_QL_node.op == "MaxPool": + """ + this is resnet50v1 case + + QL + | + | + V + Maxpool + | + | + V + DQL DQL DQL + | | | + | | | + V | | + Conv<------------ 
+ + """ + x_name = x_QL_node.outputs[0].name + if x_QL_node.i().op == "QuantizeLinear": + if (x_QL_node.i()).inputs[2].dtype == np.int8: + maxpool_input_s8 = True + elif (x_QL_node.i()).inputs[2].dtype == np.uint8: + maxpool_input_s8 = False + else: + x_name = x_QL_node.outputs[0].name + if x_QL_node.op == "Clip": + x_name = str(int(x_QL_node.o().outputs[0].name)-3) + else: + x_name = x_QL_node.outputs[0].name + + if is_relu_present and not(remove_relu): + y_name = conv_node.outputs[0].name + else: + y_name = y_QL_node.outputs[0].name + + x_scale_name = conv_node.name + "_X_SCALE" + x_scale_value = x_scale + x_scale_tensor = helper.create_initializer_tensor(name=x_scale_name, + tensor_array=x_scale_value, + data_type=onnx.TensorProto.FLOAT) + + x_zp_name = conv_node.name + "_X_ZERO_POINT" + x_zp_value = x_zp_tensor.values + + if is_x_QL_maxpool: + if maxpool_input_s8: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + else: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + elif is_X_QL_transpose: + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + else: + if (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.int8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + elif (x_QL_node.op == "QuantizeLinear" and x_QL_node.inputs[2].dtype == np.uint8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + elif x_QL_node.op == "Relu" or x_QL_node.op == "Clip": + if (x_QL_node.i().op == "QuantizeLinear" and x_QL_node.i().inputs[2].dtype == np.int8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.INT8) + elif (x_QL_node.i().op == "QuantizeLinear" and x_QL_node.i().inputs[2].dtype == np.uint8): + x_zp_tensor = helper.create_initializer_tensor(name=x_zp_name, + tensor_array=x_zp_value, + data_type=onnx.TensorProto.UINT8) + else: + print("ERROR Please check x_zp_tensor of ", conv_node.name) + + w_name = conv_node.inputs[1].name + w_value = quantized_weight_tensor + w_tensor = helper.create_initializer_tensor(name=w_name, + tensor_array=w_value, + data_type=onnx.TensorProto.INT8) + + w_scale_name = conv_node.name + "_W_SCALE" + w_scale_value = w_scale + w_scale_tensor = helper.create_initializer_tensor(name=w_scale_name, + tensor_array=w_scale_value, + data_type=onnx.TensorProto.FLOAT) + + w_zp_name = conv_node.name + "_W_ZERO_POINT" + w_zp_value = w_zp_tensor.values + w_zp_tensor = helper.create_initializer_tensor(name=w_zp_name, + tensor_array=w_zp_value, + data_type=onnx.TensorProto.INT8) + + y_scale_name = conv_node.name + "_Y_SCALE" + y_scale_value = y_scale + y_scale_tensor = helper.create_initializer_tensor(name=y_scale_name, + tensor_array=y_scale_value, + data_type=onnx.TensorProto.FLOAT) + + y_zp_name = conv_node.name + "_Y_ZERO_POINT" + y_zp_value = y_zp_tensor.values + + if y_zp_tensor.dtype == np.int8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.INT8) + elif y_zp_tensor.dtype == np.uint8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type=onnx.TensorProto.UINT8) + + if has_bias: + b_name = 
conv_node.inputs[2].name + b_value = quantized_bias_tensor + b_tensor = helper.create_initializer_tensor(name=b_name, + tensor_array=b_value, + data_type=onnx.TensorProto.INT32) + + if helper.is_attr_exist(conv_node, 'auto_pad'): + auto_pad_attr = conv_node.attrs["auto_pad"] + else: + auto_pad_attr = "NOTSET" + + if helper.is_attr_exist(conv_node, 'dilations'): + dilations_attr = conv_node.attrs["dilations"] + else: + dilations_attr = 1 + + if helper.is_attr_exist(conv_node, 'group'): + group_attr = conv_node.attrs["group"] + else: + group_attr = 1 + + if helper.is_attr_exist(conv_node, 'pads'): + pads_attr = conv_node.attrs["pads"] + else: + pads_attr = [0,0,0,0] + + if helper.is_attr_exist(conv_node, 'strides'): + strides_attr = conv_node.attrs["strides"] + else: + strides_attr = 1 + + qlinearconv_node = onnx.helper.make_node(name = conv_node.name, op_type = "QLinearConv", inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name], outputs = [y_name], auto_pad = auto_pad_attr, group = group_attr, dilations = dilations_attr, kernel_shape = conv_node.attrs["kernel_shape"], pads = pads_attr, strides = strides_attr) + if has_bias: + qlinearconv_node = onnx.helper.make_node(name = conv_node.name, op_type = "QLinearConv", inputs = [x_name, x_scale_name, x_zp_name, w_name, w_scale_name, w_zp_name, y_scale_name, y_zp_name, b_name], outputs = [y_name], auto_pad = auto_pad_attr, group = group_attr, dilations = dilations_attr, kernel_shape = conv_node.attrs["kernel_shape"], pads = pads_attr, strides = strides_attr) + + if is_relu_present: + relu_node = onnx.helper.make_node(name = relu_node.name, op_type = "Relu", inputs = [conv_node.outputs[0].name], outputs = [relu_node.outputs[0].name]) + self.relu_node = relu_node + + self.node = qlinearconv_node + + intializer_list = [] + intializer_list.append(x_scale_tensor) + intializer_list.append(x_zp_tensor) + intializer_list.append(w_tensor) + intializer_list.append(w_scale_tensor) + intializer_list.append(w_zp_tensor) + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + if has_bias: + intializer_list.append(b_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list + + def get_relu_node(self): + return self.relu_node diff --git a/src/qonnx/custom_op/qop/quantizelinear_op.py b/src/qonnx/custom_op/qop/quantizelinear_op.py new file mode 100644 index 00000000..d35b21b0 --- /dev/null +++ b/src/qonnx/custom_op/qop/quantizelinear_op.py @@ -0,0 +1,78 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper +import numpy as np + +class QuantizeLinear: + + def __init__(self, node): + ql_node = node + + x_name = ql_node.inputs[0].name + flag = False + if helper.is_child_present(node, 0, 0) and node.o().op == "DequantizeLinear": + if helper.is_child_present(node.o(), 0, 0) and node.o().o().op == "Conv": + if helper.is_child_present(node.o().o(), 0, 0) and node.o().o().o().op == "Reshape": + flag = True + x_tensor = helper.create_initializer_tensor(name = x_name,tensor_array = ql_node.inputs[0].values, data_type = onnx.TensorProto.FLOAT) + elif helper.is_child_present(node.o(), 0, 0) and node.o().o().op == "Gemm": + flag = True + x_tensor = helper.create_initializer_tensor(name = x_name,tensor_array = ql_node.inputs[0].values, data_type = onnx.TensorProto.FLOAT) + + y_scale_name = ql_node.inputs[1].name + y_scale_value = ql_node.inputs[1].values + y_scale_tensor = helper.create_initializer_tensor(name = y_scale_name,tensor_array = y_scale_value, data_type = onnx.TensorProto.FLOAT) + + y_zp_name = ql_node.inputs[2].name + y_zp_value = ql_node.inputs[2].values + if ql_node.inputs[2].dtype == np.int8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type = onnx.TensorProto.INT8) + elif ql_node.inputs[2].dtype == np.uint8: + y_zp_tensor = helper.create_initializer_tensor(name=y_zp_name, + tensor_array=y_zp_value, + data_type = onnx.TensorProto.UINT8) + + y_name = ql_node.outputs[0].name + + quantizelinear_node = onnx.helper.make_node(name = ql_node.name, op_type = "QuantizeLinear", inputs = [x_name, y_scale_name, y_zp_name], outputs = [y_name]) + + self.node = quantizelinear_node + + intializer_list = [] + if flag: + intializer_list.append(x_tensor) + intializer_list.append(y_scale_tensor) + intializer_list.append(y_zp_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/relu_op.py b/src/qonnx/custom_op/qop/relu_op.py new file mode 100644 index 00000000..58cc23cd --- /dev/null +++ b/src/qonnx/custom_op/qop/relu_op.py @@ -0,0 +1,44 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class Relu: + + def __init__(self, node): + + relu_node = node + + x_name = relu_node.inputs[0].name + y_name = relu_node.outputs[0].name + + new_relu_node = onnx.helper.make_node(name = relu_node.name, op_type = "Relu", + inputs = [x_name], + outputs = [y_name]) + + self.node = new_relu_node + + def get_node(self): + return self.node diff --git a/src/qonnx/custom_op/qop/slice_op.py b/src/qonnx/custom_op/qop/slice_op.py new file mode 100644 index 00000000..ae06e86b --- /dev/null +++ b/src/qonnx/custom_op/qop/slice_op.py @@ -0,0 +1,77 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +######################################################################### + +import onnx +from .helper import helper + +class Slice: + + def __init__(self, node): + + slice_node = node + x1_name = slice_node.inputs[0].name + + x2_name = slice_node.inputs[1].name + x2_value = slice_node.inputs[1].values + x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.INT64) + + x3_name = slice_node.inputs[2].name + x3_value = slice_node.inputs[2].values + x3_tensor = helper.create_initializer_tensor(x3_name,x3_value,onnx.TensorProto.INT64) + + x4_name = slice_node.inputs[3].name + x4_value = slice_node.inputs[3].values + x4_tensor = helper.create_initializer_tensor(x4_name,x4_value,onnx.TensorProto.INT64) + + # x5_name = slice_node.inputs[4].name + # x5_value = slice_node.inputs[4].values + # x5_tensor = helper.create_initializer_tensor(x5_name,x5_value,onnx.TensorProto.INT64) + + y_name = slice_node.outputs[0].name + + # new_squeeze_node = onnx.helper.make_node(name = slice_node.name, + # op_type = "Slice", + # inputs = [x1_name, x2_name, x3_name, x4_name, x5_name], + # outputs = [y_name]) + + new_squeeze_node = onnx.helper.make_node(name = slice_node.name, + op_type = "Slice", + inputs = [x1_name, x2_name, x3_name, x4_name], + outputs = [y_name]) + + self.node = new_squeeze_node + + intializer_list = [] + intializer_list.append(x2_tensor) + intializer_list.append(x3_tensor) + intializer_list.append(x4_tensor) + # intializer_list.append(x5_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/softmax_op.py b/src/qonnx/custom_op/qop/softmax_op.py new file mode 100644 index 00000000..4e7f9786 --- /dev/null +++ b/src/qonnx/custom_op/qop/softmax_op.py @@ -0,0 +1,45 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +######################################################################### + +import onnx + +class Softmax: + + def __init__(self, node): + + softmax_node = node + + x_name = softmax_node.inputs[0].name + y_name = softmax_node.outputs[0].name + + new_sftmx_node = onnx.helper.make_node(name = softmax_node.name, op_type = "Softmax", + inputs = [x_name], + outputs = [y_name], + axis = softmax_node.attrs["axis"]) + + self.node = new_sftmx_node + + def get_node(self): + return self.node \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/squeeze_op.py b/src/qonnx/custom_op/qop/squeeze_op.py new file mode 100644 index 00000000..bdfbae0d --- /dev/null +++ b/src/qonnx/custom_op/qop/squeeze_op.py @@ -0,0 +1,57 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx +from .helper import helper + +class Squeeze: + + def __init__(self, node): + + squeeze_node = node + x1_name = squeeze_node.inputs[0].name + + x2_name = squeeze_node.inputs[1].name + x2_value = squeeze_node.inputs[1].values + x2_tensor = helper.create_initializer_tensor(x2_name,x2_value,onnx.TensorProto.INT64) + + y_name = squeeze_node.outputs[0].name + + new_squeeze_node = onnx.helper.make_node(name = squeeze_node.name, + op_type = "Squeeze", + inputs = [x1_name, x2_name], + outputs = [y_name]) + + self.node = new_squeeze_node + + intializer_list = [] + intializer_list.append(x2_tensor) + self.intializer_list = intializer_list + + def get_node(self): + return self.node + + def get_intializers(self): + return self.intializer_list \ No newline at end of file diff --git a/src/qonnx/custom_op/qop/transpose_op.py b/src/qonnx/custom_op/qop/transpose_op.py new file mode 100644 index 00000000..4607a600 --- /dev/null +++ b/src/qonnx/custom_op/qop/transpose_op.py @@ -0,0 +1,45 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
+# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +######################################################################### + +import onnx + +class Transpose: + + def __init__(self, node): + + transpose_node = node + + x_name = transpose_node.inputs[0].name + y_name = transpose_node.outputs[0].name + + new_transpose_node = onnx.helper.make_node(name = transpose_node.name, op_type = "Transpose", + inputs = [x_name], + outputs = [y_name], + perm = transpose_node.attrs["perm"]) + + self.node = new_transpose_node + + def get_node(self): + return self.node diff --git a/src/qonnx/transformation/qcdq_to_qop.py b/src/qonnx/transformation/qcdq_to_qop.py new file mode 100644 index 00000000..b0259dff --- /dev/null +++ b/src/qonnx/transformation/qcdq_to_qop.py @@ -0,0 +1,522 @@ +######################################################################## +# +# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+#
+#########################################################################
+
+import onnx_graphsurgeon as gs
+import numpy as np
+import onnx
+from typing import Tuple
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.base import Transformation
+
+from qonnx.custom_op.qop.qlinearconv_op import *
+from qonnx.custom_op.qop.quantizelinear_op import *
+from qonnx.custom_op.qop.dequantizelinear_op import *
+from qonnx.custom_op.qop.maxpool_op import *
+from qonnx.custom_op.qop.squeeze_op import *
+from qonnx.custom_op.qop.flatten_op import *
+from qonnx.custom_op.qop.concat_op import *
+from qonnx.custom_op.qop.softmax_op import *
+from qonnx.custom_op.qop.cast_op import *
+from qonnx.custom_op.qop.gather_op import *
+from qonnx.custom_op.qop.gemm_op import *
+from qonnx.custom_op.qop.gemm_op_optimized import *
+from qonnx.custom_op.qop.greater_op import *
+from qonnx.custom_op.qop.less_op import *
+from qonnx.custom_op.qop.slice_op import *
+from qonnx.custom_op.qop.transpose_op import *
+from qonnx.custom_op.qop.relu_op import *
+from qonnx.custom_op.qop.clip_op import *
+
+class CustomEnv():
+    # stand-in for the argparse namespace the op wrappers expect
+    remove_relu=True
+    def __init__(self):
+        pass
+
+class QCDQToQOp(Transformation):
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    def apply(self, model: ModelWrapper) -> Tuple[ModelWrapper, bool]:
+        args = CustomEnv()
+        graph = gs.import_onnx(model.model)
+
+        graph.fold_constants()
+
+        def is_any_output_tensor_graph_output(node):
+            for i in range(len(graph.outputs)):
+                output_tensor_name = graph.outputs[i].name
+                if node.outputs[0].name == output_tensor_name:
+                    return True
+            return False
+
+        supported_op = ["Conv", "QuantizeLinear", "DequantizeLinear", "MaxPool", "Squeeze", "Flatten", "Concat", "Softmax", "Cast", "Gather", "Gemm", "Greater", "Less", "Slice", "Transpose", "Relu", "Clip"]
+
+        clip_num = 0
+        for node in graph.nodes:
+
+            if node.op == "Gemm":
+                gemm_node = node
+                if helper.is_child_present(gemm_node, 0, 0) and gemm_node.o().op == "Softmax":
+                    continue
+                gemm_input_node = gemm_node.i()
+                if gemm_input_node.op == "Flatten":
+                    flatten_node = gemm_input_node
+                    flatten_dql_node = flatten_node.i()
+                    flatten_dql_node.outputs = flatten_node.outputs
+                    flatten_node.outputs.clear()
+                gemm_ql_node = node.o().o()
+
+                w_dql_node = gemm_node.inputs[1].inputs[0]
+                is_weight_quantized = len(w_dql_node.inputs[0].inputs) == 0
+                wt_tensor = w_dql_node.i().inputs[0]
+                if not is_weight_quantized:
+                    w_ql_node = w_dql_node.inputs[0]
+                if w_dql_node.i().op == "Clip":
+                    w_ql_node = w_dql_node.i(0).i(0)
+                    wt_tensor = w_ql_node.inputs[0]
+                org = wt_tensor.values
+                # append two singleton spatial dims so the Gemm weights fit a 1x1 Conv
+                new_shape = org.shape + (1,1)
+                new = np.reshape(org, new_shape)
+                if is_weight_quantized:
+                    w_dql_node.inputs[0] = gs.Constant(name=w_dql_node.inputs[0].name, values = new.astype(np.int8))
+                else:
+                    w_ql_node.inputs[0] = gs.Constant(name=w_ql_node.inputs[0].name, values = new.astype(np.float32))
+
+                gemm_node.op = "Conv"
+                new_attrs = {
+                    "dilations":[1,1],
+                    "group":1,
+                    "kernel_shape":[1,1],
+                    "pads":[0,0,0,0],
+                    "strides":[1,1]
+                }
+                gemm_node.attrs = new_attrs
+
+                squeeze_dim = [2, 3]
+                Y1 = gs.Variable(name="sq_output_" + node.name, dtype=np.uint8)
+                parent_node = gemm_ql_node if node.o().op == "Relu" else node
+
+                X1 = parent_node.outputs[0]
+                X2 = gs.Constant(name="axes" + node.name, values=(np.array(squeeze_dim)).astype(np.int64))
+
+                squeeze_node = gs.Node(op="Squeeze", name="squeeze_node_" + node.name, inputs=[X1, X2], outputs=[Y1])
+
+                gemm_ql_node.o().inputs[0] = squeeze_node.outputs[0]
+
+                graph.nodes.append(squeeze_node)
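+                # Editorial note (illustrative, not part of the original logic): the rewrite
+                # above relies on the identity that a fully connected layer is a 1x1
+                # convolution. For an activation viewed as (N, K, 1, 1) and Gemm weights of
+                # shape (M, K) reshaped to (M, K, 1, 1), Conv produces (N, M, 1, 1); the
+                # appended Squeeze over axes [2, 3] restores the (N, M) Gemm output shape.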
+
+            if node.op == "Clip":
+                clip_num = clip_num + 1
+                if helper.is_parent_exist(node, 0, 0) and node.i().op == "Conv":
+                    if helper.is_child_present(node, 0, 0) and node.o().op == "QuantizeLinear":
+                        clip_node = node
+                        clip_max = clip_node.inputs[2].values
+
+                        p1 = clip_node.i()
+                        c1 = clip_node.o()
+
+                        # re-express the float clip bounds in the int8 quantized domain
+                        scale = c1.inputs[1].values
+                        new_clip_max_tensor = gs.Constant(name=clip_node.inputs[2].name+"_"+str(clip_num), values=(np.asarray(clip_max/scale)).astype(np.int8))
+                        new_clip_min_tensor = gs.Constant(name=clip_node.inputs[1].name+"_"+str(clip_num), values=clip_node.inputs[1].values.astype(np.int8))
+                        clip_node.inputs[2] = new_clip_max_tensor
+                        clip_node.inputs[1] = new_clip_min_tensor
+
+                        # p1---->Clip---->c1---->c2
+                        # becomes
+                        # p1---->c1---->Clip---->c2
+                        # p1 = Conv, c1 = QL, c2 = anything
+                        if helper.is_child_present(c1, 0, 0):
+                            c2 = c1.o()
+                            c1.inputs = [p1.outputs[0], c1.inputs[1], c1.inputs[2]]
+                            clip_node.inputs = [c1.outputs[0], clip_node.inputs[1], clip_node.inputs[2]]
+                            c2.inputs = [clip_node.outputs[0], c2.inputs[1], c2.inputs[2]]
+                        else:
+                            # p1---->Clip---->c1---->graph.outputs
+                            # becomes
+                            # p1---->c1---->Clip---->graph.outputs
+                            c1.inputs = [p1.outputs[0], c1.inputs[1], c1.inputs[2]]
+                            clip_node.inputs = [c1.outputs[0], clip_node.inputs[1], clip_node.inputs[2]]
+
+                            clip_node.outputs[0].dtype = "int8"
+                            graph.outputs[0] = clip_node.outputs[0]
+
+            if node.op == "Transpose":
+                transpose_node = node
+                if helper.is_parent_exist(transpose_node, 0, 0) and transpose_node.i().op == "DequantizeLinear":
+                    if helper.is_parent_exist(transpose_node.i(), 0, 0) and transpose_node.i().i().op == "QuantizeLinear":
+                        td = transpose_node.i()
+                        tq = td.i()
+
+                        if helper.is_constant_tensor(tq.inputs[0]):
+                            tq.inputs[0].values = np.transpose(tq.inputs[0].values, (3,2,0,1))
+                        else:
+                            tq.inputs[0].shape = [None, 3, 224, 224]
+                        td.outputs = transpose_node.outputs
+                        transpose_node.outputs.clear()
+
+            if node.op == "Flatten":
+                flatten_node = node
+                if helper.is_parent_exist(flatten_node, 0, 0) and flatten_node.i().op == "DequantizeLinear":
+                    dql_node = flatten_node.i()
+                    if helper.is_child_present(flatten_node, 0, 0) and flatten_node.o().op == "QuantizeLinear":
+                        ql_node = flatten_node.o()
+                        # guard the parent access before checking for a MaxPool producer
+                        if helper.is_parent_exist(dql_node, 0, 0) and dql_node.i().op == "MaxPool":
+                            continue
+                        # node1--->DQL--->Flatten---->QL----->node2
+                        # becomes
+                        # node1 ----> node2
+                        node1 = dql_node.i()
+                        node1.outputs = ql_node.outputs
+                        ql_node.outputs.clear()
+
+                if helper.is_parent_exist(flatten_node, 0, 0) and flatten_node.i().op == "Relu":
+                    relu_node1 = flatten_node.i()
+                    if helper.is_child_present(flatten_node, 0, 0) and flatten_node.o().op == "QuantizeLinear":
+                        relu_node1.outputs = flatten_node.outputs
+                        flatten_node.outputs.clear()
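+            # Editorial note (illustrative): the Relu handling below exploits a property of
+            # uint8 quantization. Under the common convention that a post-ReLU activation is
+            # quantized to uint8 with zero_point == 0, every dequantized value scale * (q - 0)
+            # is already >= 0, so Relu(DequantizeLinear(q)) == DequantizeLinear(q) and the Relu
+            # can be folded away; with int8 codes the dequantized values can be negative, so
+            # the Relu has to be kept.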
+            if node.op == "Relu":
+                relu_node = node
+                if helper.is_parent_exist(relu_node, 0, 0) and relu_node.i().op == "DequantizeLinear":
+                    dql_node = relu_node.i()
+                    if helper.is_child_present(relu_node, 0, 0) and relu_node.o().op == "QuantizeLinear":
+                        ql_node = relu_node.o()
+                        node1 = dql_node.i()
+                        node2 = ql_node.o()
+                        if node1.op == "QuantizeLinear":
+                            # if node1 produces uint8 output the Relu can be removed as well;
+                            # if it produces int8 output the Relu must be retained
+                            if node1.inputs[2].values.dtype == np.uint8:
+                                # node1--->DQL--->Relu---->QL----->node2
+                                # becomes
+                                # node1 ----> node2
+                                for i in range(len(node2.inputs)):
+                                    if node2.inputs[i].name == ql_node.outputs[0].name:
+                                        node2.inputs[i] = node1.outputs[0]
+                                ql_node.outputs.clear()
+                            else:
+                                # node1--->DQL--->Relu---->QL----->node2
+                                # becomes
+                                # node1 ----> Relu ---> node2
+
+                                # Relu has a single input
+                                relu_node.inputs = node1.outputs
+                                relu_node.outputs = ql_node.outputs
+                                ql_node.outputs.clear()
+
+            if node.op == "MaxPool":
+                if helper.is_parent_exist(node, 0, 0) and helper.is_child_present(node, 0, 0):
+                    parent_node = node.i()
+                    child_node = node.o()
+                    if len(parent_node.outputs[0].outputs) == 1 and parent_node.op == "DequantizeLinear" and child_node.op == "Conv":
+                        # move the DQL below the MaxPool so the pooling runs on integer data
+                        dql_node = parent_node
+                        dql_parent = dql_node.i()
+                        node.inputs[0] = dql_parent.outputs[0]
+
+                        conv_node1 = child_node
+                        dql_node.inputs[0] = node.outputs[0]
+                        conv_node1.inputs[0] = dql_node.outputs[0]
+
+            if node.op == "Gather" and node.o().op == "Transpose":
+                gather_node = node
+                transpose_node = gather_node.o()
+                gather_dql_node = gather_node.i()
+                gather_ql_node = gather_dql_node.i()
+                if gather_ql_node.op == "Clip" and gather_dql_node.i().i().op == "QuantizeLinear":
+                    gather_ql_node = gather_dql_node.i().i()
+                transpose_conv_node = transpose_node.o()
+                #  QL                        QL
+                #   |                         |
+                #   |                        Clip
+                #   |                         |
+                #  DQL                       DQL
+                #   |                         |
+                #   ----Gather      OR        ----Gather
+                #         |                         |
+                #     Transpose                 Transpose
+                #         |                         |
+                #       Conv                      Conv
+                #
+                # is changed to
+                #
+                #  QL
+                #   |
+                #   ----Gather
+                #         |
+                #     Transpose
+                #         |
+                #        DQL
+                #         |
+                #       Conv
+                gather_dql_node_inputs = gather_dql_node.inputs
+                gather_node.inputs[0] = gather_ql_node.outputs[0]
+
+                gather_dql_node_inputs[0] = transpose_node.outputs[0]
+                transpose_conv_node.inputs[0] = gather_dql_node.outputs[0]
+                gather_dql_node.inputs = gather_dql_node_inputs
+
+        graph.cleanup()
+
+        node_list = []
+        initializer_list = []
+        node_count = 0
+        maxpool_count = 0
+        conv_count = 0
+
+        def concat_input_not_constant(node):
+            for i in range(len(node.inputs)):
+                if len(node.inputs[i].inputs) == 0:
+                    return True
+            return False
+
+        def all_dql_conditions_satisfy(node):
+            has_output_tensor = len(node.outputs) > 0
+            has_no_child = has_output_tensor and len(node.outputs[0].outputs)==0
+            has_child = helper.is_child_present(node, 0, 0)
+            child_has_no_child = False
+            child_is_softmax_node = False
+            child_is_gemm_node = False
+            child_is_relu_node = False
+            child_is_slice_node = False
+
+            if has_child:
+                child_is_softmax_node = node.o().op == "Softmax"
+                child_has_no_child = len(node.o().outputs[0].outputs)==0
+                child_is_gemm_node = node.o().op == "Gemm"
+                child_is_relu_node = node.o().op == "Relu"
+                child_is_slice_node = node.o().op == "Slice"
+
+            if not has_output_tensor:
+                return False
+
+            if is_any_output_tensor_graph_output(node):
+                return True
+
+            if has_no_child:
+                return True
+
+            if child_is_softmax_node and child_has_no_child:
+                return True
+
+            if child_is_gemm_node:
+                return True
+
+            if child_is_relu_node:
+                return True
+
+            if child_is_slice_node:
+                return True
+
+            return False
+
+        def all_ql_conditions_satify(count, node):
+            if helper.is_child_present(node, 0, 0):
+                if node.o().op == "Gather":
+                    return False
+                if helper.is_child_present(node.o(), 0, 0) and node.o().o().op == "Gemm" and len(node.inputs[0].inputs) == 0:
+                    return True
+            if count == 0:
+                return True
+            has_parent = helper.is_parent_exist(node, 0, 0)
+ + if has_parent: + is_parent_maxpool_node = node.i().op == "MaxPool" + is_parent_relu_node = node.i().op == "Relu" + is_parent_concat = node.i().op == "Concat" + + if is_parent_maxpool_node: + # (Non DQL)--->MaxPool----->QL (keep this QL) + if not (node.i().i().op == "DequantizeLinear"): + return True + if is_parent_relu_node: + parent_relu_node = node.i() + + if helper.is_child_present(node, 0, 0): + if helper.is_parent_exist(node, 0, 0): + if node.i().op == "Relu": + return False + + return False + + for node in graph.nodes: + + if node.op == "Conv": + QLinearConv_node = QLinearConv(node, args.remove_relu, conv_count) + node_list.append(QLinearConv_node.get_node()) + initializer_list.append(QLinearConv_node.get_intializers()) + conv_count = conv_count + 1 + elif node.op == "QuantizeLinear" and all_ql_conditions_satify(node_count, node): + QuantizeLinear_node = QuantizeLinear(node) + node_list.append(QuantizeLinear_node.get_node()) + initializer_list.append(QuantizeLinear_node.get_intializers()) + elif node.op == "DequantizeLinear" and all_dql_conditions_satisfy(node): + DequantizeLinear_node = DequantizeLinear(node, args.remove_relu) + node_list.append(DequantizeLinear_node.get_node()) + initializer_list.append(DequantizeLinear_node.get_intializers()) + elif node.op == "MaxPool": + maxpool_node = MaxPool(node, maxpool_count, args.remove_relu) + node_list.append(maxpool_node.get_node()) + maxpool_count = maxpool_count + 1 + elif node.op == "Squeeze": + squeeze_node = Squeeze(node) + node_list.append(squeeze_node.get_node()) + initializer_list.append(squeeze_node.get_intializers()) + elif node.op == "Flatten": + flatten_node = Flatten(node) + node_list.append(flatten_node.get_node()) + elif node.op == "Concat": + concat_node = Concat(node) + node_list.append(concat_node.get_node()) + if (concat_input_not_constant(node)): + initializer_list.append(concat_node.get_intializers()) + elif node.op == "Softmax": + softmax_node = Softmax(node) + node_list.append(softmax_node.get_node()) + elif node.op == "Cast": + cast_node = Cast(node) + node_list.append(cast_node.get_node()) + elif node.op == "Gather": + gather_node = Gather(node) + node_list.append(gather_node.get_node()) + initializer_list.append(gather_node.get_intializers()) + elif node.op == "Gemm": + # If weights and bias are dequantized, embed it in Gemm + if node.i(0).op == "DequantizeLinear" and node.i(1).op == "DequantizeLinear" and node.i(2).op == "DequantizeLinear": + dql_node1 = node.i(1).name + dql_node2 = node.i(2).name + ql_node1 = node.i(1).i(0).name + dql_list = [dql_node1, dql_node2, ql_node1] + dql_found = [] + gemm_node = Gemm_optimized(node) + for node_current in node_list: + if node_current.name in dql_list: + dql_found.append(node_current) + for node_dql in dql_found: + node_list.remove(node_dql) + node_list.append(gemm_node.get_node()) + initializer_list.append(gemm_node.get_intializers()) + else: + gemm_node = Gemm(node) + node_list.append(gemm_node.get_node()) + elif node.op == "Greater": + greater_node = Greater(node) + node_list.append(greater_node.get_node()) + initializer_list.append(greater_node.get_intializers()) + elif node.op == "Less": + less_node = Less(node) + node_list.append(less_node.get_node()) + initializer_list.append(less_node.get_intializers()) + elif node.op == "Slice": + slice_node = Slice(node) + node_list.append(slice_node.get_node()) + initializer_list.append(slice_node.get_intializers()) + elif node.op == "Transpose": + transpose_node = Transpose(node) + 
node_list.append(transpose_node.get_node())
+            elif node.op == "Relu":
+                if not args.remove_relu:
+                    relu_node = Relu(node)
+                    node_list.append(relu_node.get_node())
+            elif node.op == "Clip":
+                found = False
+                for node_current in node_list:
+                    if node_current.name == node.i(0).name:
+                        found = True
+                if not found:
+                    continue
+                clip_node = Clip(node)
+                node_list.append(clip_node.get_node())
+                initializer_list.append(clip_node.get_intializers())
+
+            if node.op in supported_op:
+                node_count = node_count + 1
+
+        new_list = []
+        for list1 in initializer_list:
+            for i in list1:
+                new_list.append(i)
+
+        graph_input_shape = graph.inputs[0].shape
+        # make the batch dimension dynamic
+        graph_input_shape[0] = None
+
+        if graph.inputs[0].dtype == "float32":
+            graph_input_tensor_dtype = onnx.TensorProto.FLOAT
+        elif graph.inputs[0].dtype == "int8":
+            graph_input_tensor_dtype = onnx.TensorProto.INT8
+        elif graph.inputs[0].dtype == "int64":
+            graph_input_tensor_dtype = onnx.TensorProto.INT64
+        else:
+            raise ValueError("Unsupported graph input dtype: %s" % graph.inputs[0].dtype)
+        X = onnx.helper.make_tensor_value_info(graph.inputs[0].name,
+                                               graph_input_tensor_dtype,
+                                               graph_input_shape)
+        graph_output_tensor_list = []
+        for i in range(len(graph.outputs)):
+            if graph.outputs[i].dtype == "float32":
+                graph_output_tensor_dtype = onnx.TensorProto.FLOAT
+            elif graph.outputs[i].dtype == "int8":
+                graph_output_tensor_dtype = onnx.TensorProto.INT8
+            elif graph.outputs[i].dtype == "bool":
+                graph_output_tensor_dtype = onnx.TensorProto.BOOL
+            else:
+                raise ValueError("Unsupported graph output dtype: %s" % graph.outputs[i].dtype)
+
+            graph_output_shape = graph.outputs[i].shape
+
+            Y = onnx.helper.make_tensor_value_info(graph.outputs[i].name,
+                                                   graph_output_tensor_dtype,
+                                                   graph_output_shape)
+            graph_output_tensor_list.append(Y)
+
+        graph_def = onnx.helper.make_graph(nodes=node_list, name=graph.name,
+                                           inputs=[X],
+                                           outputs=graph_output_tensor_list,
+                                           initializer=new_list)
+
+        model_def = onnx.helper.make_model(graph_def, producer_name="onnx-example")
+        model_def.opset_import[0].version = 16
+        model_qop = ModelWrapper(model_def)
+        return (model_qop, False)
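Editorial aside: the if/elif dtype ladders above can fail silently on an unexpected dtype, which is why the explicit raises were added. A minimal sketch (not part of the patch) of the same graph-IO reconstruction using a lookup table; `make_value_info` and `GS_DTYPE_TO_TENSORPROTO` are hypothetical names:

import numpy as np
import onnx
import onnx_graphsurgeon as gs

# hypothetical helper mirroring the dtype handling in QCDQToQOp.apply above
GS_DTYPE_TO_TENSORPROTO = {
    np.dtype("float32"): onnx.TensorProto.FLOAT,
    np.dtype("int8"): onnx.TensorProto.INT8,
    np.dtype("int64"): onnx.TensorProto.INT64,
    np.dtype("bool"): onnx.TensorProto.BOOL,
}

def make_value_info(tensor: gs.Variable) -> onnx.ValueInfoProto:
    # fails loudly instead of silently reusing a stale dtype
    elem_type = GS_DTYPE_TO_TENSORPROTO.get(np.dtype(tensor.dtype))
    if elem_type is None:
        raise ValueError("unsupported graph IO dtype: %s" % tensor.dtype)
    return onnx.helper.make_tensor_value_info(tensor.name, elem_type, tensor.shape)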
""" + print(input_model_file) model = ModelWrapper(input_model_file) if output_style == CONVERT_MODE_QCDQ: model = model.transform(QuantToQCDQ()) elif output_style == CONVERT_MODE_QUANT: model = model.transform(QCDQToQuant()) + elif output_style == CONVERT_MODE_QOP: + model = model.transform(QCDQToQOp(), False, False) else: print("Unknown output_style for conversion: %s" % output_style) exit(-1) @@ -67,10 +74,8 @@ def convert(input_model_file, *, output_style: convert_mode_options, output_file output_file = input_model_file.replace(".onnx", "_%s.onnx" % output_style) model.save(output_file) - def main(): clize.run(convert) - if __name__ == "__main__": main() diff --git a/tests/transformation/test_qcdq_to_qop.py b/tests/transformation/test_qcdq_to_qop.py new file mode 100644 index 00000000..46148e1a --- /dev/null +++ b/tests/transformation/test_qcdq_to_qop.py @@ -0,0 +1,92 @@ +# Copyright (c) 2023 Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of qonnx nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/tests/transformation/test_qcdq_to_qop.py b/tests/transformation/test_qcdq_to_qop.py
new file mode 100644
index 00000000..46148e1a
--- /dev/null
+++ b/tests/transformation/test_qcdq_to_qop.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2023 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of qonnx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pytest
+import numpy as np
+import os
+
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.qcdq_to_qop import QCDQToQOp
+import onnxruntime as ort
+
+model_details = {
+    "model1.onnx": {
+        "path": "src/qonnx/data/onnx/qop-sub-graph/model1.onnx",
+    },
+    "model2.onnx": {
+        "path": "src/qonnx/data/onnx/qop-sub-graph/model2.onnx",
+    },
+}
+
+def load_graph_session(model_file, execution_provider):
+    EP_List=[]
+    if 'CPU' in execution_provider:
+        EP_List.append('CPUExecutionProvider')
+    elif 'Zendnn' in execution_provider:
+        EP_List.append('ZendnnExecutionProvider')
+    elif 'Dnnl' in execution_provider:
+        EP_List.append('DnnlExecutionProvider')
+    sess = ort.InferenceSession(model_file, providers=EP_List)
+    return sess
+
+def get_output(model_file, engine):
+    infer_sess = load_graph_session(model_file, engine)
+    inputNames = []
+    inputTensors = []
+    batch_size = 32
+    for inp in infer_sess.get_inputs():
+        inputNames.append(inp.name)
+        x_i = batch_size
+        x_shape = [x_i if isinstance(s, str) or s is None else s for s in inp.shape]
+        if not x_shape:
+            x_shape.append(1)
+        inputX = np.random.uniform(low=-10, high=100, size=(np.prod(x_shape))).astype(np.int64)
+        inputX = np.reshape(inputX, x_shape)
+        inputTensors.append(inputX)
+    feed_dict=dict(zip(inputNames, inputTensors))
+    preds = infer_sess.run([], feed_dict)
+    return preds
+
+@pytest.mark.parametrize("test_model", model_details.keys())
+def test_qcdq_to_qop(test_model):
+    dl_file = model_details[test_model]["path"]
+    dl_file_reference = dl_file.replace(".onnx", "_qop_ref.onnx")
+    assert os.path.isfile(dl_file)
+    assert os.path.isfile(dl_file_reference)
+    output_reference = get_output(dl_file_reference, "Zendnn")
+    model = ModelWrapper(dl_file)
+    model = model.transform(QCDQToQOp())
+    output_file = dl_file.replace(".onnx", "_qop.onnx")
+    model.save(output_file)
+    assert os.path.isfile(output_file)
+    output_converted_model = get_output(output_file, "Zendnn")
+    result_ok = np.isclose(output_reference, output_converted_model).all()
+    print("QCDQ to QOp Validation [", test_model, "] = ", result_ok)
+    # fail the test, not just log, when outputs diverge
+    assert result_ok
+
+if __name__ == "__main__":
+    test_qcdq_to_qop("model1.onnx")
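Editorial aside: the test above targets Zendnn/Dnnl-style execution providers, which are not available in every environment. Below is a minimal standalone sketch of the same round-trip check on the default CPU provider; the model path is a placeholder, the uniform-random integer feed mirrors get_output above, and the dtype map covers only the types this patch handles:

import numpy as np
import onnxruntime as ort

from qonnx.core.modelwrapper import ModelWrapper
from qonnx.transformation.qcdq_to_qop import QCDQToQOp

# map onnxruntime input type strings to numpy dtypes (subset, assumption)
ORT_TO_NP = {"tensor(float)": np.float32, "tensor(int8)": np.int8, "tensor(int64)": np.int64}

def run_cpu(model_file):
    # run a model with random inputs on the default CPU provider
    sess = ort.InferenceSession(model_file, providers=["CPUExecutionProvider"])
    feed = {}
    for inp in sess.get_inputs():
        shape = [1 if isinstance(d, str) or d is None else d for d in inp.shape]
        dtype = ORT_TO_NP.get(inp.type, np.float32)
        feed[inp.name] = np.random.uniform(-10, 100, size=shape).astype(dtype)
    return sess.run(None, feed)

src = "src/qonnx/data/onnx/qop-sub-graph/model1.onnx"  # placeholder path
model = ModelWrapper(src)
model = model.transform(QCDQToQOp())
dst = src.replace(".onnx", "_qop.onnx")
model.save(dst)
print([o.shape for o in run_cpu(dst)])  # smoke check: converted model executes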