From 3208a8912b958b7832635c443e49309ff8f86143 Mon Sep 17 00:00:00 2001
From: nicologhielmetti
Date: Mon, 9 Dec 2024 21:47:17 +0100
Subject: [PATCH 1/4] Sample `FloatQuant` function implemented
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A sample use of the function can be found in the `Examples` section.
`±inf` are clipped to `±max_val`, `±NaN` are mapped to `±NaN`, and zero
is always representable. I tested with subnormals (to be understood as
subnormals of the output representation) and the quantizer represented
them with no loss, although I did not test this part extensively. I also
tested the function against the Brevitas `FloatQuant` implementation:
they do not always match. For example, I think `0.3125` should be exactly
representable (`x == xq`) by a float quantizer with 4 bits for the
mantissa, 4 bits for the exponent, a bias of 0 and 1 bit for the sign,
yet the Brevitas `FloatQuant` implementation quantizes it to `0.25`. I am
not sure which result should be considered correct in this case.
---
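A quick decomposition supporting the `0.3125` claim: `0.3125 = 1.25 * 2**-2 = 1.0100b * 2**-2`, so the mantissa needs only two of the four available bits and the exponent `-2` is comfortably in range for 4 exponent bits with bias 0. A minimal sketch of the check, assuming the sample `compute_max_val` and `float_quantize` from the docs diff below are in scope:

```python
import numpy as np

x = np.asarray([0.3125], dtype=np.float32)
max_val = compute_max_val(4, 4, 0)
# expect 0.3125 to survive quantization unchanged
xq = float_quantize(x, 1, 4, 4, 0, max_val, "ROUND")
assert np.all(xq == x)
```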
 docs/qonnx-custom-ops/floatquant_op.md | 83 +++++++++++++++++++++++++-
 1 file changed, 81 insertions(+), 2 deletions(-)

diff --git a/docs/qonnx-custom-ops/floatquant_op.md b/docs/qonnx-custom-ops/floatquant_op.md
index ec8b85fd..ebd35c90 100644
--- a/docs/qonnx-custom-ops/floatquant_op.md
+++ b/docs/qonnx-custom-ops/floatquant_op.md
@@ -64,8 +64,87 @@ This operator is not part of the ONNX standard and is not currently versioned.
 
 #### Examples
 
-TODO
+```python
+import numpy as np
+
+def compute_max_val(exponent_bit_width, mantissa_bit_width, exponent_bias):
+    max_exponent = (2. ** exponent_bit_width) - 1. - exponent_bias
+    max_mantissa = np.sum((
+        2. ** np.arange(
+            0,
+            -1. * mantissa_bit_width - 1.,
+            -1.
+        )))
+    max_val = max_mantissa * (2 ** max_exponent)
+    return max_val
+
+x = np.random.rand(100).astype(np.float32)
+scale = 1
+exponent_bitwidth = 4
+mantissa_bitwidth = 3
+exponent_bias = 0
+max_val = compute_max_val(exponent_bitwidth, mantissa_bitwidth, exponent_bias)
+rounding_mode = 'ROUND'
+signed = True
+xq = float_quantize(x, scale, exponent_bitwidth, mantissa_bitwidth, exponent_bias, max_val, rounding_mode)
+```
 
 #### Sample Implementation
 
-TODO
+```python
+def float_quantize(X, scale, exponent_bitwidth, mantissa_bitwidth, exponent_bias, max_val, rounding_mode):
+    """Quantize a given floating point array to minifloat format by specifying the desired minifloat quantization"""
+
+    def resolve_rounding_mode(mode_string):
+        """Resolve the rounding mode string to the corresponding numpy functions."""
+        mode_string = mode_string.upper()
+        if mode_string == "ROUND":
+            return np.round
+        elif mode_string == "CEIL":
+            return np.ceil
+        elif mode_string == "FLOOR":
+            return np.floor
+        else:
+            raise ValueError(f"Could not resolve rounding mode called: {mode_string}")
+
+    # copy the sign of the input
+    sign = np.sign(X)
+    # compute the mask of the values equal to 0 - the output will always be zero there
+    zero_mask = np.where(X == 0)
+    # copy the input so as not to modify it
+    X = X.copy()
+    # set the zeros to 1.0 - any placeholder value would do
+    X[zero_mask] = 1.0
+    # apply the scale to the input
+    X /= scale
+    # get input exponents from the floats - no need for an eps since the zeros have already been removed
+    e_inp = np.floor(np.log2(np.abs(X)))
+    # compute the max exponent given the exponent bitwidth.
+    # Note: inf/NaN representation is included and it is clipped at the end of this function
+    e_max = np.maximum(2.**(exponent_bitwidth), 1.)
+    # compute the exponent range given the max exponent. e_low represents the subnormals of the quantized representation, e_high the infs/NaNs
+    e_low, e_high = -e_max + exponent_bias + 1, e_max + exponent_bias
+    # limit the value of the exponent given the quantization range
+    e_quant = np.clip(e_inp, e_low, e_high)
+    # compute the shift to get the quantized value rounded properly. This effectively quantizes the mantissa
+    # (rounds it by zeroing the bits that do not belong to the quantized representation)
+    round_shift = 2.**(e_quant - mantissa_bitwidth)
+    # apply the shift
+    man = X / round_shift
+    # round the mantissa
+    man_quant = resolve_rounding_mode(rounding_mode)(man)
+    # compute the max value of the mantissa (i.e. all the mantissa bits set to 1)
+    man_max = 2.**(mantissa_bitwidth + 1) - 1
+    # if the quantized value is a subnormal, remove 1 from the mantissa (i.e. 1 + 2**m => 2**m)
+    man_max = np.where(e_quant != e_low, man_max, man_max - 1)
+    # make sure the mantissa is in the representable range
+    man_clip = np.clip(man_quant, -man_max, man_max)
+    # go back to float representation
+    qx = man_clip * round_shift
+    # if it is inf or NaN, saturate to sign * max_val
+    qx = np.where(e_quant == e_high, sign * max_val, qx)
+    # restore the original zeros
+    qx[zero_mask] = 0.0
+    # re-apply the scale
+    qx *= scale
+    return qx
+```
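A worked note on the example's headroom (my arithmetic, not taken from the patch): with 4 exponent bits, 3 mantissa bits and bias 0, `compute_max_val` evaluates to `(2 - 2**-3) * 2**15 = 61440.0`, so the `np.random.rand` inputs in `[0, 1)` are nowhere near the saturation path of the quantizer.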
From d7d35b284f6ac50c3a4d541079a1de39e67614b6 Mon Sep 17 00:00:00 2001
From: Yaman Umuroglu
Date: Tue, 10 Dec 2024 12:47:17 +0100
Subject: [PATCH 2/4] [FloatQ] copy over float_quantize into custom op
 placeholder

Co-authored-by: Nicolo Ghielmetti
---
 src/qonnx/custom_op/general/floatquant.py | 95 +++++++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 src/qonnx/custom_op/general/floatquant.py

diff --git a/src/qonnx/custom_op/general/floatquant.py b/src/qonnx/custom_op/general/floatquant.py
new file mode 100644
index 00000000..9cddb945
--- /dev/null
+++ b/src/qonnx/custom_op/general/floatquant.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2024 Nicolo Ghielmetti
+# Copyright (c) 2024 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of qonnx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import numpy as np
+
+from qonnx.custom_op.general.quant import resolve_rounding_mode
+
+
+def compute_default_exponent_bias(exponent_bitwidth):
+    return (2.0 ** (exponent_bitwidth - 1)) - 1
+
+
+def compute_max_val(exponent_bitwidth, mantissa_bitwidth, exponent_bias=None):
+    if exponent_bias is None:
+        exponent_bias = compute_default_exponent_bias(exponent_bitwidth)
+    max_exponent = (2.0**exponent_bitwidth) - 1.0 - exponent_bias
+    max_mantissa = np.sum((2.0 ** np.arange(0, -1.0 * mantissa_bitwidth - 1.0, -1.0)))
+    max_val = max_mantissa * (2**max_exponent)
+    return max_val
+
+
+def float_quantize(X, scale, exponent_bitwidth, mantissa_bitwidth, exponent_bias=None, max_val=None, rounding_mode="ROUND"):
+    """Quantize a given floating point array to minifloat format by specifying the desired minifloat quantization"""
+    if exponent_bias is None:
+        exponent_bias = compute_default_exponent_bias(exponent_bitwidth)
+    if max_val is None:
+        max_val = compute_max_val(exponent_bitwidth, mantissa_bitwidth, exponent_bias)
+    # copy the sign of the input
+    sign = np.sign(X)
+    # compute the mask of the values equal to 0 - the output will always be zero there
+    zero_mask = np.where(X == 0)
+    # copy the input so as not to modify it
+    X = X.copy()
+    # set the zeros to 1.0 - any placeholder value would do
+    X[zero_mask] = 1.0
+    # apply the scale to the input
+    X /= scale
+    # get input exponents from the floats - no need for an eps since the zeros have already been removed
+    e_inp = np.floor(np.log2(np.abs(X)))
+    # compute the max exponent given the exponent bitwidth.
+    # Note: inf/NaN representation is included and it is clipped at the end of this function
+    e_max = np.maximum(2.0 ** (exponent_bitwidth), 1.0)
+    # compute the exponent range given the max exponent. e_low represents the subnormals of the
+    # quantized representation, e_high the infs/NaNs
+    e_low, e_high = -e_max + exponent_bias + 1, e_max + exponent_bias
+    # limit the value of the exponent given the quantization range
+    e_quant = np.clip(e_inp, e_low, e_high)
+    # compute the shift to get the quantized value rounded properly. This effectively quantizes the mantissa
+    # (rounds it by zeroing the bits that do not belong to the quantized representation)
+    round_shift = 2.0 ** (e_quant - mantissa_bitwidth)
+    # apply the shift
+    man = X / round_shift
+    # round the mantissa
+    man_quant = resolve_rounding_mode(rounding_mode)(man)
+    # compute the max value of the mantissa (i.e. all the mantissa bits set to 1)
+    man_max = 2.0 ** (mantissa_bitwidth + 1) - 1
+    # if the quantized value is a subnormal, remove 1 from the mantissa (i.e. 1 + 2**m => 2**m)
+    man_max = np.where(e_quant != e_low, man_max, man_max - 1)
+    # make sure the mantissa is in the representable range
+    man_clip = np.clip(man_quant, -man_max, man_max)
+    # go back to float representation
+    qx = man_clip * round_shift
+    # if it is inf or NaN, saturate to sign * max_val
+    qx = np.where(e_quant == e_high, sign * max_val, qx)
+    # restore the original zeros
+    qx[zero_mask] = 0.0
+    # re-apply the scale
+    qx *= scale
+    return qx
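As a quick, hypothetical sanity check of the defaults introduced here (my examples, not part of the patch): the default bias follows the usual IEEE-style `2**(e-1) - 1` convention, and `compute_max_val` reproduces the FP4 E2M1 max normal value used in the tests that follow:

```python
from qonnx.custom_op.general.floatquant import compute_default_exponent_bias, compute_max_val

assert compute_default_exponent_bias(8) == 127.0  # matches the IEEE 754 single-precision bias
assert compute_max_val(2, 1) == 6.0  # FP4 E2M1 max normal value, i.e. 1.5 * 2**2
```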
From 0f6633a0ac5e8d37e51f84f5fba98a194fb9508b Mon Sep 17 00:00:00 2001
From: Yaman Umuroglu
Date: Tue, 10 Dec 2024 12:48:52 +0100
Subject: [PATCH 3/4] [Test] add test skeleton for compute_max_val and
 float_quantize

---
 tests/custom_op/test_floatquant.py | 49 ++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 tests/custom_op/test_floatquant.py

diff --git a/tests/custom_op/test_floatquant.py b/tests/custom_op/test_floatquant.py
new file mode 100644
index 00000000..91ebdc92
--- /dev/null
+++ b/tests/custom_op/test_floatquant.py
@@ -0,0 +1,49 @@
+# Copyright (c) 2024 Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of qonnx nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import numpy as np
+
+from qonnx.custom_op.general.floatquant import compute_max_val, float_quantize
+
+
+def test_compute_max_val():
+    # reference max normal values from the OCP MX 1.0 standard
+    assert compute_max_val(2, 3) == 7.5  # FP6 E2M3
+    assert compute_max_val(3, 2) == 28.0  # FP6 E3M2
+    assert compute_max_val(2, 1) == 6.0  # FP4 E2M1
+
+
+def test_float_quantize():
+    zero_tensor = np.zeros((2, 2))
+    unit_scale = np.asarray([1.0], dtype=np.float32)
+    assert np.all(float_quantize(zero_tensor, unit_scale, 2, 3) == zero_tensor)
+    testcase_a = np.asarray([1.5], dtype=np.float32)
+    testcase_b = np.asarray([3.25], dtype=np.float32)
+    assert np.all(float_quantize(testcase_a, unit_scale, 2, 3) == testcase_a)
+    assert np.all(float_quantize(testcase_b, unit_scale, 2, 3) == testcase_b)
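A brief worked check of why these two values should round-trip exactly under E2M3 (my arithmetic, not part of the patch): `1.5 = 1.100b * 2**0` and `3.25 = 1.101b * 2**1`, so both mantissas fit in the 3 available bits and both exponents lie within the E2M3 normal range, which is why the assertions expect the inputs back unchanged.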
From 491a3bebb958da47c802f22cb190b73c9be01c7a Mon Sep 17 00:00:00 2001
From: nicologhielmetti
Date: Thu, 12 Dec 2024 00:36:39 +0100
Subject: [PATCH 4/4] FloatQuant implementation improved to pass the
 nullifying tests Yaman provided

Some other tests have been added.
---
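A small usage sketch of the new `lt_subnorm_to_zero` flag, mirroring the `0.124` test case added below (my example, assuming the updated module is importable):

```python
import numpy as np

from qonnx.custom_op.general.floatquant import float_quantize

x = np.asarray([0.124], dtype=np.float32)
# for E2M3 with the default bias, values below the smallest representable
# subnormal are flushed to zero when lt_subnorm_to_zero is set
print(float_quantize(x, 1.0, 2, 3, lt_subnorm_to_zero=True))  # -> [0.]
```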
 src/qonnx/custom_op/general/floatquant.py | 23 +++++++++++++++++++----
 tests/custom_op/test_floatquant.py        |  8 ++++++++
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/qonnx/custom_op/general/floatquant.py b/src/qonnx/custom_op/general/floatquant.py
index 9cddb945..7447392e 100644
--- a/src/qonnx/custom_op/general/floatquant.py
+++ b/src/qonnx/custom_op/general/floatquant.py
@@ -45,7 +45,16 @@ def compute_max_val(exponent_bitwidth, mantissa_bitwidth, exponent_bias=None):
     return max_val
 
 
-def float_quantize(X, scale, exponent_bitwidth, mantissa_bitwidth, exponent_bias=None, max_val=None, rounding_mode="ROUND"):
+def float_quantize(
+    X,
+    scale,
+    exponent_bitwidth,
+    mantissa_bitwidth,
+    exponent_bias=None,
+    max_val=None,
+    rounding_mode="ROUND",
+    lt_subnorm_to_zero=False,
+):
     """Quantize a given floating point array to minifloat format by specifying the desired minifloat quantization"""
     if exponent_bias is None:
         exponent_bias = compute_default_exponent_bias(exponent_bitwidth)
@@ -65,10 +74,10 @@ def float_quantize(X, scale, exponent_bitwidth, mantissa_bitwidth, exponent_bias
     e_inp = np.floor(np.log2(np.abs(X)))
     # compute the max exponent given the exponent bitwidth.
     # Note: inf/NaN representation is included and it is clipped at the end of this function
-    e_max = np.maximum(2.0 ** (exponent_bitwidth), 1.0)
+    e_max = np.maximum(2.0 ** (exponent_bitwidth) - 1, 1.0)
     # compute the exponent range given the max exponent. e_low represents the subnormals of the
     # quantized representation, e_high the infs/NaNs
-    e_low, e_high = -e_max + exponent_bias + 1, e_max + exponent_bias
+    e_low, e_high = -e_max + exponent_bias + 1, e_max - exponent_bias
     # limit the value of the exponent given the quantization range
     e_quant = np.clip(e_inp, e_low, e_high)
     # compute the shift to get the quantized value rounded properly. This effectively quantizes the mantissa
@@ -80,6 +89,8 @@ def float_quantize(X, scale, exponent_bitwidth, mantissa_bitwidth, exponent_bias
     man_quant = resolve_rounding_mode(rounding_mode)(man)
     # compute the max value of the mantissa (i.e. all the mantissa bits set to 1)
     man_max = 2.0 ** (mantissa_bitwidth + 1) - 1
+    # compute the min value of the mantissa (i.e. only the least significant mantissa bit set)
+    man_min = 2.0**-mantissa_bitwidth
     # if the quantized value is a subnormal, remove 1 from the mantissa (i.e. 1 + 2**m => 2**m)
     man_max = np.where(e_quant != e_low, man_max, man_max - 1)
     # make sure the mantissa is in the representable range
@@ -88,7 +99,11 @@ def float_quantize(X, scale, exponent_bitwidth, mantissa_bitwidth, exponent_bias
     qx = man_clip * round_shift
     # if it is inf or NaN, saturate to sign * max_val
     qx = np.where(e_quant == e_high, sign * max_val, qx)
-    # restore the original zeros
+    if lt_subnorm_to_zero:
+        # compute the min subnormal as the lowest possible exponent times the min mantissa
+        min_subnormal = 2.0 ** (e_low + 1) * man_min
+        # if the value is closer to zero than the minimum subnormal then set it to 0
+        qx = np.where((X <= min_subnormal) & (X >= -min_subnormal), 0.0, qx)
+    # restore the original zeros
     qx[zero_mask] = 0.0
     # re-apply the scale
     qx *= scale
diff --git a/tests/custom_op/test_floatquant.py b/tests/custom_op/test_floatquant.py
index 91ebdc92..d4ff17d0 100644
--- a/tests/custom_op/test_floatquant.py
+++ b/tests/custom_op/test_floatquant.py
@@ -45,5 +45,13 @@ def test_float_quantize():
     assert np.all(float_quantize(zero_tensor, unit_scale, 2, 3) == zero_tensor)
     testcase_a = np.asarray([1.5], dtype=np.float32)
     testcase_b = np.asarray([3.25], dtype=np.float32)
+    testcase_c = np.asarray([8.0], dtype=np.float32)
+    testcase_d = np.asarray([28.2], dtype=np.float32)
+    testcase_e = np.asarray([6.1], dtype=np.float32)
+    testcase_f = np.asarray([0.124], dtype=np.float32)
     assert np.all(float_quantize(testcase_a, unit_scale, 2, 3) == testcase_a)
     assert np.all(float_quantize(testcase_b, unit_scale, 2, 3) == testcase_b)
+    assert np.all(float_quantize(testcase_c, unit_scale, 2, 3) == compute_max_val(2, 3))
+    assert np.all(float_quantize(testcase_d, unit_scale, 3, 2) == compute_max_val(3, 2))
+    assert np.all(float_quantize(testcase_e, unit_scale, 2, 1) == compute_max_val(2, 1))
+    assert np.all(float_quantize(testcase_f, unit_scale, 2, 3, lt_subnorm_to_zero=True) == 0.0)
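For the record, a worked check of the last assertion (my arithmetic, not from the patch): for E2M3 with the default bias of 1, `e_max = 2**2 - 1 = 3` and `e_low = -e_max + 1 + 1 = -1`, so the flush threshold is `min_subnormal = 2**(e_low + 1) * 2**-3 = 0.125`; `0.124` falls below it and is therefore nullified to `0.0` when `lt_subnorm_to_zero=True`.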