diff --git a/docs/Changelog.md b/docs/Changelog.md
index 02d86d66475..a836f3582f7 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -24771,7 +24771,6 @@ This version of the operator has been available since version 21 of the default
must have the same shape, determining the quantization's granularity: a scalar for per-tensor/per-layer quantization,
a 1-D tensor for per-axis quantization, or have a rank identical to the input for blocked quantization.
See QuantizeLinear for details on quantization granularity.
-
`x_zero_point` and `x` must have the same type. `x` and `y` must have the same shape. In the case of dequantizing
`int32`, there's no zero point (zero point is supposed to be 0).
`zero-point` is usually not used in the case of float8 types quantization, but the dequantization formula remains the same
@@ -25375,7 +25374,6 @@ This version of the operator has been available since version 21 of the default
The linear quantization operator consumes a high-precision tensor, a scale, and a zero point to compute the
low-precision/quantized tensor. The scale factor and zero point must have the same shape, determining the quantization
granularity. The quantization formula is `y = saturate((x / y_scale) + y_zero_point)`.
-
Saturation is done according to:
- uint16: [0, 65535]
- int16: [-32768, 32767]
@@ -25383,12 +25381,9 @@ This version of the operator has been available since version 21 of the default
- int8: [-128, 127]
- uint4: [0, 15]
- int4: [-8, 7]
-
For `(x / y_scale)`, it rounds to the nearest even. Refer to https://en.wikipedia.org/wiki/Rounding for details.
-
`y_zero_point` and `y` must have the same type. `y_zero_point` is usually not used for quantization to float8 types, but the quantization
formula remains the same for consistency, and the type of the attribute `y_zero_point` still determines the quantization type.
-
There are three supported quantization granularities, determined by the shape of `y_scale`.
In all cases, `y_zero_point` must have the same shape as `y_scale`.
- Per-tensor (per-layer) quantization: `y_scale` is a scalar.
@@ -28229,6 +28224,277 @@ This version of the operator has been available since version 22 of the default
Constrain input and output types to float tensors.
+## Version 23 of the default ONNX operator set
+### **Cast-23**
+
+ The operator casts the elements of a given input tensor to a data type
+ specified by the 'to' argument and returns an output tensor of the same size in
+ the converted type. The 'to' argument must be one of the data types specified
+ in the 'DataType' enum field in the TensorProto message.
+
+  Casting from a string tensor in plain (e.g., "3.14" and "1000") or scientific numeric representation
+  (e.g., "1e-5" and "1E8") to float types is supported. For example, converting the string "100.5" to an integer may
+  yield the result 100. Some string literals are reserved for special floating-point values:
+  "+INF" (and "INF"), "-INF", and "NaN" denote positive infinity, negative infinity, and not-a-number, respectively.
+  Any string that matches "+INF" in a case-insensitive way is mapped to positive infinity, and the same
+  case-insensitive rule applies to "INF" and "NaN". When casting from numeric tensors
+  to string tensors, a plain floating-point representation (such as "314.15926") is used.
+  Converting a non-numeric-literal string such as "Hello World!" is undefined behavior, as is
+  converting a string that represents a floating-point value, such as "2.718", to an integer type.
+
+  Conversion from any numerical type to any other numerical type is always allowed.
+  Users must be aware of precision loss and value changes caused by the range difference between the two types.
+  For example, the 64-bit float 3.1415926459 may be rounded to the 32-bit float 3.141592. Similarly, converting
+  the integer 36 to Boolean produces True, since any nonzero value maps to True.
+
+ In more detail, the conversion among numerical types should follow these rules
+ if the destination type is not a float 8 type.
+
+ * Casting from floating point to:
+ * floating point: +/- infinity if OOR (out of range).
+ * fixed point: undefined if OOR.
+ * bool: +/- 0.0 to False; all else to True.
+ * Casting from fixed point to:
+ * floating point: +/- infinity if OOR. (+ infinity in the case of uint)
+ * fixed point: when OOR, discard higher bits and reinterpret (with respect to two's complement representation for
+ signed types). For example, 200 (int16) -> -56 (int8).
+ * bool: zero to False; nonzero to True.
+ * Casting from bool to:
+ * floating point: `{1.0, 0.0}`.
+ * fixed point: `{1, 0}`.
+ * bool: no change.
+
+  Float 8 types were introduced to speed up the training of
+  deep models. By default, the conversion of a float *x* obeys
+  the following rules. `[x]` denotes the value rounded to
+  the target mantissa width.
+
+ | x | E4M3FN | E4M3FNUZ | E5M2 | E5M2FNUZ |
+ |------|----|----|----|----|
+ | 0 | 0 | 0 | 0 | 0 |
+ |-0 | -0 | 0 | -0 | 0 |
+ | NaN | NaN | NaN | NaN | NaN |
+  | +/- Inf | +/- FLT_MAX | NaN | +/- FLT_MAX | NaN |
+ | [x] > FLT_MAX | FLT_MAX | FLT_MAX | FLT_MAX | FLT_MAX |
+ | [x] < -FLT_MAX | -FLT_MAX | -FLT_MAX | -FLT_MAX | -FLT_MAX |
+ | else | RNE | RNE | RNE | RNE |
+
+ The behavior changes if the parameter 'saturate' is set to False.
+ The rules then become:
+
+ | x | E4M3FN | E4M3FNUZ | E5M2 | E5M2FNUZ |
+ |------|----|----|----|----|
+ | 0 | 0 | 0 | 0 | 0 |
+ |-0 | -0 | 0 | -0 | 0 |
+ | NaN | NaN | NaN | NaN | NaN |
+ | +/- Inf | NaN | NaN | +/- Inf | NaN |
+ | [x] > FLT_MAX | NaN | NaN | Inf | NaN |
+ | [x] < -FLT_MAX | NaN | NaN | -Inf | NaN |
+ | else | RNE | RNE | RNE | RNE |
+
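+  As a minimal sketch of these rules (illustrative only; it assumes the
+  `onnx.reference.ReferenceEvaluator` supports float8 casts at opset 23),
+  the `saturate` attribute decides whether an out-of-range float becomes
+  +/- FLT_MAX of the target type or NaN:
+
+```python
+import numpy as np
+import onnx
+from onnx import TensorProto, helper
+from onnx.reference import ReferenceEvaluator
+
+
+def make_cast_model(saturate: int) -> onnx.ModelProto:
+    # Cast float32 -> float8e4m3fn with the requested saturation behavior.
+    node = helper.make_node(
+        "Cast", ["x"], ["y"], to=TensorProto.FLOAT8E4M3FN, saturate=saturate
+    )
+    graph = helper.make_graph(
+        [node],
+        "cast_float8",
+        [helper.make_tensor_value_info("x", TensorProto.FLOAT, [2])],
+        [helper.make_tensor_value_info("y", TensorProto.FLOAT8E4M3FN, [2])],
+    )
+    return helper.make_model(graph, opset_imports=[helper.make_opsetid("", 23)])
+
+
+x = np.array([1e6, -1e6], dtype=np.float32)  # far beyond E4M3FN's max of 448
+for saturate in (1, 0):
+    # saturate=1 clamps to +/- FLT_MAX of the target type; saturate=0 gives NaN.
+    print(saturate, ReferenceEvaluator(make_cast_model(saturate)).run(None, {"x": x}))
+```
+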
+#### Version
+
+This version of the operator has been available since version 23 of the default ONNX operator set.
+
+#### Attributes
+
+
+- saturate : int (default is 1)
+- The parameter defines how the conversion behaves if an input value is out of the range of the destination type. It applies only to float 8 conversions (float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz). It is true by default. All cases are fully described in the two tables in the operator description.
+- to : int (required)
+- The data type to which the elements of the input tensor are cast. It must strictly be one of the types from the DataType enum in TensorProto.
+
+
+#### Inputs
+
+
+- input (differentiable) : T1
+- Input tensor to be cast.
+
+
+#### Outputs
+
+
+- output (differentiable) : T2
+- Output tensor with the same shape as the input, with the type specified by the 'to' argument
+
+
+#### Type Constraints
+
+
+- T1 : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8), tensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16), tensor(float), tensor(double), tensor(string), tensor(bool), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(uint4), tensor(int4), tensor(float4e2m1)
+- Constrain input types. Casting from complex is not supported.
+- T2 : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8), tensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16), tensor(float), tensor(double), tensor(string), tensor(bool), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(uint4), tensor(int4), tensor(float4e2m1)
+- Constrain output types. Casting to complex is not supported.
+
+
+### **CastLike-23**
+
+ The operator casts the elements of a given input tensor (the first input) to
+ the same data type as the elements of the second input tensor.
+ See documentation of the Cast operator for further details.
+
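+  As a minimal usage sketch (tensor names are illustrative; it assumes the
+  `onnx.reference.ReferenceEvaluator` supports opset 23), note that the target
+  element type comes from the second input's type, not from an attribute:
+
+```python
+import numpy as np
+import onnx
+from onnx import TensorProto, helper
+from onnx.reference import ReferenceEvaluator
+
+node = helper.make_node("CastLike", ["input", "target_type"], ["output"])
+graph = helper.make_graph(
+    [node],
+    "castlike",
+    [
+        helper.make_tensor_value_info("input", TensorProto.FLOAT, [3]),
+        helper.make_tensor_value_info("target_type", TensorProto.FLOAT16, [1]),
+    ],
+    [helper.make_tensor_value_info("output", TensorProto.FLOAT16, [3])],
+)
+model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 23)])
+
+(result,) = ReferenceEvaluator(model).run(
+    None,
+    {
+        "input": np.array([1.5, 2.5, 3.5], dtype=np.float32),
+        # Only the dtype of this tensor matters; its values are ignored.
+        "target_type": np.zeros(1, dtype=np.float16),
+    },
+)
+print(result.dtype)  # float16
+```
+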
+#### Version
+
+This version of the operator has been available since version 23 of the default ONNX operator set.
+
+#### Attributes
+
+
+- saturate : int (default is 1)
+- The parameter defines how the conversion behaves if an input value is out of the range of the destination type. It applies only to float 8 conversions (float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz). It is true by default. Please refer to the Cast operator description for further details.
+
+
+#### Inputs
+
+
+- input (differentiable) : T1
+- Input tensor to be cast.
+- target_type (non-differentiable) : T2
+- The (first) input tensor will be cast to produce a tensor of the same type as this (second input) tensor.
+
+
+#### Outputs
+
+
+- output (differentiable) : T2
+- Output tensor produced by casting the first input tensor to have the same type as the second input tensor.
+
+
+#### Type Constraints
+
+
+- T1 : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8), tensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16), tensor(float), tensor(double), tensor(string), tensor(bool), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(uint4), tensor(int4), tensor(float4e2m1)
+- Constrain input types. Casting from complex is not supported.
+- T2 : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8), tensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16), tensor(float), tensor(double), tensor(string), tensor(bool), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(uint4), tensor(int4), tensor(float4e2m1)
+- Constrain output types. Casting to complex is not supported.
+
+
+### **DequantizeLinear-23**
+
+ The linear dequantization operator. It consumes a quantized tensor, a scale, and a zero point to compute the
+ full-precision tensor. The dequantization formula is `y = (x - x_zero_point) * x_scale`. `x_scale` and `x_zero_point`
+ must have the same shape, determining the quantization's granularity: a scalar for per-tensor/per-layer quantization,
+  a 1-D tensor for per-axis quantization, or a tensor of rank identical to the input's for blocked quantization.
+ See QuantizeLinear for details on quantization granularity.
+
+ `x_zero_point` and `x` must have the same type. `x` and `y` must have the same shape. In the case of dequantizing
+  `int32`, there is no zero point (the zero point is assumed to be 0).
+  `x_zero_point` is usually not used when dequantizing float8 types, but the dequantization formula remains the same
+ for consistency, and `x_scale` still determines the output type.
+
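+  A minimal numpy sketch of the per-tensor case of this formula (values are
+  illustrative, with names mirroring the spec):
+
+```python
+import numpy as np
+
+x = np.array([0, 3, 128, 255], dtype=np.uint8)  # quantized input
+x_scale = np.float32(2.0)
+x_zero_point = np.uint8(128)
+
+# Compute in float so the subtraction cannot wrap around in uint8.
+y = (x.astype(np.float32) - np.float32(x_zero_point)) * x_scale
+print(y)  # [-256. -250.    0.  254.]
+```
+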
+#### Version
+
+This version of the operator has been available since version 23 of the default ONNX operator set.
+
+#### Attributes
+
+
+- axis : int (default is 1)
+- (Optional) The axis of the dequantizing dimension of the input tensor. Used for per-axis and blocked quantization. A negative value means counting dimensions from the back. The accepted range is `[-r, r-1]` where `r = rank(input)`.
+- block_size : int (default is 0)
+- (Optional) The size of the quantization block (number of times each scale is replicated). Used only for blocked quantization. The block size is a positive integer. Given `x` shape `(D0, ..., Di, ..., Dn)`, `x_scale` shape `(S0, ..., Si, ..., Sn)` and `axis=i`, the accepted range is `[ceil(Di/Si), ceil(Di/(Si-1))-1]` (a worked example follows below).
+
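+For example, with `Di = 10` and `Si = 4`, the accepted range is
+`[ceil(10/4), ceil(10/3) - 1] = [3, 3]`: only `block_size = 3` tiles the 10
+elements along the blocked dimension into the 4 scale blocks.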
+
+#### Inputs (2 - 3)
+
+
+- x : T1
+- N-D quantized input tensor to be dequantized.
+- x_scale : T2
+- Scale for input `x`. For per-tensor/layer dequantization the scale is a scalar, for per-axis dequantization it is a 1-D tensor, and for blocked dequantization it has the same shape as the input, except for the one dimension in which blocking is performed.
+- x_zero_point (optional) : T1
+- Zero point for input `x`. Shape must match `x_scale`. It's optional; the zero point is 0 when not specified.
+
+
+#### Outputs
+
+
+- y : T2
+- N-D full-precision output tensor. It has the same shape as the input `x`.
+
+
+#### Type Constraints
+
+
+- T1 : tensor(int8), tensor(uint8), tensor(int16), tensor(uint16), tensor(int32), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(uint4), tensor(int4), tensor(float4e2m1)
+- The type of the inputs 'x_zero_point' and 'x'.
+- T2 : tensor(float), tensor(float16), tensor(bfloat16)
+- 'x_scale' determines the output type.
+
+
+### **QuantizeLinear-23**
+
+ The linear quantization operator consumes a high-precision tensor, a scale, and a zero point to compute the
+ low-precision/quantized tensor. The scale factor and zero point must have the same shape, determining the quantization
+ granularity. The quantization formula is `y = saturate((x / y_scale) + y_zero_point)`.
+
+ Saturation is done according to:
+ - uint16: [0, 65535]
+ - int16: [-32768, 32767]
+ - uint8: [0, 255]
+ - int8: [-128, 127]
+ - uint4: [0, 15]
+ - int4: [-8, 7]
+
+  For `(x / y_scale)`, the result is rounded to the nearest value, with ties rounding to the nearest even value. Refer to https://en.wikipedia.org/wiki/Rounding for details.
+
+ `y_zero_point` and `y` must have the same type. `y_zero_point` is usually not used for quantization to float8 types, but the quantization
+  formula remains the same for consistency, and the type of the input `y_zero_point` still determines the quantization type.
+
+ There are three supported quantization granularities, determined by the shape of `y_scale`.
+ In all cases, `y_zero_point` must have the same shape as `y_scale`.
+ - Per-tensor (per-layer) quantization: `y_scale` is a scalar.
+  - Per-axis quantization: The scale must be a 1-D tensor whose length equals that of the quantization axis. For an input shape
+ `(D0, ..., Di, ..., Dn)` and `axis=i`, `y_scale` is a 1-D tensor of length `Di`.
+ - Blocked quantization: The scale's shape is identical to the input's shape, except for one dimension, in which
+ blocking is performed. Given `x` shape `(D0, ..., Di, ..., Dn)`, `axis=i`, and block size `B`: `y_scale` shape is
+ `(D0, ..., ceil(Di/B), ..., Dn)`.
+
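+  A minimal numpy sketch of the per-tensor case under this formula
+  (illustrative values; `np.rint` implements the round-half-to-even rule):
+
+```python
+import numpy as np
+
+x = np.array([-2.0, -1.0, 0.0, 254.0, 1000.0], dtype=np.float32)
+y_scale = np.float32(2.0)
+y_zero_point = np.uint8(1)
+
+# Round half to even, add the zero point, then saturate to uint8's range.
+y = np.clip(np.rint(x / y_scale) + y_zero_point, 0, 255).astype(np.uint8)
+print(y)  # [  0   1   1 128 255]
+```
+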
+#### Version
+
+This version of the operator has been available since version 23 of the default ONNX operator set.
+
+#### Attributes
+
+
+- axis : int (default is 1)
+- (Optional) The axis of the quantizing dimension of the input tensor. Used only for per-axis and blocked quantization. A negative value means counting dimensions from the back. The accepted range is `[-r, r-1]` where `r = rank(input)`. When the rank of the input is 1, per-tensor quantization is applied, making the axis unnecessary in this scenario.
+- block_size : int (default is 0)
+- (Optional) The size of the quantization block (number of times each scale is replicated). Used only for blocked quantization. The block size is a positive integer. Given `x` shape `(D0, ..., Di, ..., Dn)`, `y_scale` shape `(S0, ..., Si, ..., Sn)` and `axis=i`, the accepted range is `[ceil(Di/Si), ceil(Di/(Si-1))-1]`
+- output_dtype : int (default is 0)
+- (Optional) The output data type. If not supplied, the output data type is inferred from the `y_zero_point` data type (`T2`). If neither `output_dtype` nor `y_zero_point` is supplied, the output data type is uint8. If both `output_dtype` and `y_zero_point` are specified, `output_dtype` must be `T2`.
+- saturate : int (default is 1)
+- The parameter defines how the conversion behaves if an input value is out of the range of the destination type. It applies only to float 8 quantization (float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz). It is true by default. All cases are fully described in the two tables in the Cast operator description.
+
+
+#### Inputs (2 - 3)
+
+
+- x : T1
+- N-D full-precision input tensor to be quantized.
+- y_scale : T1
+- Scale for doing quantization to get `y`. For per-tensor/layer quantization the scale is a scalar, for per-axis quantization it is a 1-D tensor, and for blocked quantization it has the same shape as the input, except for the one dimension in which blocking is performed.
+- y_zero_point (optional) : T2
+- Zero point for doing quantization to get `y`. Shape must match `y_scale`. Default is uint8 with a zero point of 0 if not specified.
+
+
+#### Outputs
+
+
+- y : T2
+- N-D quantized output tensor. It has the same shape as the input `x`.
+
+
+#### Type Constraints
+
+
+- T1 : tensor(float), tensor(float16), tensor(bfloat16), tensor(int32)
+- The type of the input 'x'.
+- T2 : tensor(int8), tensor(uint8), tensor(int16), tensor(uint16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(uint4), tensor(int4), tensor(float4e2m1)
+- The type of the input `y_zero_point` and the output `y`.
+
+
# ai.onnx.preview.training
## Version 1 of the 'ai.onnx.preview.training' operator set
### **ai.onnx.preview.training.Adagrad-1**
diff --git a/docs/Operators.md b/docs/Operators.md
index 32e063feee0..6c1ce4d088c 100644
--- a/docs/Operators.md
+++ b/docs/Operators.md
@@ -29,7 +29,7 @@ For an operator input/output's differentiability, it can be differentiable,
|BitwiseNot|18|
|BitwiseOr|18|
|BitwiseXor|18|
-|Cast|21, 19, 13, 9, 6, 1|
+|Cast|23, 21, 19, 13, 9, 6, 1|
|Ceil|13, 6, 1|
|Col2Im|18|
|Compress|11, 9|
@@ -46,7 +46,7 @@ For an operator input/output's differentiability, it can be differentiable,
|DFT|20, 17|
|DeformConv|22, 19|
|DepthToSpace|13, 11, 1|
-|DequantizeLinear|21, 19, 13, 10|
+|DequantizeLinear|23, 21, 19, 13, 10|
|Det|22, 11|
|Div|14, 13, 7, 6, 1|
|Dropout|22, 13, 12, 10, 7, 6, 1|
@@ -107,7 +107,7 @@ For an operator input/output's differentiability, it can be differentiable,
|Pow|15, 13, 12, 7, 1|
|QLinearConv|10|
|QLinearMatMul|21, 10|
-|QuantizeLinear|21, 19, 13, 10|
+|QuantizeLinear|23, 21, 19, 13, 10|
|RNN|22, 14, 7, 1|
|RandomNormal|22, 1|
|RandomNormalLike|22, 1|
@@ -169,7 +169,7 @@ For an operator input/output's differentiability, it can be differentiable,
|AffineGrid|20|20|
|Bernoulli|22, 15|22|
|BlackmanWindow|17|17|
-|CastLike|21, 19, 15|21|
+|CastLike|23, 21, 19, 15|23|
|Celu|12|12|
|CenterCropPad|18|18|
|Clip|13, 12, 11, 6, 1|13|
@@ -3400,9 +3400,9 @@ expect(
#### Version
-This version of the operator has been available since version 21 of the default ONNX operator set.
+This version of the operator has been available since version 23 of the default ONNX operator set.
-Other versions of this operator: 1, 6, 9, 13, 19
+Other versions of this operator: 1, 6, 9, 13, 19, 21
#### Attributes
@@ -3430,9 +3430,9 @@ Other versions of this operator: 1, 15, 19
+Other versions of this operator: 15, 19, 21
#### Attributes
@@ -3898,9 +3952,9 @@ Other versions of this operator: 15,
-T1 : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8), tensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16), tensor(float), tensor(double), tensor(string), tensor(bool), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(uint4), tensor(int4)
+T1 : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8), tensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16), tensor(float), tensor(double), tensor(string), tensor(bool), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(uint4), tensor(int4), tensor(float4e2m1)
Constrain input types. Casting from complex is not supported.
-T2 : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8), tensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16), tensor(float), tensor(double), tensor(string), tensor(bool), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(uint4), tensor(int4)
+T2 : tensor(uint8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(int8), tensor(int16), tensor(int32), tensor(int64), tensor(bfloat16), tensor(float16), tensor(float), tensor(double), tensor(string), tensor(bool), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(uint4), tensor(int4), tensor(float4e2m1)
Constrain output types. Casting to complex is not supported.
@@ -7514,9 +7568,9 @@ expect(node, inputs=[x], outputs=[y], name="test_depthtospace_example")
#### Version
-This version of the operator has been available since version 21 of the default ONNX operator set.
+This version of the operator has been available since version 23 of the default ONNX operator set.
-Other versions of this operator: 10, 13, 19
+Other versions of this operator: 10, 13, 19, 21
#### Attributes
@@ -7548,7 +7602,7 @@ Other versions of this operator: 10
#### Type Constraints
-- T1 : tensor(int8), tensor(uint8), tensor(int16), tensor(uint16), tensor(int32), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(uint4), tensor(int4)
+- T1 : tensor(int8), tensor(uint8), tensor(int16), tensor(uint16), tensor(int32), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(uint4), tensor(int4), tensor(float4e2m1)
- The type of the inputs 'x_zero_point' and 'x'.
- T2 : tensor(float), tensor(float16), tensor(bfloat16)
- 'x_scale' determines the output type.
@@ -7808,6 +7862,34 @@ expect(
+
+float4e2m1
+
+```python
+node = onnx.helper.make_node(
+ "DequantizeLinear",
+ inputs=["x", "x_scale", "x_zero_point"],
+ outputs=["y"],
+ axis=0,
+)
+
+# scalar zero point and scale
+x = make_tensor("x", TensorProto.FLOAT4E2M1, [5], [0, 1, -1, 1.5, -4])
+x_scale = np.float32(2)
+x_zero_point = make_tensor("x_zero_point", TensorProto.FLOAT4E2M1, (1,), [0])
+y = np.array([0, 2, -2, 3, -8], dtype=np.float32)
+
+expect(
+ node,
+ inputs=[x, x_scale, x_zero_point],
+ outputs=[y],
+ name="test_dequantizelinear_float4e2m1",
+)
+```
+
+
+
+
int16
@@ -20466,9 +20548,9 @@ for quant_type_name in ["uint8", "int8"]:
#### Version
-This version of the operator has been available since version 21 of the default ONNX operator set.
+This version of the operator has been available since version 23 of the default ONNX operator set.
-Other versions of this operator: 10, 13, 19
+Other versions of this operator: 10, 13, 19, 21
#### Attributes
@@ -20506,7 +20588,7 @@ Other versions of this operator: 10
- T1 : tensor(float), tensor(float16), tensor(bfloat16), tensor(int32)
- The type of the input 'x'.
-- T2 : tensor(int8), tensor(uint8), tensor(int16), tensor(uint16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(uint4), tensor(int4)
+- T2 : tensor(int8), tensor(uint8), tensor(int16), tensor(uint16), tensor(float8e4m3fn), tensor(float8e4m3fnuz), tensor(float8e5m2), tensor(float8e5m2fnuz), tensor(uint4), tensor(int4), tensor(float4e2m1)
- The type of the input `y_zero_point` and the output `y`.
@@ -20737,6 +20819,50 @@ expect(
+
+float4e2m1
+
+```python
+node = onnx.helper.make_node(
+ "QuantizeLinear",
+ inputs=["x", "y_scale", "y_zero_point"],
+ outputs=["y"],
+ axis=0,
+)
+
+x = np.array(
+ [
+ [0.0, 2.5, 4.8, 8.6],
+ [-30, -20, 6, 9],
+ [-0.0, -2.5, -4.8, -8.6],
+ ]
+).astype(np.float32)
+
+y_scale = np.asarray([2.0, 3.0, 4.0], dtype=np.float32)
+y_zero_point = make_tensor(
+ "y_zero_point",
+ TensorProto.FLOAT4E2M1,
+ y_scale.shape,
+ np.zeros_like(y_scale),
+)
+y = make_tensor(
+ "y",
+ TensorProto.FLOAT4E2M1,
+ x.shape,
+ [0, 1, 2, 4, -6, -6, 2, 3, 0, -0.5, -1, -2],
+)
+
+expect(
+ node,
+ inputs=[x, y_scale, y_zero_point],
+ outputs=[y],
+ name="test_quantizelinear_float4e2m1",
+)
+```
+
+
+
+
int16
diff --git a/docs/TestCoverage.md b/docs/TestCoverage.md
index a6cf61752f1..a974fac2498 100644
--- a/docs/TestCoverage.md
+++ b/docs/TestCoverage.md
@@ -2350,6 +2350,10 @@ test_cases = [
("INT4", "FLOAT"),
("INT4", "FLOAT16"),
("INT4", "INT8"),
+ ("FLOAT4E2M1", "FLOAT"),
+ ("FLOAT4E2M1", "FLOAT16"),
+ ("FLOAT", "FLOAT4E2M1"),
+ ("FLOAT16", "FLOAT4E2M1"),
]
vect_float32_to_float8e4m3 = np.vectorize(float32_to_float8e4m3)
@@ -2566,7 +2570,57 @@ for from_type, to_type in test_cases:
output_type_proto = onnx.helper.make_tensor_type_proto(
getattr(TensorProto, to_type), input_shape
)
+ elif from_type == "FLOAT4E2M1" or to_type == "FLOAT4E2M1":
+ np_fp32 = np.array(
+ [
+ "0.48",
+ "0.25",
+ "1.05",
+ "-3.5",
+ "-8",
+ "9",
+ "1000000",
+ "1e-7",
+ "NaN",
+ "INF",
+ "+INF",
+ "-INF",
+ "-4",
+ "0.01",
+ "-0.0",
+ ],
+ dtype=np.float32,
+ )
+ input_shape = (3, 5)
+ if from_type == "FLOAT":
+ input_values = np_fp32
+ input = make_tensor(
+ "x", TensorProto.FLOAT, input_shape, input_values.tolist()
+ )
+ elif from_type == "FLOAT16":
+ input_values = np_fp32.astype(np.float16).astype(np.float32)
+ input = make_tensor(
+ "x", TensorProto.FLOAT16, input_shape, input_values.tolist()
+ )
+ elif from_type == "FLOAT4E2M1":
+ input = make_tensor(
+ "x", TensorProto.FLOAT4E2M1, input_shape, np_fp32.tolist()
+ )
+ else:
+ raise ValueError(
+ f"Conversion from {from_type} to {to_type} is not tested."
+ )
+ if to_type not in ("FLOAT", "FLOAT16", "FLOAT4E2M1"):
+ raise ValueError(
+ f"Conversion from {from_type} to {to_type} is not tested."
+ )
+ expected = unpacked_float4e2m1_to_float32(
+ subbyte.float32_to_float4e2m1_unpacked(np_fp32)
+ )
+ output = make_tensor(
+ "y", getattr(TensorProto, to_type), input_shape, expected.tolist()
+ )
elif from_type != "STRING":
input = np.random.random_sample(shape).astype(
helper.tensor_dtype_to_np_dtype(getattr(TensorProto, from_type))
@@ -5317,7 +5371,7 @@ expect(node, inputs=[x], outputs=[y], name="test_depthtospace_example")
### DequantizeLinear
-There are 11 test cases, listed as following:
+There are 12 test cases, listed as follows:
axis
@@ -5554,6 +5608,32 @@ expect(
)
```
+
+
+float4e2m1
+
+```python
+node = onnx.helper.make_node(
+ "DequantizeLinear",
+ inputs=["x", "x_scale", "x_zero_point"],
+ outputs=["y"],
+ axis=0,
+)
+
+# scalar zero point and scale
+x = make_tensor("x", TensorProto.FLOAT4E2M1, [5], [0, 1, -1, 1.5, -4])
+x_scale = np.float32(2)
+x_zero_point = make_tensor("x_zero_point", TensorProto.FLOAT4E2M1, (1,), [0])
+y = np.array([0, 2, -2, 3, -8], dtype=np.float32)
+
+expect(
+ node,
+ inputs=[x, x_scale, x_zero_point],
+ outputs=[y],
+ name="test_dequantizelinear_float4e2m1",
+)
+```
+
int16
@@ -13937,7 +14017,7 @@ for quant_type_name in ["uint8", "int8"]:
### QuantizeLinear
-There are 10 test cases, listed as following:
+There are 11 test cases, listed as follows:
axis
@@ -14151,6 +14231,48 @@ expect(
)
```
+
+
+float4e2m1
+
+```python
+node = onnx.helper.make_node(
+ "QuantizeLinear",
+ inputs=["x", "y_scale", "y_zero_point"],
+ outputs=["y"],
+ axis=0,
+)
+
+x = np.array(
+ [
+ [0.0, 2.5, 4.8, 8.6],
+ [-30, -20, 6, 9],
+ [-0.0, -2.5, -4.8, -8.6],
+ ]
+).astype(np.float32)
+
+y_scale = np.asarray([2.0, 3.0, 4.0], dtype=np.float32)
+y_zero_point = make_tensor(
+ "y_zero_point",
+ TensorProto.FLOAT4E2M1,
+ y_scale.shape,
+ np.zeros_like(y_scale),
+)
+y = make_tensor(
+ "y",
+ TensorProto.FLOAT4E2M1,
+ x.shape,
+ [0, 1, 2, 4, -6, -6, 2, 3, 0, -0.5, -1, -2],
+)
+
+expect(
+ node,
+ inputs=[x, y_scale, y_zero_point],
+ outputs=[y],
+ name="test_quantizelinear_float4e2m1",
+)
+```
+
int16
diff --git a/onnx/backend/test/case/node/cast.py b/onnx/backend/test/case/node/cast.py
index efcc7ddf944..9696373920d 100644
--- a/onnx/backend/test/case/node/cast.py
+++ b/onnx/backend/test/case/node/cast.py
@@ -18,7 +18,11 @@
make_tensor,
tensor_dtype_to_field,
)
-from onnx.numpy_helper import float8e4m3_to_float32, float8e5m2_to_float32
+from onnx.numpy_helper import (
+ float8e4m3_to_float32,
+ float8e5m2_to_float32,
+ unpacked_float4e2m1_to_float32,
+)
class Cast(Base):
@@ -62,6 +66,10 @@ def export() -> None:
("INT4", "FLOAT"),
("INT4", "FLOAT16"),
("INT4", "INT8"),
+ ("FLOAT4E2M1", "FLOAT"),
+ ("FLOAT4E2M1", "FLOAT16"),
+ ("FLOAT", "FLOAT4E2M1"),
+ ("FLOAT16", "FLOAT4E2M1"),
]
vect_float32_to_float8e4m3 = np.vectorize(float32_to_float8e4m3)
@@ -278,7 +286,57 @@ def export() -> None:
output_type_proto = onnx.helper.make_tensor_type_proto(
getattr(TensorProto, to_type), input_shape
)
+ elif from_type == "FLOAT4E2M1" or to_type == "FLOAT4E2M1":
+ np_fp32 = np.array(
+ [
+ "0.48",
+ "0.25",
+ "1.05",
+ "-3.5",
+ "-8",
+ "9",
+ "1000000",
+ "1e-7",
+ "NaN",
+ "INF",
+ "+INF",
+ "-INF",
+ "-4",
+ "0.01",
+ "-0.0",
+ ],
+ dtype=np.float32,
+ )
+ input_shape = (3, 5)
+ if from_type == "FLOAT":
+ input_values = np_fp32
+ input = make_tensor(
+ "x", TensorProto.FLOAT, input_shape, input_values.tolist()
+ )
+ elif from_type == "FLOAT16":
+ input_values = np_fp32.astype(np.float16).astype(np.float32)
+ input = make_tensor(
+ "x", TensorProto.FLOAT16, input_shape, input_values.tolist()
+ )
+ elif from_type == "FLOAT4E2M1":
+ input = make_tensor(
+ "x", TensorProto.FLOAT4E2M1, input_shape, np_fp32.tolist()
+ )
+ else:
+ raise ValueError(
+ f"Conversion from {from_type} to {to_type} is not tested."
+ )
+ if to_type not in ("FLOAT", "FLOAT16", "FLOAT4E2M1"):
+ raise ValueError(
+ f"Conversion from {from_type} to {to_type} is not tested."
+ )
+ expected = unpacked_float4e2m1_to_float32(
+ subbyte.float32_to_float4e2m1_unpacked(np_fp32)
+ )
+ output = make_tensor(
+ "y", getattr(TensorProto, to_type), input_shape, expected.tolist()
+ )
elif from_type != "STRING":
input = np.random.random_sample(shape).astype(
helper.tensor_dtype_to_np_dtype(getattr(TensorProto, from_type))
diff --git a/onnx/backend/test/case/node/dequantizelinear.py b/onnx/backend/test/case/node/dequantizelinear.py
index cdaf8bdf5b8..6d917449bb6 100644
--- a/onnx/backend/test/case/node/dequantizelinear.py
+++ b/onnx/backend/test/case/node/dequantizelinear.py
@@ -235,6 +235,28 @@ def export_int4() -> None:
name="test_dequantizelinear_int4",
)
+ @staticmethod
+ def export_float4e2m1() -> None:
+ node = onnx.helper.make_node(
+ "DequantizeLinear",
+ inputs=["x", "x_scale", "x_zero_point"],
+ outputs=["y"],
+ axis=0,
+ )
+
+ # scalar zero point and scale
+ x = make_tensor("x", TensorProto.FLOAT4E2M1, [5], [0, 1, -1, 1.5, -4])
+ x_scale = np.float32(2)
+ x_zero_point = make_tensor("x_zero_point", TensorProto.FLOAT4E2M1, (1,), [0])
+ y = np.array([0, 2, -2, 3, -8], dtype=np.float32)
+
+ expect(
+ node,
+ inputs=[x, x_scale, x_zero_point],
+ outputs=[y],
+ name="test_dequantizelinear_float4e2m1",
+ )
+
@staticmethod
def export_blocked() -> None:
node = onnx.helper.make_node(
diff --git a/onnx/backend/test/case/node/quantizelinear.py b/onnx/backend/test/case/node/quantizelinear.py
index fbb2d0693cf..79cf1963c94 100644
--- a/onnx/backend/test/case/node/quantizelinear.py
+++ b/onnx/backend/test/case/node/quantizelinear.py
@@ -276,6 +276,44 @@ def export_int4() -> None:
name="test_quantizelinear_int4",
)
+ @staticmethod
+ def export_float4e2m1() -> None:
+ node = onnx.helper.make_node(
+ "QuantizeLinear",
+ inputs=["x", "y_scale", "y_zero_point"],
+ outputs=["y"],
+ axis=0,
+ )
+
+ x = np.array(
+ [
+ [0.0, 2.5, 4.8, 8.6],
+ [-30, -20, 6, 9],
+ [-0.0, -2.5, -4.8, -8.6],
+ ]
+ ).astype(np.float32)
+
+ y_scale = np.asarray([2.0, 3.0, 4.0], dtype=np.float32)
+ y_zero_point = make_tensor(
+ "y_zero_point",
+ TensorProto.FLOAT4E2M1,
+ y_scale.shape,
+ np.zeros_like(y_scale),
+ )
+ y = make_tensor(
+ "y",
+ TensorProto.FLOAT4E2M1,
+ x.shape,
+ [0, 1, 2, 4, -6, -6, 2, 3, 0, -0.5, -1, -2],
+ )
+
+ expect(
+ node,
+ inputs=[x, y_scale, y_zero_point],
+ outputs=[y],
+ name="test_quantizelinear_float4e2m1",
+ )
+
@staticmethod
def export_blocked_asymmetric() -> None:
node = onnx.helper.make_node(
diff --git a/onnx/backend/test/data/node/test_cast_BFLOAT16_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_cast_BFLOAT16_to_FLOAT/model.onnx
index 7402af390d4..caebcbd248a 100644
Binary files a/onnx/backend/test/data/node/test_cast_BFLOAT16_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_cast_BFLOAT16_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_DOUBLE_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_cast_DOUBLE_to_FLOAT/model.onnx
index 3594e20970a..cda31f82bb5 100644
Binary files a/onnx/backend/test/data/node/test_cast_DOUBLE_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_cast_DOUBLE_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_DOUBLE_to_FLOAT16/model.onnx b/onnx/backend/test/data/node/test_cast_DOUBLE_to_FLOAT16/model.onnx
index db456b92e86..a6b9d6fb14c 100644
Binary files a/onnx/backend/test/data/node/test_cast_DOUBLE_to_FLOAT16/model.onnx and b/onnx/backend/test/data/node/test_cast_DOUBLE_to_FLOAT16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT16_to_DOUBLE/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT16_to_DOUBLE/model.onnx
index 358701489e2..f68f402c54f 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT16_to_DOUBLE/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT16_to_DOUBLE/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT/model.onnx
index cebf493bff6..2da51dfbb59 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT4E2M1/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT4E2M1/model.onnx
new file mode 100644
index 00000000000..27d5957e2f9
Binary files /dev/null and b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT4E2M1/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT4E2M1/test_data_set_0/input_0.pb b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT4E2M1/test_data_set_0/input_0.pb
new file mode 100644
index 00000000000..1a384c7934e
--- /dev/null
+++ b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT4E2M1/test_data_set_0/input_0.pb
@@ -0,0 +1,2 @@
+
+*'ohxBBx
\ No newline at end of file
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT4E2M1/test_data_set_0/output_0.pb b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT4E2M1/test_data_set_0/output_0.pb
new file mode 100644
index 00000000000..8b4a410ccfe
--- /dev/null
+++ b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT4E2M1/test_data_set_0/output_0.pb
@@ -0,0 +1,2 @@
+*
+wBy
\ No newline at end of file
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E4M3FN/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E4M3FN/model.onnx
index 79c77d5e7a4..fbe9df364c7 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E4M3FN/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E4M3FN/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E4M3FNUZ/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E4M3FNUZ/model.onnx
index 4374cc235be..a964306416a 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E4M3FNUZ/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E4M3FNUZ/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E5M2/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E5M2/model.onnx
index 9c184eed0a7..302b275fa33 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E5M2/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E5M2/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E5M2FNUZ/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E5M2FNUZ/model.onnx
index 64a4c1cf671..ff45041d2e7 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E5M2FNUZ/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT16_to_FLOAT8E5M2FNUZ/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT16_to_INT4/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT16_to_INT4/model.onnx
index 5b007d53f17..8df7b40d83d 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT16_to_INT4/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT16_to_INT4/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT16_to_UINT4/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT16_to_UINT4/model.onnx
index ea80055f7dd..8e03f69793c 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT16_to_UINT4/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT16_to_UINT4/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT/model.onnx
new file mode 100644
index 00000000000..4bdbba640e5
Binary files /dev/null and b/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT/test_data_set_0/input_0.pb b/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT/test_data_set_0/input_0.pb
new file mode 100644
index 00000000000..e493e52316a
--- /dev/null
+++ b/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT/test_data_set_0/input_0.pb
@@ -0,0 +1,2 @@
+*
+wBx
\ No newline at end of file
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT/test_data_set_0/output_0.pb b/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT/test_data_set_0/output_0.pb
new file mode 100644
index 00000000000..25682984fd8
Binary files /dev/null and b/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT/test_data_set_0/output_0.pb differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT16/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT16/model.onnx
new file mode 100644
index 00000000000..8e870de7662
Binary files /dev/null and b/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT16/test_data_set_0/input_0.pb b/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT16/test_data_set_0/input_0.pb
new file mode 100644
index 00000000000..e493e52316a
--- /dev/null
+++ b/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT16/test_data_set_0/input_0.pb
@@ -0,0 +1,2 @@
+*
+wBx
\ No newline at end of file
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT16/test_data_set_0/output_0.pb b/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT16/test_data_set_0/output_0.pb
new file mode 100644
index 00000000000..333fbe86675
Binary files /dev/null and b/onnx/backend/test/data/node/test_cast_FLOAT4E2M1_to_FLOAT16/test_data_set_0/output_0.pb differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FNUZ_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FNUZ_to_FLOAT/model.onnx
index afc96a8f4fc..a2b1b31557d 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FNUZ_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FNUZ_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FNUZ_to_FLOAT16/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FNUZ_to_FLOAT16/model.onnx
index 13d37732025..128b8b999d7 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FNUZ_to_FLOAT16/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FNUZ_to_FLOAT16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FN_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FN_to_FLOAT/model.onnx
index 4e1c7b923b6..c043fc223b7 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FN_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FN_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FN_to_FLOAT16/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FN_to_FLOAT16/model.onnx
index 4a465f2af59..c2eae564154 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FN_to_FLOAT16/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT8E4M3FN_to_FLOAT16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT8E5M2FNUZ_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT8E5M2FNUZ_to_FLOAT/model.onnx
index ee198dad01e..5103c888145 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT8E5M2FNUZ_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT8E5M2FNUZ_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT8E5M2FNUZ_to_FLOAT16/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT8E5M2FNUZ_to_FLOAT16/model.onnx
index 302d3375996..e2120722408 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT8E5M2FNUZ_to_FLOAT16/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT8E5M2FNUZ_to_FLOAT16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT8E5M2_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT8E5M2_to_FLOAT/model.onnx
index a3e8e4ac337..2ef3c1b6853 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT8E5M2_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT8E5M2_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT8E5M2_to_FLOAT16/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT8E5M2_to_FLOAT16/model.onnx
index f187fc0ed9d..e4ef5b789a7 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT8E5M2_to_FLOAT16/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT8E5M2_to_FLOAT16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT_to_BFLOAT16/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT_to_BFLOAT16/model.onnx
index 811b2d84113..f78f2462dab 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT_to_BFLOAT16/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT_to_BFLOAT16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT_to_DOUBLE/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT_to_DOUBLE/model.onnx
index dc7997cddd8..6d57123bfab 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT_to_DOUBLE/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT_to_DOUBLE/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT16/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT16/model.onnx
index a80e603f0d1..bc0a082bfc0 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT16/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT4E2M1/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT4E2M1/model.onnx
new file mode 100644
index 00000000000..e3a3cd6cbaa
Binary files /dev/null and b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT4E2M1/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT4E2M1/test_data_set_0/input_0.pb b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT4E2M1/test_data_set_0/input_0.pb
new file mode 100644
index 00000000000..c30e7f3c452
Binary files /dev/null and b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT4E2M1/test_data_set_0/input_0.pb differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT4E2M1/test_data_set_0/output_0.pb b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT4E2M1/test_data_set_0/output_0.pb
new file mode 100644
index 00000000000..8b4a410ccfe
--- /dev/null
+++ b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT4E2M1/test_data_set_0/output_0.pb
@@ -0,0 +1,2 @@
+*
+wBy
\ No newline at end of file
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E4M3FN/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E4M3FN/model.onnx
index 6612d1b0206..c85c7aad64c 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E4M3FN/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E4M3FN/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E4M3FNUZ/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E4M3FNUZ/model.onnx
index 294293e6154..caf8f5d34b7 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E4M3FNUZ/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E4M3FNUZ/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E5M2/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E5M2/model.onnx
index f92192a72be..19c3266cc41 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E5M2/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E5M2/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E5M2FNUZ/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E5M2FNUZ/model.onnx
index 1c7e8748bab..01f5ed6189c 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E5M2FNUZ/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT_to_FLOAT8E5M2FNUZ/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT_to_INT4/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT_to_INT4/model.onnx
index 9798921001e..0e2100cc86e 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT_to_INT4/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT_to_INT4/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT_to_STRING/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT_to_STRING/model.onnx
index 04fe3fb35e3..0f7727e7bdc 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT_to_STRING/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT_to_STRING/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_FLOAT_to_UINT4/model.onnx b/onnx/backend/test/data/node/test_cast_FLOAT_to_UINT4/model.onnx
index 432ba0a4bfc..d07449f667f 100644
Binary files a/onnx/backend/test/data/node/test_cast_FLOAT_to_UINT4/model.onnx and b/onnx/backend/test/data/node/test_cast_FLOAT_to_UINT4/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_INT4_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_cast_INT4_to_FLOAT/model.onnx
index 8449c1737f0..e08ed20c2e3 100644
Binary files a/onnx/backend/test/data/node/test_cast_INT4_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_cast_INT4_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_INT4_to_FLOAT16/model.onnx b/onnx/backend/test/data/node/test_cast_INT4_to_FLOAT16/model.onnx
index f7f581c3d9a..542b8e90e2c 100644
Binary files a/onnx/backend/test/data/node/test_cast_INT4_to_FLOAT16/model.onnx and b/onnx/backend/test/data/node/test_cast_INT4_to_FLOAT16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_INT4_to_INT8/model.onnx b/onnx/backend/test/data/node/test_cast_INT4_to_INT8/model.onnx
index ecfafda26b3..0ce8722b60b 100644
Binary files a/onnx/backend/test/data/node/test_cast_INT4_to_INT8/model.onnx and b/onnx/backend/test/data/node/test_cast_INT4_to_INT8/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_STRING_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_cast_STRING_to_FLOAT/model.onnx
index bc088fce757..ca49d476a9c 100644
Binary files a/onnx/backend/test/data/node/test_cast_STRING_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_cast_STRING_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_UINT4_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_cast_UINT4_to_FLOAT/model.onnx
index 991b21832ea..6ec793b8a4f 100644
Binary files a/onnx/backend/test/data/node/test_cast_UINT4_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_cast_UINT4_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_UINT4_to_FLOAT16/model.onnx b/onnx/backend/test/data/node/test_cast_UINT4_to_FLOAT16/model.onnx
index 9edb9126318..dd4f409b340 100644
Binary files a/onnx/backend/test/data/node/test_cast_UINT4_to_FLOAT16/model.onnx and b/onnx/backend/test/data/node/test_cast_UINT4_to_FLOAT16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_UINT4_to_UINT8/model.onnx b/onnx/backend/test/data/node/test_cast_UINT4_to_UINT8/model.onnx
index ff3edd591ce..f2d61ab673c 100644
Binary files a/onnx/backend/test/data/node/test_cast_UINT4_to_UINT8/model.onnx and b/onnx/backend/test/data/node/test_cast_UINT4_to_UINT8/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E4M3FN/model.onnx b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E4M3FN/model.onnx
index 5f6fa0874c3..ecf92ce8691 100644
Binary files a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E4M3FN/model.onnx and b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E4M3FN/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E4M3FNUZ/model.onnx b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E4M3FNUZ/model.onnx
index 6bddc0fce58..64b97499e91 100644
Binary files a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E4M3FNUZ/model.onnx and b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E4M3FNUZ/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E5M2/model.onnx b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E5M2/model.onnx
index 2de339e0b9f..48727e0ed1d 100644
Binary files a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E5M2/model.onnx and b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E5M2/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E5M2FNUZ/model.onnx b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E5M2FNUZ/model.onnx
index 6784a0cea84..b0d93efc40b 100644
Binary files a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E5M2FNUZ/model.onnx and b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT16_to_FLOAT8E5M2FNUZ/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E4M3FN/model.onnx b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E4M3FN/model.onnx
index 85d6893319e..f3e57ee5189 100644
Binary files a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E4M3FN/model.onnx and b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E4M3FN/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E4M3FNUZ/model.onnx b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E4M3FNUZ/model.onnx
index 6a9040e8dd8..637e59627fc 100644
Binary files a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E4M3FNUZ/model.onnx and b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E4M3FNUZ/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E5M2/model.onnx b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E5M2/model.onnx
index bbb523b53c9..d5338daa8ee 100644
Binary files a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E5M2/model.onnx and b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E5M2/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E5M2FNUZ/model.onnx b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E5M2FNUZ/model.onnx
index 2949e4fecf7..049585694ad 100644
Binary files a/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E5M2FNUZ/model.onnx and b/onnx/backend/test/data/node/test_cast_no_saturate_FLOAT_to_FLOAT8E5M2FNUZ/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_BFLOAT16_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_castlike_BFLOAT16_to_FLOAT/model.onnx
index e317ab8b07c..5db6e3d281d 100644
Binary files a/onnx/backend/test/data/node/test_castlike_BFLOAT16_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_castlike_BFLOAT16_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_BFLOAT16_to_FLOAT_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_BFLOAT16_to_FLOAT_expanded/model.onnx
index ba46944dda9..33389f328d9 100644
Binary files a/onnx/backend/test/data/node/test_castlike_BFLOAT16_to_FLOAT_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_BFLOAT16_to_FLOAT_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT/model.onnx
index c22a921f46f..013bdc6abf3 100644
Binary files a/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT16/model.onnx b/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT16/model.onnx
index 54497577cbe..205e430fe95 100644
Binary files a/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT16/model.onnx and b/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT16_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT16_expanded/model.onnx
index 402f6db2b1a..124adb65d18 100644
Binary files a/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT16_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT16_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT_expanded/model.onnx
index 59650e11234..5ff61d77bb4 100644
Binary files a/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_DOUBLE_to_FLOAT_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT16_to_DOUBLE/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT16_to_DOUBLE/model.onnx
index d58627cf3b4..22cd04e8bc3 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT16_to_DOUBLE/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT16_to_DOUBLE/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT16_to_DOUBLE_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT16_to_DOUBLE_expanded/model.onnx
index a734e2346e6..97062b11ab6 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT16_to_DOUBLE_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT16_to_DOUBLE_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT16_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT16_to_FLOAT/model.onnx
index 88caac7e985..1f9cefac0fb 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT16_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT16_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT16_to_FLOAT_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT16_to_FLOAT_expanded/model.onnx
index b32abb1e770..43a0dfc6e33 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT16_to_FLOAT_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT16_to_FLOAT_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FNUZ_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FNUZ_to_FLOAT/model.onnx
index ce19c577d5c..5b6c1d318fc 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FNUZ_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FNUZ_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FNUZ_to_FLOAT_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FNUZ_to_FLOAT_expanded/model.onnx
index 41340017e82..84f29cb3c3a 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FNUZ_to_FLOAT_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FNUZ_to_FLOAT_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FN_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FN_to_FLOAT/model.onnx
index e7e454dcda0..4b7ce8654da 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FN_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FN_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FN_to_FLOAT_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FN_to_FLOAT_expanded/model.onnx
index df41bc33ed5..0168f57985d 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FN_to_FLOAT_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT8E4M3FN_to_FLOAT_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2FNUZ_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2FNUZ_to_FLOAT/model.onnx
index 8d28ca1ad49..2b4af760968 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2FNUZ_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2FNUZ_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2FNUZ_to_FLOAT_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2FNUZ_to_FLOAT_expanded/model.onnx
index e65bcf05676..c5cc28cfafa 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2FNUZ_to_FLOAT_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2FNUZ_to_FLOAT_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2_to_FLOAT/model.onnx
index 5d3e9dc5a9b..bae9ae46c58 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2_to_FLOAT_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2_to_FLOAT_expanded/model.onnx
index 2c267d260cc..a93e3a08388 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2_to_FLOAT_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT8E5M2_to_FLOAT_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_BFLOAT16/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_BFLOAT16/model.onnx
index ab692876e69..5055948da64 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_BFLOAT16/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_BFLOAT16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_BFLOAT16_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_BFLOAT16_expanded/model.onnx
index 38fcc29ec90..25536e1eded 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_BFLOAT16_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_BFLOAT16_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_DOUBLE/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_DOUBLE/model.onnx
index 5621a9fd4f4..40051c68b35 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_DOUBLE/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_DOUBLE/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_DOUBLE_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_DOUBLE_expanded/model.onnx
index c87485cf41d..6cd67b2b959 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_DOUBLE_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_DOUBLE_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT16/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT16/model.onnx
index eac88033128..45a20c797a2 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT16/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT16_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT16_expanded/model.onnx
index 2d810d9dce2..e2ebfe340ad 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT16_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT16_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FN/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FN/model.onnx
index 63ceb525138..496d8c0aca0 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FN/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FN/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FNUZ/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FNUZ/model.onnx
index 931961e2a98..26c14094ff0 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FNUZ/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FNUZ/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FNUZ_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FNUZ_expanded/model.onnx
index 7cbdb218282..b1a09400222 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FNUZ_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FNUZ_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FN_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FN_expanded/model.onnx
index 1479a635e37..d6bc622a834 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FN_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E4M3FN_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2/model.onnx
index f133decc26c..e8ad6f9b7c1 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2FNUZ/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2FNUZ/model.onnx
index 2b08ec039ec..967988ea83f 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2FNUZ/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2FNUZ/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2FNUZ_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2FNUZ_expanded/model.onnx
index 21caa48f0f7..5c42c2543ac 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2FNUZ_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2FNUZ_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2_expanded/model.onnx
index ac95dc3eda3..7d95ccc230a 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_FLOAT8E5M2_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_STRING/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_STRING/model.onnx
index 4d1f6be991d..4df795fc36c 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_STRING/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_STRING/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_FLOAT_to_STRING_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_FLOAT_to_STRING_expanded/model.onnx
index 7a0ae59b49e..8974ca55a77 100644
Binary files a/onnx/backend/test/data/node/test_castlike_FLOAT_to_STRING_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_FLOAT_to_STRING_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_STRING_to_FLOAT/model.onnx b/onnx/backend/test/data/node/test_castlike_STRING_to_FLOAT/model.onnx
index d574b950c05..f3b7f6505c7 100644
Binary files a/onnx/backend/test/data/node/test_castlike_STRING_to_FLOAT/model.onnx and b/onnx/backend/test/data/node/test_castlike_STRING_to_FLOAT/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_castlike_STRING_to_FLOAT_expanded/model.onnx b/onnx/backend/test/data/node/test_castlike_STRING_to_FLOAT_expanded/model.onnx
index a2d99d3a8cd..dedd9577858 100644
Binary files a/onnx/backend/test/data/node/test_castlike_STRING_to_FLOAT_expanded/model.onnx and b/onnx/backend/test/data/node/test_castlike_STRING_to_FLOAT_expanded/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear/model.onnx b/onnx/backend/test/data/node/test_dequantizelinear/model.onnx
index 290c586624e..2e4ee2da8b2 100644
Binary files a/onnx/backend/test/data/node/test_dequantizelinear/model.onnx and b/onnx/backend/test/data/node/test_dequantizelinear/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_axis/model.onnx b/onnx/backend/test/data/node/test_dequantizelinear_axis/model.onnx
index d595d79e2e8..228da4dfc39 100644
Binary files a/onnx/backend/test/data/node/test_dequantizelinear_axis/model.onnx and b/onnx/backend/test/data/node/test_dequantizelinear_axis/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_blocked/model.onnx b/onnx/backend/test/data/node/test_dequantizelinear_blocked/model.onnx
index a92e8bf607e..b334509aacf 100644
Binary files a/onnx/backend/test/data/node/test_dequantizelinear_blocked/model.onnx and b/onnx/backend/test/data/node/test_dequantizelinear_blocked/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_e4m3fn/model.onnx b/onnx/backend/test/data/node/test_dequantizelinear_e4m3fn/model.onnx
index 128b8d1747f..0a01ef513de 100644
Binary files a/onnx/backend/test/data/node/test_dequantizelinear_e4m3fn/model.onnx and b/onnx/backend/test/data/node/test_dequantizelinear_e4m3fn/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_e4m3fn_float16/model.onnx b/onnx/backend/test/data/node/test_dequantizelinear_e4m3fn_float16/model.onnx
index 0b6ce1fcb8e..67432330599 100644
Binary files a/onnx/backend/test/data/node/test_dequantizelinear_e4m3fn_float16/model.onnx and b/onnx/backend/test/data/node/test_dequantizelinear_e4m3fn_float16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_e4m3fn_zero_point/model.onnx b/onnx/backend/test/data/node/test_dequantizelinear_e4m3fn_zero_point/model.onnx
index e92fa6e0e6a..88569cfdf59 100644
Binary files a/onnx/backend/test/data/node/test_dequantizelinear_e4m3fn_zero_point/model.onnx and b/onnx/backend/test/data/node/test_dequantizelinear_e4m3fn_zero_point/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_e5m2/model.onnx b/onnx/backend/test/data/node/test_dequantizelinear_e5m2/model.onnx
index b81cfaf5301..86a5b045edf 100644
Binary files a/onnx/backend/test/data/node/test_dequantizelinear_e5m2/model.onnx and b/onnx/backend/test/data/node/test_dequantizelinear_e5m2/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/model.onnx b/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/model.onnx
new file mode 100644
index 00000000000..9957cc0c524
Binary files /dev/null and b/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/test_data_set_0/input_0.pb b/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/test_data_set_0/input_0.pb
new file mode 100644
index 00000000000..41e46fec0bc
--- /dev/null
+++ b/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/test_data_set_0/input_0.pb
@@ -0,0 +1 @@
+* :Bx
\ No newline at end of file
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/test_data_set_0/input_1.pb b/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/test_data_set_0/input_1.pb
new file mode 100644
index 00000000000..d0d64800429
Binary files /dev/null and b/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/test_data_set_0/input_1.pb differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/test_data_set_0/input_2.pb b/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/test_data_set_0/input_2.pb
new file mode 100644
index 00000000000..e5a43e2f5c9
Binary files /dev/null and b/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/test_data_set_0/input_2.pb differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/test_data_set_0/output_0.pb b/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/test_data_set_0/output_0.pb
new file mode 100644
index 00000000000..df2e8bac210
Binary files /dev/null and b/onnx/backend/test/data/node/test_dequantizelinear_float4e2m1/test_data_set_0/output_0.pb differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_int16/model.onnx b/onnx/backend/test/data/node/test_dequantizelinear_int16/model.onnx
index bc40cc7d0d6..3c2235fb0bc 100644
Binary files a/onnx/backend/test/data/node/test_dequantizelinear_int16/model.onnx and b/onnx/backend/test/data/node/test_dequantizelinear_int16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_int4/model.onnx b/onnx/backend/test/data/node/test_dequantizelinear_int4/model.onnx
index 353ffe9f3fe..c2cc2215b99 100644
Binary files a/onnx/backend/test/data/node/test_dequantizelinear_int4/model.onnx and b/onnx/backend/test/data/node/test_dequantizelinear_int4/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_uint16/model.onnx b/onnx/backend/test/data/node/test_dequantizelinear_uint16/model.onnx
index f42ab9f657c..3c52ad78d10 100644
Binary files a/onnx/backend/test/data/node/test_dequantizelinear_uint16/model.onnx and b/onnx/backend/test/data/node/test_dequantizelinear_uint16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_dequantizelinear_uint4/model.onnx b/onnx/backend/test/data/node/test_dequantizelinear_uint4/model.onnx
index 658cc4ecdd9..254a1c124b0 100644
Binary files a/onnx/backend/test/data/node/test_dequantizelinear_uint4/model.onnx and b/onnx/backend/test/data/node/test_dequantizelinear_uint4/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_quantizelinear/model.onnx b/onnx/backend/test/data/node/test_quantizelinear/model.onnx
index 5c25d26c940..ae860ca0f90 100644
Binary files a/onnx/backend/test/data/node/test_quantizelinear/model.onnx and b/onnx/backend/test/data/node/test_quantizelinear/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_quantizelinear_axis/model.onnx b/onnx/backend/test/data/node/test_quantizelinear_axis/model.onnx
index cba37b4c5b7..fd0419cff90 100644
Binary files a/onnx/backend/test/data/node/test_quantizelinear_axis/model.onnx and b/onnx/backend/test/data/node/test_quantizelinear_axis/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_quantizelinear_blocked_asymmetric/model.onnx b/onnx/backend/test/data/node/test_quantizelinear_blocked_asymmetric/model.onnx
index 8adafd0dcd2..287437d3972 100644
Binary files a/onnx/backend/test/data/node/test_quantizelinear_blocked_asymmetric/model.onnx and b/onnx/backend/test/data/node/test_quantizelinear_blocked_asymmetric/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_quantizelinear_blocked_symmetric/model.onnx b/onnx/backend/test/data/node/test_quantizelinear_blocked_symmetric/model.onnx
index 4afdf6cacfe..2121744ef2e 100644
Binary files a/onnx/backend/test/data/node/test_quantizelinear_blocked_symmetric/model.onnx and b/onnx/backend/test/data/node/test_quantizelinear_blocked_symmetric/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_quantizelinear_e4m3fn/model.onnx b/onnx/backend/test/data/node/test_quantizelinear_e4m3fn/model.onnx
index 448b6fea940..066c5b94b8a 100644
Binary files a/onnx/backend/test/data/node/test_quantizelinear_e4m3fn/model.onnx and b/onnx/backend/test/data/node/test_quantizelinear_e4m3fn/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_quantizelinear_e5m2/model.onnx b/onnx/backend/test/data/node/test_quantizelinear_e5m2/model.onnx
index 670ec3343e4..3b82f6a1299 100644
Binary files a/onnx/backend/test/data/node/test_quantizelinear_e5m2/model.onnx and b/onnx/backend/test/data/node/test_quantizelinear_e5m2/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/model.onnx b/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/model.onnx
new file mode 100644
index 00000000000..dfecbdbf44f
Binary files /dev/null and b/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/test_data_set_0/input_0.pb b/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/test_data_set_0/input_0.pb
new file mode 100644
index 00000000000..29e2d756015
Binary files /dev/null and b/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/test_data_set_0/input_0.pb differ
diff --git a/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/test_data_set_0/input_1.pb b/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/test_data_set_0/input_1.pb
new file mode 100644
index 00000000000..c4cda36fa82
Binary files /dev/null and b/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/test_data_set_0/input_1.pb differ
diff --git a/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/test_data_set_0/input_2.pb b/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/test_data_set_0/input_2.pb
new file mode 100644
index 00000000000..dca288f7b12
Binary files /dev/null and b/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/test_data_set_0/input_2.pb differ
diff --git a/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/test_data_set_0/output_0.pb b/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/test_data_set_0/output_0.pb
new file mode 100644
index 00000000000..ad7efc327c2
--- /dev/null
+++ b/onnx/backend/test/data/node/test_quantizelinear_float4e2m1/test_data_set_0/output_0.pb
@@ -0,0 +1 @@
+* dTBy
\ No newline at end of file
diff --git a/onnx/backend/test/data/node/test_quantizelinear_int16/model.onnx b/onnx/backend/test/data/node/test_quantizelinear_int16/model.onnx
index 640b17df488..4ed02fdfa7e 100644
Binary files a/onnx/backend/test/data/node/test_quantizelinear_int16/model.onnx and b/onnx/backend/test/data/node/test_quantizelinear_int16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_quantizelinear_int4/model.onnx b/onnx/backend/test/data/node/test_quantizelinear_int4/model.onnx
index e55587153f5..e80a1915509 100644
Binary files a/onnx/backend/test/data/node/test_quantizelinear_int4/model.onnx and b/onnx/backend/test/data/node/test_quantizelinear_int4/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_quantizelinear_uint16/model.onnx b/onnx/backend/test/data/node/test_quantizelinear_uint16/model.onnx
index 2dd43b2f547..9c78767c029 100644
Binary files a/onnx/backend/test/data/node/test_quantizelinear_uint16/model.onnx and b/onnx/backend/test/data/node/test_quantizelinear_uint16/model.onnx differ
diff --git a/onnx/backend/test/data/node/test_quantizelinear_uint4/model.onnx b/onnx/backend/test/data/node/test_quantizelinear_uint4/model.onnx
index a2c0062f272..045c5f32f71 100644
Binary files a/onnx/backend/test/data/node/test_quantizelinear_uint4/model.onnx and b/onnx/backend/test/data/node/test_quantizelinear_uint4/model.onnx differ
diff --git a/onnx/defs/operator_sets.h b/onnx/defs/operator_sets.h
index de19cf1f32f..34aaf81598d 100644
--- a/onnx/defs/operator_sets.h
+++ b/onnx/defs/operator_sets.h
@@ -1291,11 +1291,18 @@ class OpSet_Onnx_ver22 {
};
// Iterate over schema from ai.onnx version 23
+class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 23, Cast);
+class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 23, CastLike);
+class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 23, DequantizeLinear);
+class ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 23, QuantizeLinear);
+
class OpSet_Onnx_ver23 {
public:
  static void ForEachSchema(std::function<void(OpSchema&&)> fn) {
- // TODO: Remove after introducing the first schema to opset 23
- (void)fn;
+ fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 23, Cast)>());
+ fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 23, CastLike)>());
+ fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 23, DequantizeLinear)>());
+ fn(GetOpSchema<ONNX_OPERATOR_SET_SCHEMA_CLASS_NAME(Onnx, 23, QuantizeLinear)>());
}
};
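
With the four schema classes above wired into `OpSet_Onnx_ver23::ForEachSchema`, the new opset-23 entries become discoverable from Python. A minimal sanity check, assuming a build that includes this change, might look like:

    import onnx.defs

    # Each of the four ops promoted here should now report since_version 23.
    for op in ("Cast", "CastLike", "DequantizeLinear", "QuantizeLinear"):
        schema = onnx.defs.get_schema(op, 23, "")  # op, max inclusive opset, default domain
        print(op, schema.since_version)
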
diff --git a/onnx/defs/quantization/defs.cc b/onnx/defs/quantization/defs.cc
index a68e076b3dd..e5938e7a4b2 100644
--- a/onnx/defs/quantization/defs.cc
+++ b/onnx/defs/quantization/defs.cc
@@ -37,7 +37,7 @@ In all cases, `y_zero_point` must have the same shape as `y_scale`.
ONNX_OPERATOR_SET_SCHEMA(
QuantizeLinear,
- 21,
+ 23,
OpSchema()
.Input(0, "x", "N-D full precision Input tensor to be quantized.", "T1")
.Input(
@@ -101,7 +101,8 @@ ONNX_OPERATOR_SET_SCHEMA(
"tensor(float8e5m2)",
"tensor(float8e5m2fnuz)",
"tensor(uint4)",
- "tensor(int4)"},
+ "tensor(int4)",
+ "tensor(float4e2m1)"},
"The type of the input `y_zero_point` and the output `y`.")
.SetDoc(QuantizeLinear_ver21_doc)
.TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
@@ -147,7 +148,7 @@ for consistency, and `x_scale` still determines the output type.
ONNX_OPERATOR_SET_SCHEMA(
DequantizeLinear,
- 21,
+ 23,
OpSchema()
.Input(0, "x", "N-D quantized input tensor to be de-quantized.", "T1")
.Input(
@@ -192,7 +193,8 @@ ONNX_OPERATOR_SET_SCHEMA(
"tensor(float8e5m2)",
"tensor(float8e5m2fnuz)",
"tensor(uint4)",
- "tensor(int4)"},
+ "tensor(int4)",
+ "tensor(float4e2m1)"},
"The type of the inputs 'x_zero_point' and 'x'.")
.TypeConstraint(
"T2",
diff --git a/onnx/defs/quantization/old.cc b/onnx/defs/quantization/old.cc
index 12be8ad03cb..a20c2da3eb8 100644
--- a/onnx/defs/quantization/old.cc
+++ b/onnx/defs/quantization/old.cc
@@ -7,6 +7,202 @@
namespace ONNX_NAMESPACE {
+static const char* QuantizeLinear_ver21_doc = R"DOC(
+The linear quantization operator consumes a high-precision tensor, a scale, and a zero point to compute the
+low-precision/quantized tensor. The scale factor and zero point must have the same shape, determining the quantization
+granularity. The quantization formula is `y = saturate((x / y_scale) + y_zero_point)`.
+Saturation is done according to:
+- uint16: [0, 65535]
+- int16: [-32768, 32767]
+- uint8: [0, 255]
+- int8: [-128, 127]
+- uint4: [0, 15]
+- int4: [-8, 7]
+For `(x / y_scale)`, the result is rounded to the nearest representable value, with ties rounded to even. Refer to https://en.wikipedia.org/wiki/Rounding for details.
+`y_zero_point` and `y` must have the same type. `y_zero_point` is usually not used for quantization to float8 types, but the quantization
+formula remains the same for consistency, and the type of the input `y_zero_point` still determines the quantization type.
+There are three supported quantization granularities, determined by the shape of `y_scale`.
+In all cases, `y_zero_point` must have the same shape as `y_scale`.
+- Per-tensor (per-layer) quantization: `y_scale` is a scalar.
+- Per-axis quantization: The scale must be a 1-D tensor, with the length of the quantization axis. For an input shape
+ `(D0, ..., Di, ..., Dn)` and `axis=i`, `y_scale` is a 1-D tensor of length `Di`.
+- Blocked quantization: The scale's shape is identical to the input's shape, except for one dimension, in which
+ blocking is performed. Given `x` shape `(D0, ..., Di, ..., Dn)`, `axis=i`, and block size `B`: `y_scale` shape is
+ `(D0, ..., ceil(Di/B), ..., Dn)`.
+)DOC";
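
To make the formula above concrete, here is a NumPy sketch of per-tensor int8 quantization (an illustration, not the reference implementation; `np.round` already rounds ties to even):

    import numpy as np

    def quantize_linear_per_tensor(x, y_scale, y_zero_point=0):
        # y = saturate(round(x / y_scale) + y_zero_point); int8 saturates to [-128, 127]
        y = np.round(x / y_scale) + y_zero_point  # np.round rounds ties to even
        return np.clip(y, -128, 127).astype(np.int8)

    quantize_linear_per_tensor(np.array([0.5, 1.0, 253.9], dtype=np.float32), 2.0)
    # -> array([  0,   0, 127], dtype=int8)
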
+
+ONNX_OPERATOR_SET_SCHEMA(
+ QuantizeLinear,
+ 21,
+ OpSchema()
+ .Input(0, "x", "N-D full precision input tensor to be quantized.", "T1")
+ .Input(
+ 1,
+ "y_scale",
+ "Scale for doing quantization to get `y`. For per-tensor/layer quantization the scale is a scalar, for "
+ "per-axis quantization it is a 1-D Tensor and for blocked quantization it has the same shape as the "
+ "input, except for one dimension in which blocking is performed.",
+ "T1")
+ .Input(
+ 2,
+ "y_zero_point",
+ "Zero point for doing quantization to get `y`. Shape must match `y_scale`. "
+ "Default is uint8 with zero point of 0 if it's not specified.",
+ "T2",
+ OpSchema::Optional)
+ .Output(0, "y", "N-D quantized output tensor. It has same shape as input `x`.", "T2")
+ .Attr(
+ "axis",
+ "(Optional) The axis of the quantizing dimension of the input tensor. Used only for per-axis and blocked "
+ "quantization. Negative value means counting dimensions from the back. Accepted range is `[-r, r-1]` "
+ "where `r = rank(input)`. When the rank of the input is 1, per-tensor quantization is applied, "
+ "rendering the axis unnecessary in this scenario.",
+ AttributeProto::INT,
+ static_cast<int64_t>(1))
+ .Attr(
+ "saturate",
+ "The parameter defines how the conversion behaves if an input value is out of "
+ "range of the destination type. It only applies for float 8 quantization "
+ "(float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz). It is true by default. "
+ "All cases are fully described in two tables inserted in the operator description.",
+ AttributeProto::INT,
+ static_cast<int64_t>(1))
+ .Attr(
+ "block_size",
+ "(Optional) The size of the quantization block (number of times every scale is replicated). Used only for "
+ "blocked quantization. The block size is a positive integer. Given `x` shape `(D0, ..., Di, ..., Dn)`, "
+ "`y_scale` shape `(S0, ... Si, ...Sn)` and `axis=i`, the accepted range is "
+ "`[ceil(Di/Si), ceil(Di/(Si-1))-1]`",
+ AttributeProto::INT,
+ static_cast<int64_t>(0))
+ .Attr(
+ "output_dtype",
+ "(Optional) The output data type. If not supplied, the output data type is inferred from `y_zero_point` data type (`T2`). "
+ "If neither `output_dtype` nor `y_zero_point` are supplied, output data type is uint8. "
+ "If both `output_dtype` and `y_zero_point` are specified, `output_dtype` must be `T2`.",
+ AttributeProto::INT,
+ static_cast<int64_t>(0))
+ .TypeConstraint(
+ "T1",
+ {"tensor(float)", "tensor(float16)", "tensor(bfloat16)", "tensor(int32)"},
+ "The type of the input 'x'.")
+ .TypeConstraint(
+ "T2",
+ {"tensor(int8)",
+ "tensor(uint8)",
+ "tensor(int16)",
+ "tensor(uint16)",
+ "tensor(float8e4m3fn)",
+ "tensor(float8e4m3fnuz)",
+ "tensor(float8e5m2)",
+ "tensor(float8e5m2fnuz)",
+ "tensor(uint4)",
+ "tensor(int4)"},
+ "The type of the input `y_zero_point` and the output `y`.")
+ .SetDoc(QuantizeLinear_ver21_doc)
+ .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+ auto const zp_type = ctx.hasInput(2) ? ctx.getInputType(2) : nullptr;
+ auto const output_dtype =
+ static_cast<TensorProto_DataType>(getAttribute(ctx, "output_dtype", TensorProto::UNDEFINED));
+ if (zp_type != nullptr) {
+ auto const zp_elem_type = static_cast<TensorProto_DataType>(getTensorElementType(*zp_type));
+ if (output_dtype != TensorProto::UNDEFINED && output_dtype != zp_elem_type) {
+ fail_type_inference(
+ "output_dtype ",
+ TensorProto_DataType_Name(output_dtype),
+ " does not match y_zero_point type ",
+ TensorProto_DataType_Name(zp_elem_type),
+ ".");
+ }
+ propagateElemTypeFromInputToOutput(ctx, 2, 0);
+ } else if (output_dtype != TensorProto::UNDEFINED) {
+ propagateElemTypeFromAttributeToOutput(ctx, "output_dtype", 0);
+ } else {
+ updateOutputElemType(ctx, 0, TensorProto::UINT8);
+ }
+ if (!hasInputShape(ctx, 0)) {
+ return;
+ }
+
+ auto& input_shape = getInputShape(ctx, 0);
+ updateOutputShape(ctx, 0, input_shape);
+ }));
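
The inference function above encodes a simple precedence: the type of `y_zero_point` wins (and must agree with `output_dtype` when both are given), then `output_dtype`, then the `uint8` default. A rough Python restatement, purely illustrative:

    def infer_quantized_output_type(zero_point_type=None, output_dtype=None):
        # Mirrors the precedence in the C++ lambda above (a sketch, not the source).
        if zero_point_type is not None:
            if output_dtype is not None and output_dtype != zero_point_type:
                raise TypeError("output_dtype does not match y_zero_point type")
            return zero_point_type
        return output_dtype if output_dtype is not None else "uint8"
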
+
+static const char* DequantizeLinear_ver21_doc = R"DOC(
+The linear dequantization operator. It consumes a quantized tensor, a scale, and a zero point to compute the
+full-precision tensor. The dequantization formula is `y = (x - x_zero_point) * x_scale`. `x_scale` and `x_zero_point`
+must have the same shape, determining the quantization's granularity: a scalar for per-tensor/per-layer quantization,
+a 1-D tensor for per-axis quantization, or have a rank identical to the input for blocked quantization.
+See QuantizeLinear for details on quantization granularity.
+`x_zero_point` and `x` must have the same type. `x` and `y` must have the same shape. In the case of dequantizing
+`int32`, there's no zero point (zero point is supposed to be 0).
+`x_zero_point` is usually not used in the case of quantization to float8 types, but the dequantization formula remains the same
+for consistency, and `x_scale` still determines the output type.
+)DOC";
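
A matching NumPy sketch of the per-tensor dequantization formula (float32 scale assumed; illustration only):

    import numpy as np

    def dequantize_linear_per_tensor(x, x_scale, x_zero_point=0):
        # y = (x - x_zero_point) * x_scale, computed in the scale's floating-point type
        return (x.astype(np.float32) - np.float32(x_zero_point)) * np.float32(x_scale)

    dequantize_linear_per_tensor(np.array([0, 0, 127], dtype=np.int8), 2.0)
    # -> array([  0.,   0., 254.], dtype=float32)
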
+
+ONNX_OPERATOR_SET_SCHEMA(
+ DequantizeLinear,
+ 21,
+ OpSchema()
+ .Input(0, "x", "N-D quantized input tensor to be de-quantized.", "T1")
+ .Input(
+ 1,
+ "x_scale",
+ "Scale for input `x`. For per-tensor/layer dequantization the scale is a scalar, for "
+ "per-axis dequantization it is a 1-D Tensor and for blocked dequantization it has the same shape as "
+ "the input, except for one dimension in which blocking is performed.",
+ "T2")
+ .Input(
+ 2,
+ "x_zero_point",
+ "Zero point for input `x`. Shape must match `x_scale`. "
+ "It's optional. Zero point is 0 when it's not specified.",
+ "T1",
+ OpSchema::Optional)
+ .Output(0, "y", "N-D full precision output tensor. It has same shape as input `x`.", "T2")
+ .Attr(
+ "axis",
+ "(Optional) The axis of the dequantizing dimension of the input tensor. Used for per-axis and blocked "
+ "quantization. Negative value means counting dimensions from the back. Accepted range is `[-r, r-1]` "
+ "where `r = rank(input)`.",
+ AttributeProto::INT,
+ static_cast<int64_t>(1))
+ .Attr(
+ "block_size",
+ "(Optional) The size of the quantization block (number of times every scale is replicated). Used only for "
+ "blocked quantization. The block size is a positive integer. Given `x` shape `(D0, ..., Di, ..., Dn)`, "
+ "`x_scale` shape `(S0, ... Si, ...Sn)` and `axis=i`, the accepted range is "
+ "`[ceil(Di/Si), ceil(Di/(Si-1))-1]`",
+ AttributeProto::INT,
+ static_cast<int64_t>(0))
+ .TypeConstraint(
+ "T1",
+ {"tensor(int8)",
+ "tensor(uint8)",
+ "tensor(int16)",
+ "tensor(uint16)",
+ "tensor(int32)",
+ "tensor(float8e4m3fn)",
+ "tensor(float8e4m3fnuz)",
+ "tensor(float8e5m2)",
+ "tensor(float8e5m2fnuz)",
+ "tensor(uint4)",
+ "tensor(int4)"},
+ "The type of the inputs 'x_zero_point' and 'x'.")
+ .TypeConstraint(
+ "T2",
+ {"tensor(float)", "tensor(float16)", "tensor(bfloat16)"},
+ "'x_scale' determines the output type.")
+ .SetDoc(DequantizeLinear_ver21_doc)
+ .TypeAndShapeInferenceFunction([](ONNX_NAMESPACE::InferenceContext& ctx) {
+ propagateElemTypeFromInputToOutput(ctx, 1, 0);
+ if (!hasInputShape(ctx, 0)) {
+ return;
+ }
+ auto& input_shape = getInputShape(ctx, 0);
+ updateOutputShape(ctx, 0, input_shape);
+ }));
+
static const char* QuantizeLinear_ver19_doc = R"DOC(
The linear quantization operator. It consumes a high precision tensor, a scale, and a zero point to compute the low precision / quantized tensor.
The scale factor and zero point must have same shape, and can be either a scalar for per-tensor / per layer quantization, or a 1-D tensor for per-axis quantization.
diff --git a/onnx/defs/tensor/defs.cc b/onnx/defs/tensor/defs.cc
index c27620db5ee..84f72fd907e 100644
--- a/onnx/defs/tensor/defs.cc
+++ b/onnx/defs/tensor/defs.cc
@@ -83,7 +83,7 @@ The rules then become:
ONNX_OPERATOR_SET_SCHEMA(
Cast,
- 21,
+ 23,
OpSchema()
.SetDoc(Cast_ver19_doc)
.Attr(
@@ -112,19 +112,11 @@ ONNX_OPERATOR_SET_SCHEMA(
OpSchema::Differentiable)
.TypeConstraint(
"T1",
- {"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(int8)",
- "tensor(int16)", "tensor(int32)", "tensor(int64)", "tensor(uint8)",
- "tensor(uint16)", "tensor(uint32)", "tensor(uint64)", "tensor(bool)",
- "tensor(string)", "tensor(bfloat16)", "tensor(float8e4m3fn)", "tensor(float8e4m3fnuz)",
- "tensor(float8e5m2)", "tensor(float8e5m2fnuz)", "tensor(uint4)", "tensor(int4)"},
+ OpSchema::all_non_complex_tensor_types_ir11(),
"Constrain input types. Casting from complex is not supported.")
.TypeConstraint(
"T2",
- {"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(int8)",
- "tensor(int16)", "tensor(int32)", "tensor(int64)", "tensor(uint8)",
- "tensor(uint16)", "tensor(uint32)", "tensor(uint64)", "tensor(bool)",
- "tensor(string)", "tensor(bfloat16)", "tensor(float8e4m3fn)", "tensor(float8e4m3fnuz)",
- "tensor(float8e5m2)", "tensor(float8e5m2fnuz)", "tensor(uint4)", "tensor(int4)"},
+ OpSchema::all_non_complex_tensor_types_ir11(),
"Constrain output types. Casting to complex is not supported.")
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
propagateElemTypeFromAttributeToOutput(ctx, "to", 0);
@@ -144,7 +136,7 @@ See documentation of the Cast operator for further details.
ONNX_OPERATOR_SET_SCHEMA(
CastLike,
- 21,
+ 23,
OpSchema()
.SetDoc(CastLike_ver21_doc)
.Attr(
@@ -176,11 +168,11 @@ ONNX_OPERATOR_SET_SCHEMA(
OpSchema::Differentiable)
.TypeConstraint(
"T1",
- OpSchema::all_non_complex_tensor_types_ir10(),
+ OpSchema::all_non_complex_tensor_types_ir11(),
"Constrain input types. Casting from complex is not supported.")
.TypeConstraint(
"T2",
- OpSchema::all_non_complex_tensor_types_ir10(),
+ OpSchema::all_non_complex_tensor_types_ir11(),
"Constrain output types. Casting to complex is not supported.")
.TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
propagateElemTypeFromInputToOutput(ctx, 1, 0);
diff --git a/onnx/defs/tensor/old.cc b/onnx/defs/tensor/old.cc
index 78d47ab0c3a..a7daf41a145 100644
--- a/onnx/defs/tensor/old.cc
+++ b/onnx/defs/tensor/old.cc
@@ -178,6 +178,61 @@ The rules then become:
| else | RNE | RNE | RNE | RNE |
)DOC";
+ONNX_OPERATOR_SET_SCHEMA(
+ Cast,
+ 21,
+ OpSchema()
+ .SetDoc(Cast_ver19_doc)
+ .Attr(
+ "to",
+ "The data type to which the elements of the input tensor are cast. "
+ "Strictly must be one of the types from DataType enum in TensorProto",
+ AttributeProto::INT)
+ .Attr(
+ "saturate",
+ "The parameter defines how the conversion behaves if an input value is out of "
+ "range of the destination type. It only applies for float 8 conversion "
+ "(float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz). It is true by default. "
+ "All cases are fully described in two tables inserted in the operator description.",
+ AttributeProto::INT,
+ static_cast<int64_t>(1))
+ .Input(0, "input", "Input tensor to be cast.", "T1", OpSchema::Single, true, 1, OpSchema::Differentiable)
+ .Output(
+ 0,
+ "output",
+ "Output tensor with the same shape as input with type "
+ "specified by the 'to' argument",
+ "T2",
+ OpSchema::Single,
+ true,
+ 1,
+ OpSchema::Differentiable)
+ .TypeConstraint(
+ "T1",
+ {"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(int8)",
+ "tensor(int16)", "tensor(int32)", "tensor(int64)", "tensor(uint8)",
+ "tensor(uint16)", "tensor(uint32)", "tensor(uint64)", "tensor(bool)",
+ "tensor(string)", "tensor(bfloat16)", "tensor(float8e4m3fn)", "tensor(float8e4m3fnuz)",
+ "tensor(float8e5m2)", "tensor(float8e5m2fnuz)", "tensor(uint4)", "tensor(int4)"},
+ "Constrain input types. Casting from complex is not supported.")
+ .TypeConstraint(
+ "T2",
+ {"tensor(float16)", "tensor(float)", "tensor(double)", "tensor(int8)",
+ "tensor(int16)", "tensor(int32)", "tensor(int64)", "tensor(uint8)",
+ "tensor(uint16)", "tensor(uint32)", "tensor(uint64)", "tensor(bool)",
+ "tensor(string)", "tensor(bfloat16)", "tensor(float8e4m3fn)", "tensor(float8e4m3fnuz)",
+ "tensor(float8e5m2)", "tensor(float8e5m2fnuz)", "tensor(uint4)", "tensor(int4)"},
+ "Constrain output types. Casting to complex is not supported.")
+ .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
+ propagateElemTypeFromAttributeToOutput(ctx, "to", 0);
+ if (hasNInputShapes(ctx, 1)) {
+ propagateShapeFromInputToOutput(ctx, 0, 0);
+ }
+ })
+ .PartialDataPropagationFunction([](DataPropagationContext& ctx) {
+ PropagateShapeDataFromInputToOutput(ctx, 0);
+ }));
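
The pinned Cast-21 schema above can still be exercised directly, for example through the reference evaluator (a sketch assuming a standard onnx install):

    import numpy as np
    from onnx import TensorProto, helper
    from onnx.reference import ReferenceEvaluator

    node = helper.make_node("Cast", ["x"], ["y"], to=TensorProto.FLOAT16)
    graph = helper.make_graph(
        [node], "cast_example",
        [helper.make_tensor_value_info("x", TensorProto.FLOAT, [3])],
        [helper.make_tensor_value_info("y", TensorProto.FLOAT16, [3])],
    )
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 21)])
    (y,) = ReferenceEvaluator(model).run(None, {"x": np.array([1.0, 2.5, -3.25], dtype=np.float32)})
    print(y.dtype)  # float16
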
+
ONNX_OPERATOR_SET_SCHEMA(
Cast,
19,
@@ -362,6 +417,74 @@ ONNX_OPERATOR_SET_SCHEMA(
PropagateShapeDataFromInputToOutput(ctx, 0);
}));
+static const char* CastLike_ver21_doc = R"DOC(
+The operator casts the elements of a given input tensor (the first input) to
+the same data type as the elements of the second input tensor.
+See documentation of the Cast operator for further details.
+)DOC";
+
+ONNX_OPERATOR_SET_SCHEMA(
+ CastLike,
+ 21,
+ OpSchema()
+ .SetDoc(CastLike_ver21_doc)
+ .Attr(
+ "saturate",
+ "The parameter defines how the conversion behaves if an input value is out of "
+ "range of the destination type. It only applies for float 8 conversion "
+ "(float8e4m3fn, float8e4m3fnuz, float8e5m2, float8e5m2fnuz). It is true by default. "
+ "Please refer to operator Cast description for further details.",
+ AttributeProto::INT,
+ static_cast<int64_t>(1))
+ .Input(0, "input", "Input tensor to be cast.", "T1", OpSchema::Single, true, 1, OpSchema::Differentiable)
+ .Input(
+ 1,
+ "target_type",
+ "The (first) input tensor will be cast to produce a tensor of the same type as this (second input) tensor.",
+ "T2",
+ OpSchema::Single,
+ true,
+ 1,
+ OpSchema::NonDifferentiable)
+ .Output(
+ 0,
+ "output",
+ "Output tensor produced by casting the first input tensor to have the same type as the second input tensor.",
+ "T2",
+ OpSchema::Single,
+ true,
+ 1,
+ OpSchema::Differentiable)
+ .TypeConstraint(
+ "T1",
+ OpSchema::all_non_complex_tensor_types_ir10(),
+ "Constrain input types. Casting from complex is not supported.")
+ .TypeConstraint(
+ "T2",
+ OpSchema::all_non_complex_tensor_types_ir10(),
+ "Constrain output types. Casting to complex is not supported.")
+ .TypeAndShapeInferenceFunction([](InferenceContext& ctx) {
+ propagateElemTypeFromInputToOutput(ctx, 1, 0);
+ if (hasNInputShapes(ctx, 1)) {
+ propagateShapeFromInputToOutput(ctx, 0, 0);
+ }
+ })
+ .SetContextDependentFunctionBodyBuilder(
+ [](const FunctionBodyBuildContext& ctx, const OpSchema& schema, FunctionProto& functionProto) -> bool {
+ auto target_type = ctx.getInputType(1);
+ if ((target_type == nullptr) || (!target_type->has_tensor_type())) {
+ // we cannot create a correct function body without knowing the target element type
+ return false;
+ }
+ auto target_elt_type = target_type->tensor_type().elem_type();
+ FunctionBuilder builder(functionProto);
+ builder.Add(
+ MakeString("output = Cast <to = ", (int64_t)(target_elt_type), ", saturate: int = @saturate> (input)")
+ .c_str());
+ schema.BuildFunction(functionProto);
+ return true;
+ }));
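
The context-dependent body builder above lowers CastLike to a single Cast node once the target element type is known from the second input. Schematically, in Python (names here are illustrative, not part of the source):

    from onnx import TensorProto, helper

    # CastLike(input, target_type) expands to Cast(input) with `to` taken from the
    # target's element type and `saturate` forwarded from the CastLike attribute.
    target_elt_type = TensorProto.FLOAT16  # would come from the second input's type
    expanded = helper.make_node("Cast", ["input"], ["output"], to=target_elt_type, saturate=1)
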
+
static const char* CastLike_ver19_doc = R"DOC(
The operator casts the elements of a given input tensor (the first input) to
the same data type as the elements of the second input tensor.
diff --git a/onnx/reference/ops/op_cast_like.py b/onnx/reference/ops/op_cast_like.py
index 7604c51c32e..e8df4f988bc 100644
--- a/onnx/reference/ops/op_cast_like.py
+++ b/onnx/reference/ops/op_cast_like.py
@@ -9,6 +9,7 @@
from onnx.reference.ops.op_cast import (
bfloat16,
cast_to,
+ float4e2m1,
float8e4m3fn,
float8e4m3fnuz,
float8e5m2,
@@ -34,6 +35,8 @@ def _cast_like(x, y, saturate):
to = TensorProto.UINT4
elif y.dtype == int4 and y.dtype.descr[0][0] == "int4":
to = TensorProto.INT4
+ elif y.dtype == float4e2m1 and y.dtype.descr[0][0] == "float4e2m1":
+ to = TensorProto.FLOAT4E2M1
else:
to = np_dtype_to_tensor_dtype(y.dtype) # type: ignore
return (cast_to(x, to, saturate),)
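
The two-step test in `_cast_like` (dtype equality plus the `descr[0][0]` field name) exists because ONNX's sub-byte and float8 element types are structured NumPy dtypes wrapped around uint8 storage, so dtype equality alone is too loose to tell them apart. A quick illustration, assuming the definitions in `onnx._custom_element_types`:

    from onnx._custom_element_types import float4e2m1, uint4

    # Both wrappers share uint8 storage; the field name is what distinguishes them.
    print(float4e2m1.descr[0][0])  # 'float4e2m1'
    print(uint4.descr[0][0])       # 'uint4'
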
diff --git a/onnx/reference/ops/op_constant.py b/onnx/reference/ops/op_constant.py
index e31713b1eed..5586c0ffc88 100644
--- a/onnx/reference/ops/op_constant.py
+++ b/onnx/reference/ops/op_constant.py
@@ -7,6 +7,7 @@
from onnx._custom_element_types import (
bfloat16,
+ float4e2m1,
float8e4m3fn,
float8e4m3fnuz,
float8e5m2,
@@ -27,6 +28,7 @@ def _check_dtype(val): # type: ignore
float8e5m2fnuz,
uint4,
int4,
+ float4e2m1,
np.int8,
np.uint8,
np.float16,
diff --git a/onnx/reference/ops/op_dequantize_linear.py b/onnx/reference/ops/op_dequantize_linear.py
index 83920208776..d9d89d1f210 100644
--- a/onnx/reference/ops/op_dequantize_linear.py
+++ b/onnx/reference/ops/op_dequantize_linear.py
@@ -7,6 +7,7 @@
from onnx import TensorProto
from onnx._custom_element_types import (
+ float4e2m1,
float8e4m3fn,
float8e4m3fnuz,
float8e5m2,
@@ -15,7 +16,11 @@
uint4,
)
from onnx.helper import np_dtype_to_tensor_dtype
-from onnx.numpy_helper import float8e4m3_to_float32, float8e5m2_to_float32
+from onnx.numpy_helper import (
+ float8e4m3_to_float32,
+ float8e5m2_to_float32,
+ unpacked_float4e2m1_to_float32,
+)
from onnx.reference.op_run import OpRun
from onnx.reference.ops.op_quantize_linear import reshape_input
@@ -35,6 +40,8 @@ def get_x_type(self, x: np.ndarray) -> int:
tensor_dtype = TensorProto.UINT4
elif x.dtype == int4 and x.dtype.descr[0][0] == "int4":
tensor_dtype = TensorProto.INT4
+ elif x.dtype == float4e2m1 and x.dtype.descr[0][0] == "float4e2m1":
+ tensor_dtype = TensorProto.FLOAT4E2M1
else:
tensor_dtype = np_dtype_to_tensor_dtype(x.dtype)
return tensor_dtype
@@ -54,7 +61,11 @@ def _run(
TensorProto.FLOAT8E5M2,
TensorProto.FLOAT8E5M2FNUZ,
}
- if x_zero_point is not None and not fp8_type:
+ if (
+ x_zero_point is not None
+ and not fp8_type
+ and x_type != TensorProto.FLOAT4E2M1
+ ):
zero_type = self.get_x_type(x_zero_point)
if x_type != zero_type:
raise ValueError(
@@ -81,6 +92,8 @@ def _run(
dx = float8e5m2_to_float32(x)
elif x_type == TensorProto.FLOAT8E5M2FNUZ:
dx = float8e5m2_to_float32(x, fn=True, uz=True)
+ elif x_type == TensorProto.FLOAT4E2M1:
+ dx = unpacked_float4e2m1_to_float32(x)
else:
dx = x.astype(np.float32)
y = dx * reshape_input(x_scale, x.shape, axis, block_size)
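
For reference, float4e2m1 (1 sign bit, 2 exponent bits, 1 mantissa bit, bias 1) has only eight magnitudes, so the decode performed by `unpacked_float4e2m1_to_float32` can be sketched as a lookup table (an illustration, not the library's source):

    import numpy as np

    _F4E2M1_MAGNITUDES = np.array([0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0], dtype=np.float32)

    def decode_float4e2m1(codes):
        magnitude = _F4E2M1_MAGNITUDES[codes & 0x07]  # low 3 bits: exponent and mantissa
        sign = np.where(codes & 0x08, np.float32(-1.0), np.float32(1.0))  # bit 3: sign
        return sign * magnitude
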
diff --git a/onnx/reference/ops/op_quantize_linear.py b/onnx/reference/ops/op_quantize_linear.py
index ddc04c2a13e..d5f2fe7a63b 100644
--- a/onnx/reference/ops/op_quantize_linear.py
+++ b/onnx/reference/ops/op_quantize_linear.py
@@ -11,6 +11,7 @@
from onnx import TensorProto, subbyte
from onnx._custom_element_types import (
+ float4e2m1,
float8e4m3fn,
float8e4m3fnuz,
float8e5m2,
@@ -97,6 +98,7 @@ class _CommonQuantizeLinear(OpRun):
TensorProto.FLOAT8E4M3FNUZ,
TensorProto.FLOAT8E5M2,
TensorProto.FLOAT8E5M2FNUZ,
+ TensorProto.FLOAT4E2M1,
)
def get_zero_point_type(self, zero_point: np.ndarray) -> int:
@@ -122,6 +124,11 @@ def get_zero_point_type(self, zero_point: np.ndarray) -> int:
zero_point_type = TensorProto.UINT4
elif zero_point.dtype == int4 and zero_point.dtype.descr[0][0] == "int4":
zero_point_type = TensorProto.INT4
+ elif (
+ zero_point.dtype == float4e2m1
+ and zero_point.dtype.descr[0][0] == "float4e2m1"
+ ):
+ zero_point_type = TensorProto.FLOAT4E2M1
else:
zero_point_type = np_dtype_to_tensor_dtype(zero_point.dtype)
return zero_point_type
@@ -199,6 +206,11 @@ def _run(
i4 = func(xi)
return (i4,) # type: ignore[attr-defined]
+ if tensor_type == TensorProto.FLOAT4E2M1:
+ x += zero_point
+ f4 = subbyte.float32_to_float4e2m1_unpacked(x)
+ return (f4,) # type: ignore[attr-defined]
+
raise ValueError(
f"Unexpected type: output_dtype={tensor_type} is not a supported quantized type."
)
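
The new FLOAT4E2M1 branch adds the zero point and then snaps each value to the nearest representable e2m1 code via `subbyte.float32_to_float4e2m1_unpacked`. A hedged nearest-value sketch of that rounding (the real helper also pins down tie-breaking and NaN/Inf handling):

    import numpy as np

    _F4E2M1_VALUES = np.array(
        [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0,
         -0.0, -0.5, -1.0, -1.5, -2.0, -3.0, -4.0, -6.0], dtype=np.float32)

    def encode_float4e2m1(x):
        # Pick the code whose value is closest to each element (illustration only).
        dist = np.abs(np.asarray(x, dtype=np.float32)[..., None] - _F4E2M1_VALUES)
        return np.argmin(dist, axis=-1).astype(np.uint8)
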
diff --git a/onnx/version_converter/convert.h b/onnx/version_converter/convert.h
index c3024ab7564..84b7c809ad0 100644
--- a/onnx/version_converter/convert.h
+++ b/onnx/version_converter/convert.h
@@ -776,6 +776,21 @@ class DefaultVersionConverter : public BaseVersionConverter {
    registerAdapter(std::make_unique<TypeRestriction>("GRU", OpSetID(22), OpSetID(21), bfloat16_not_allowed));
    registerAdapter(std::make_unique<TypeRestriction>("LSTM", OpSetID(22), OpSetID(21), bfloat16_not_allowed));
    registerAdapter(std::make_unique<TypeRestriction>("GridSample", OpSetID(22), OpSetID(21), bfloat16_not_allowed));
+
+ /******** 22 -> 23 ********/
+ registerAdapter(std::make_unique<CompatibleAdapter>("Cast", OpSetID(22), OpSetID(23)));
+ registerAdapter(std::make_unique<CompatibleAdapter>("CastLike", OpSetID(22), OpSetID(23)));
+ registerAdapter(std::make_unique<CompatibleAdapter>("DequantizeLinear", OpSetID(22), OpSetID(23)));
+ registerAdapter(std::make_unique<CompatibleAdapter>("QuantizeLinear", OpSetID(22), OpSetID(23)));
+
+ /******** 23 -> 22 ********/
+ const std::vector<TensorProto_DataType> ir11_types_not_in_ir10 = {TensorProto_DataType_FLOAT4E2M1};
+ registerAdapter(std::make_unique<TypeRestriction>("Cast", OpSetID(23), OpSetID(22), ir11_types_not_in_ir10));
+ registerAdapter(std::make_unique<TypeRestriction>("CastLike", OpSetID(23), OpSetID(22), ir11_types_not_in_ir10));
+ registerAdapter(
+ std::make_unique<TypeRestriction>("DequantizeLinear", OpSetID(23), OpSetID(22), ir11_types_not_in_ir10));
+ registerAdapter(
+ std::make_unique<TypeRestriction>("QuantizeLinear", OpSetID(23), OpSetID(22), ir11_types_not_in_ir10));
}
ModelProto convert_version(const ModelProto& mp_in, const OpSetID& initial_version, const OpSetID& target_version)
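
Together, the adapters registered above let the version converter move models across the 22/23 boundary in both directions; the downgrade path goes through the type restriction and is expected to fail when a model genuinely uses `tensor(float4e2m1)`. A sketch of exercising them (file name hypothetical):

    import onnx
    from onnx import version_converter

    model = onnx.load("model_opset22.onnx")                  # hypothetical input
    upgraded = version_converter.convert_version(model, 23)  # 22 -> 23, compatible adapter
    # 23 -> 22 runs the type restriction registered above and should raise
    # if the model actually contains tensor(float4e2m1) values.
    downgraded = version_converter.convert_version(upgraded, 22)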