diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h index 3f6995d800f..1ae8a3d58d6 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h @@ -39,24 +39,9 @@ inline void relu_max(uint uint_threshold) { } } -template +template inline void calculate_lrelu(uint slope) { - // SFPU microcode - Converter c_slope; - c_slope.u = slope; - vFloat s = c_slope.f; - -#pragma GCC unroll 0 - for (int d = 0; d < 8; d++) { - vFloat v = dst_reg[0]; - - v_if(v < 0.0f) { v *= s; } - v_endif; - - dst_reg[0] = v; - - dst_reg++; - } + _calculate_lrelu_(ITERATIONS, slope); } } // namespace sfpu diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h index b0c7571899a..9abcfa165dc 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h @@ -60,27 +60,10 @@ inline void relu_min(uint uint_threshold) // LRELU = LEAKY RELU -template +template inline void calculate_lrelu(uint slope) { - // SFPU microcode - Converter c_slope; - c_slope.u = slope; - vFloat s = c_slope.f; - - #pragma GCC unroll 0 - for (int d = 0; d < ITERATIONS; d++) { - vFloat v = dst_reg[0]; - - v_if (v < 0.0f) { - v *= s; - } - v_endif; - - dst_reg[0] = v; - - dst_reg++; - } + _calculate_lrelu_(ITERATIONS, slope); } } // namespace sfpu diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_relu.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_relu.h index e5b0cf849dc..5dd68ee58b5 100644 --- a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_relu.h +++ b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_relu.h @@ -71,7 +71,7 @@ inline void llk_math_eltwise_unary_sfpu_lrelu_init() { template inline void llk_math_eltwise_unary_sfpu_lrelu(uint dst_index, int param0 = 0) { llk_math_eltwise_unary_sfpu_params( - ckernel::sfpu::calculate_lrelu, + ckernel::sfpu::calculate_lrelu, dst_index, VectorMode::RC, param0); diff --git a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h index e6facb34814..b4b14ca626e 100644 --- a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h +++ b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h @@ -53,27 +53,10 @@ inline void relu_max(uint uint_threshold) } } -template -inline void calculate_lrelu(uint slope) +template +inline void calculate_lrelu(const uint slope) { - // SFPU microcode - Converter c_slope; - c_slope.u = slope; - vFloat s = c_slope.f; - - #pragma GCC unroll 0 - for (int d = 0; d < 8; d++) { - vFloat v = dst_reg[0]; - - v_if (v < 0.0f) { - v *= s; - } - v_endif; - - dst_reg[0] = v; - - dst_reg++; - } + _calculate_lrelu_(ITERATIONS, slope); } } // namespace sfpu diff --git a/tt_metal/include/compute_kernel_api/eltwise_unary/relu.h b/tt_metal/include/compute_kernel_api/eltwise_unary/relu.h index ee8e7b761b5..b8f2b738c14 100644 --- a/tt_metal/include/compute_kernel_api/eltwise_unary/relu.h +++ b/tt_metal/include/compute_kernel_api/eltwise_unary/relu.h @@ -100,10 +100,10 @@ ALWI void relu_tile_init() { * | Argument | Description | Type | Valid Range | Required | * |----------------|----------------------------------------------------------------------------|----------|-------------------------------------------------------|----------| * | tile_index | The index of the tile in DST register buffer to perform the computation on | uint32_t | Must be less than the size of the DST register buffer | True | - * | slope | slope used in leaky relu calculation | uint32_t | Greater than 0 | True | + * | slope | slope used in leaky relu - will reinterpret unsigned int to float | uint32_t | Greater than 0 | True | */ -ALWI void leaky_relu_tile(uint32_t idst,uint32_t param0) { - MATH(( llk_math_eltwise_unary_sfpu_lrelu(idst,param0) )); +ALWI void leaky_relu_tile(uint32_t idst, uint32_t slope) { + MATH(( llk_math_eltwise_unary_sfpu_lrelu(idst, slope) )); } /** diff --git a/tt_metal/third_party/tt_llk_blackhole b/tt_metal/third_party/tt_llk_blackhole index dc92e255cfd..72362766ea1 160000 --- a/tt_metal/third_party/tt_llk_blackhole +++ b/tt_metal/third_party/tt_llk_blackhole @@ -1 +1 @@ -Subproject commit dc92e255cfd75945ccda2fa72e34e1cc9efe942e +Subproject commit 72362766ea11ce0fd36d51b9e9b9181ef625813f diff --git a/tt_metal/third_party/tt_llk_grayskull b/tt_metal/third_party/tt_llk_grayskull index 9cadb14bc90..4e7183e18ba 160000 --- a/tt_metal/third_party/tt_llk_grayskull +++ b/tt_metal/third_party/tt_llk_grayskull @@ -1 +1 @@ -Subproject commit 9cadb14bc905b84abb0e99e41ab83a612c4cd428 +Subproject commit 4e7183e18ba7d6ea546d6f7ec8a66a0dce1b3d14 diff --git a/tt_metal/third_party/tt_llk_wormhole_b0 b/tt_metal/third_party/tt_llk_wormhole_b0 index 56ec2f52c81..38df92426d6 160000 --- a/tt_metal/third_party/tt_llk_wormhole_b0 +++ b/tt_metal/third_party/tt_llk_wormhole_b0 @@ -1 +1 @@ -Subproject commit 56ec2f52c817053116058c7ca4ed22da11c236ae +Subproject commit 38df92426d6b3226e177d86b8c8a308f34bd2829