#5424: Cleanup lrelu (#13959)

* #5424: Cleanup lrelu
tenstorrent · Oct 29, 2024 · cdcf7b5
1 parent 7e29389
commit cdcf7b5
Show file tree

Hide file tree

Showing 8 changed files with 14 additions and 63 deletions.
diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h
@@ -39,24 +39,9 @@ inline void relu_max(uint uint_threshold) {
     }
 }
 
-template <bool APPROXIMATION_MODE>
+template <bool APPROXIMATION_MODE, int ITERATIONS=8>
 inline void calculate_lrelu(uint slope) {
-    // SFPU microcode
-    Converter c_slope;
-    c_slope.u = slope;
-    vFloat s = c_slope.f;
-
-#pragma GCC unroll 0
-    for (int d = 0; d < 8; d++) {
-        vFloat v = dst_reg[0];
-
-        v_if(v < 0.0f) { v *= s; }
-        v_endif;
-
-        dst_reg[0] = v;
-
-        dst_reg++;
-    }
+    _calculate_lrelu_<APPROXIMATION_MODE>(ITERATIONS, slope);
 }
 
 }  // namespace sfpu

diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h
@@ -60,27 +60,10 @@ inline void relu_min(uint uint_threshold)
 
 // LRELU = LEAKY RELU
 
-template <bool APPROXIMATION_MODE, int ITERATIONS>
+template <bool APPROXIMATION_MODE, int ITERATIONS=4>
 inline void calculate_lrelu(uint slope)
 {
-    // SFPU microcode
-    Converter c_slope;
-    c_slope.u = slope;
-    vFloat s = c_slope.f;
-
-    #pragma GCC unroll 0
-    for (int d = 0; d < ITERATIONS; d++) {
-        vFloat v = dst_reg[0];
-
-        v_if (v < 0.0f) {
-            v *= s;
-        }
-        v_endif;
-
-        dst_reg[0] = v;
-
-        dst_reg++;
-    }
+    _calculate_lrelu_<APPROXIMATION_MODE>(ITERATIONS, slope);
 }
 
 }  // namespace sfpu

diff --git a/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_relu.h b/tt_metal/hw/ckernels/grayskull/metal/llk_api/llk_sfpu/llk_math_eltwise_unary_sfpu_relu.h
@@ -71,7 +71,7 @@ inline void llk_math_eltwise_unary_sfpu_lrelu_init() {
 template <bool APPROXIMATE>
 inline void llk_math_eltwise_unary_sfpu_lrelu(uint dst_index, int param0 = 0) {
     llk_math_eltwise_unary_sfpu_params<APPROXIMATE>(
-        ckernel::sfpu::calculate_lrelu<APPROXIMATE,4>,
+        ckernel::sfpu::calculate_lrelu<APPROXIMATE>,
         dst_index,
         VectorMode::RC,
         param0);

diff --git a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_relu.h
@@ -53,27 +53,10 @@ inline void relu_max(uint uint_threshold)
     }
 }
 
-template <bool APPROXIMATION_MODE>
-inline void calculate_lrelu(uint slope)
+template <bool APPROXIMATION_MODE, int ITERATIONS=8>
+inline void calculate_lrelu(const uint slope)
 {
-    // SFPU microcode
-    Converter c_slope;
-    c_slope.u = slope;
-    vFloat s = c_slope.f;
-
-    #pragma GCC unroll 0
-    for (int d = 0; d < 8; d++) {
-        vFloat v = dst_reg[0];
-
-        v_if (v < 0.0f) {
-            v *= s;
-        }
-        v_endif;
-
-        dst_reg[0] = v;
-
-        dst_reg++;
-    }
+    _calculate_lrelu_<APPROXIMATION_MODE>(ITERATIONS, slope);
 }
 
 }  // namespace sfpu

diff --git a/tt_metal/include/compute_kernel_api/eltwise_unary/relu.h b/tt_metal/include/compute_kernel_api/eltwise_unary/relu.h
@@ -100,10 +100,10 @@ ALWI void relu_tile_init() {
  * | Argument       | Description                                                                | Type     | Valid Range                                           | Required |
  * |----------------|----------------------------------------------------------------------------|----------|-------------------------------------------------------|----------|
  * | tile_index     | The index of the tile in DST register buffer to perform the computation on | uint32_t | Must be less than the size of the DST register buffer | True     |
- * | slope          | slope used in leaky relu calculation                                       | uint32_t | Greater than 0                                        | True     |
+ * | slope          | slope used in leaky relu - will reinterpret unsigned int to float          | uint32_t | Greater than 0                                        | True     |
  */
-ALWI void leaky_relu_tile(uint32_t idst,uint32_t param0) {
-  MATH(( llk_math_eltwise_unary_sfpu_lrelu<APPROX>(idst,param0) ));
+ALWI void leaky_relu_tile(uint32_t idst, uint32_t slope) {
+  MATH(( llk_math_eltwise_unary_sfpu_lrelu<APPROX>(idst, slope) ));
 }
 
 /**

diff --git a/tt_metal/third_party/tt_llk_blackhole b/tt_metal/third_party/tt_llk_blackhole
diff --git a/tt_metal/third_party/tt_llk_grayskull b/tt_metal/third_party/tt_llk_grayskull
diff --git a/tt_metal/third_party/tt_llk_wormhole_b0 b/tt_metal/third_party/tt_llk_wormhole_b0