From b78bfbceec8fda0794e59db0419a1cb20df2285c Mon Sep 17 00:00:00 2001 From: Radomir Djogo Date: Thu, 10 Oct 2024 19:57:37 +0000 Subject: [PATCH] #13439: sin/cos llk bug fixes --- .../metal/llk_api/llk_sfpu/ckernel_sfpu_trigonometry.h | 8 ++------ .../metal/llk_api/llk_sfpu/ckernel_sfpu_trigonometry.h | 8 ++------ tt_metal/third_party/tt_llk_blackhole | 2 +- tt_metal/third_party/tt_llk_wormhole_b0 | 2 +- 4 files changed, 6 insertions(+), 14 deletions(-) diff --git a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_trigonometry.h b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_trigonometry.h index 5caf95d66d0c..4e8cba3150ae 100644 --- a/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_trigonometry.h +++ b/tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_sfpu/ckernel_sfpu_trigonometry.h @@ -93,10 +93,6 @@ sfpi_inline vFloat sfpu_sine_maclaurin_series(vFloat val) { tmp = tmp * val * val; output += 0.0000027557 * tmp; - // x^11/11! - tmp = tmp * val * val; - output += -0.00000002505 * tmp; - if constexpr (not APPROXIMATION_MODE) { // x^11/11! tmp = tmp * val * val; @@ -155,7 +151,7 @@ inline void calculate_sine() { for (int d = 0; d < ITERATIONS; d++) { vFloat v = dst_reg[0]; v = 0.318309886183791f * v; // *1/pi to get number of pi rads. - vInt whole_v = float_to_int16(v); + vInt whole_v = float_to_int16(v, 0); vFloat whole_v_float = int32_to_float(whole_v, 0); v = v - whole_v_float; v *= 3.141592653589793f; // fractional * pi to get it in [-pi:pi] @@ -177,7 +173,7 @@ inline void calculate_cosine() { for (int d = 0; d < ITERATIONS; d++) { vFloat v = dst_reg[0]; v = 0.318309886183791f * v; // *1/pi to get number of pi rads. - vInt whole_v = float_to_int16(v); + vInt whole_v = float_to_int16(v, 0); vFloat whole_v_float = int32_to_float(whole_v, 0); v = v - whole_v_float; v *= 3.141592653589793f; // fractional * pi to get it in [-pi:pi] diff --git a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_trigonometry.h b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_trigonometry.h index 4da3114f7636..d3a06d6d7e59 100644 --- a/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_trigonometry.h +++ b/tt_metal/hw/ckernels/wormhole_b0/metal/llk_api/llk_sfpu/ckernel_sfpu_trigonometry.h @@ -101,10 +101,6 @@ sfpi_inline vFloat sfpu_sine_maclaurin_series(vFloat val) tmp = tmp*val*val; output += 0.0000027557*tmp; - // x^11/11! - tmp = tmp*val*val; - output += -0.00000002505*tmp; - if constexpr (not APPROXIMATION_MODE) { // x^11/11! tmp = tmp*val*val; @@ -166,7 +162,7 @@ inline void calculate_sine() { vFloat v = dst_reg[0]; v = 0.318309886183791f*v; // *1/pi to get number of pi rads. - vInt whole_v = float_to_int16(v); + vInt whole_v = float_to_int16(v, 0); vFloat whole_v_float = int32_to_float(whole_v, 0); v = v - whole_v_float; v *= 3.141592653589793f; // fractional * pi to get it in [-pi:pi] @@ -190,7 +186,7 @@ inline void calculate_cosine() { vFloat v = dst_reg[0]; v = 0.318309886183791f*v; // *1/pi to get number of pi rads. - vInt whole_v = float_to_int16(v); + vInt whole_v = float_to_int16(v, 0); vFloat whole_v_float = int32_to_float(whole_v, 0); v = v - whole_v_float; v *= 3.141592653589793f; // fractional * pi to get it in [-pi:pi] diff --git a/tt_metal/third_party/tt_llk_blackhole b/tt_metal/third_party/tt_llk_blackhole index 9a68fd09d8ee..0c617b0fb38a 160000 --- a/tt_metal/third_party/tt_llk_blackhole +++ b/tt_metal/third_party/tt_llk_blackhole @@ -1 +1 @@ -Subproject commit 9a68fd09d8ee2d81c445c576861cf146c9b54810 +Subproject commit 0c617b0fb38ac141b9147404e715969a9808b6ff diff --git a/tt_metal/third_party/tt_llk_wormhole_b0 b/tt_metal/third_party/tt_llk_wormhole_b0 index 166515054c09..fd081e86c043 160000 --- a/tt_metal/third_party/tt_llk_wormhole_b0 +++ b/tt_metal/third_party/tt_llk_wormhole_b0 @@ -1 +1 @@ -Subproject commit 166515054c09553317f569d3689198c3891cefe0 +Subproject commit fd081e86c04388a7c6152d341e0a08c260c42daf