diff --git a/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h b/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h index b1297c8d..235462f7 100644 --- a/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h +++ b/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h @@ -670,12 +670,12 @@ static inline void volk_8u_x4_conv_k7_r2_8u_neonspiral(unsigned char* Y, high_bits = vreinterpretq_u16_u8(vshrq_n_u8(d9, 7)); paired16 = vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 6)); paired32 = vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 12)); - paired64_new = vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 24)); + paired64_new = vreinterpretq_u16_u64(vsraq_n_u64(paired32, paired32, 24)); s20 = ((unsigned int)vgetq_lane_u16(paired64_new, 0) << 1) | ((unsigned int)vgetq_lane_u16(paired64_new, 4) << 17); high_bits = vreinterpretq_u16_u8(vshrq_n_u8(d10, 7)); paired16 = vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 6)); paired32 = vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 12)); - paired64_new = vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 24)); + paired64_new = vreinterpretq_u16_u64(vsraq_n_u64(paired32, paired32, 24)); s20 |= ((unsigned int)vgetq_lane_u16(paired64_new, 0)) | ((unsigned int)vgetq_lane_u8(paired64_new, 4) << 16); a91 = ((short int*)dec); a92 = (4 * i9);