Skip to content

Commit

Permalink
fix for reals?
Browse files Browse the repository at this point in the history
  • Loading branch information
argilo committed Feb 1, 2024
1 parent a289514 commit 48d113d
Showing 1 changed file with 16 additions and 16 deletions.
32 changes: 16 additions & 16 deletions kernels/volk/volk_8u_x4_conv_k7_r2_8u.h
Original file line number Diff line number Diff line change
Expand Up @@ -668,17 +668,17 @@ static inline void volk_8u_x4_conv_k7_r2_8u_neonspiral(unsigned char* Y,
 high_bits = vreinterpretq_u16_u8(vshrq_n_u8(d9, 7));
 paired16 = vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 6));
 paired32_new = vreinterpretq_u8_u32(vsraq_n_u32(paired16, paired16, 12));
-s20 = ((unsigned int)vgetq_lane_u8(paired32_new, 0) << 1)
-| ((unsigned int)vgetq_lane_u8(paired32_new, 4) << 9)
-| ((unsigned int)vgetq_lane_u8(paired32_new, 8) << 17)
-| ((unsigned int)vgetq_lane_u8(paired32_new, 12) << 25);
-high_bits = vreinterpretq_u16_u8(vshrq_n_u8(d10, 7));
-paired16 = vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 6));
-paired32_new = vreinterpretq_u8_u32(vsraq_n_u32(paired16, paired16, 12));
-s20 |= ((unsigned int)vgetq_lane_u8(paired32_new, 0) << 0)
+s20 = ((unsigned int)vgetq_lane_u8(paired32_new, 0) << 0)
 | ((unsigned int)vgetq_lane_u8(paired32_new, 4) << 8)
 | ((unsigned int)vgetq_lane_u8(paired32_new, 8) << 16)
 | ((unsigned int)vgetq_lane_u8(paired32_new, 12) << 24);
+high_bits = vreinterpretq_u16_u8(vshrq_n_u8(d10, 7));
+paired16 = vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 6));
+paired32_new = vreinterpretq_u8_u32(vsraq_n_u32(paired16, paired16, 12));
+s20 |= ((unsigned int)vgetq_lane_u8(paired32_new, 0) << 1)
+| ((unsigned int)vgetq_lane_u8(paired32_new, 4) << 9)
+| ((unsigned int)vgetq_lane_u8(paired32_new, 8) << 17)
+| ((unsigned int)vgetq_lane_u8(paired32_new, 12) << 25);
 a91 = ((unsigned int*)dec);
 a92 = (4 * i9);
 a93 = (a91 + a92);
Expand Down Expand Up @@ -715,17 +715,17 @@ printf("%02x%02x%02x%02x", dec[i9*16 + 0], dec[i9*16 + 1], dec[i9*16 + 2], dec[i
 high_bits = vreinterpretq_u16_u8(vshrq_n_u8(d11, 7));
 paired16 = vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 6));
 paired32_new = vreinterpretq_u8_u32(vsraq_n_u32(paired16, paired16, 12));
-s26 = ((unsigned int)vgetq_lane_u8(paired32_new, 0) << 1)
-| ((unsigned int)vgetq_lane_u8(paired32_new, 4) << 9)
-| ((unsigned int)vgetq_lane_u8(paired32_new, 8) << 17)
-| ((unsigned int)vgetq_lane_u8(paired32_new, 12) << 25);
-high_bits = vreinterpretq_u16_u8(vshrq_n_u8(d12, 7));
-paired16 = vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 6));
-paired32_new = vreinterpretq_u8_u32(vsraq_n_u32(paired16, paired16, 12));
-s26 |= ((unsigned int)vgetq_lane_u8(paired32_new, 0) << 0)
+s26 = ((unsigned int)vgetq_lane_u8(paired32_new, 0) << 0)
 | ((unsigned int)vgetq_lane_u8(paired32_new, 4) << 8)
 | ((unsigned int)vgetq_lane_u8(paired32_new, 8) << 16)
 | ((unsigned int)vgetq_lane_u8(paired32_new, 12) << 24);
+high_bits = vreinterpretq_u16_u8(vshrq_n_u8(d12, 7));
+paired16 = vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 6));
+paired32_new = vreinterpretq_u8_u32(vsraq_n_u32(paired16, paired16, 12));
+s26 |= ((unsigned int)vgetq_lane_u8(paired32_new, 0) << 1)
+| ((unsigned int)vgetq_lane_u8(paired32_new, 4) << 9)
+| ((unsigned int)vgetq_lane_u8(paired32_new, 8) << 17)
+| ((unsigned int)vgetq_lane_u8(paired32_new, 12) << 25);
 a110 = (a93 + 1);
 *(a110) = s26;
 printf("%02x%02x%02x%02x", dec[i9*16 + 4], dec[i9*16 + 5], dec[i9*16 + 6], dec[i9*16 + 7]);
Expand Down

0 comments on commit 48d113d

Please sign in to comment.