Skip to content

Commit

Permalink
perf(simd): avx2 fallback to swar instead of sse4.2
Browse files Browse the repository at this point in the history
This has massive implications on the default runtime perf, improving how the code is lowered/inlined. (Falling back to SSE4.2 for a handful of bytes was wasteful).

Should supersede #175, #156
  • Loading branch information
AaronO committed Sep 2, 2024
1 parent fff851f commit 9888d7f
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions src/simd/avx2.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::iter::Bytes;

#[inline]
#[target_feature(enable = "avx2", enable = "sse4.2")]
#[target_feature(enable = "avx2")]
pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 32 {
let advance = match_url_char_32_avx(bytes.as_ref());
Expand All @@ -11,8 +11,8 @@ pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
return;
}
}
// do both, since avx2 only works when bytes.len() >= 32
super::sse42::match_uri_vectored(bytes)
// NOTE: use SWAR for <32B, more efficient than falling back to SSE4.2
super::swar::match_uri_vectored(bytes)
}

#[inline(always)]
Expand Down Expand Up @@ -56,7 +56,7 @@ unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize {
r.trailing_zeros() as usize
}

#[target_feature(enable = "avx2", enable = "sse4.2")]
#[target_feature(enable = "avx2")]
pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 32 {
let advance = match_header_value_char_32_avx(bytes.as_ref());
Expand All @@ -66,8 +66,8 @@ pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
return;
}
}
// do both, since avx2 only works when bytes.len() >= 32
super::sse42::match_header_value_vectored(bytes)
// NOTE: use SWAR for <32B, more efficient than falling back to SSE4.2
super::swar::match_header_value_vectored(bytes)
}

#[inline(always)]
Expand Down

0 comments on commit 9888d7f

Please sign in to comment.