From 22d55029c262cbb127d96a78b27792dc78d2c99d Mon Sep 17 00:00:00 2001
From: "Frank J. T. Wojcik"
Date: Thu, 3 Aug 2023 20:19:09 -0700
Subject: [PATCH] Speed up Blob highzerobits implementation

Replace the byte-at-a-time hzb[] lookup table with word-sized scans.
The blob is walked from its highest-indexed byte downward in 8-byte,
then 4-byte, then 1-byte steps. Every all-zero step adds 64, 32 or 8
to the running count; the first non-zero step adds its own leading
zero count and returns.

Review fixes folded into this revision:

- Start the scan at `len` instead of the class-wide `_bytes`, so the
  helper keeps honouring its length argument the way the old loop did.
- In the 1-byte tail the byte is zero-extended into a uint32_t, so
  clz4() also counts the 24 padding bits. Subtract them so a non-zero
  byte contributes 0..7, matching the removed hzb[] table.

---
 util/Blob.cpp | 20 +-------------------
 util/Blob.h   | 36 ++++++++++++++++++++++++++++++------
 2 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/util/Blob.cpp b/util/Blob.cpp
index 47c1b7b5..6d02d640 100644
--- a/util/Blob.cpp
+++ b/util/Blob.cpp
@@ -21,22 +21,4 @@
 #include "Blob.h"
 
 //-----------------------------------------------------------------------------
-// For highzerobits()
-const uint8_t hzb[256] = {
-    8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-};
+// Someday this will have Blob unit tests
diff --git a/util/Blob.h b/util/Blob.h
index 820f7a7f..1420742c 100644
--- a/util/Blob.h
+++ b/util/Blob.h
@@ -47,8 +47,6 @@
  */
 #include
 
-extern const uint8_t hzb[256];
-
 //-----------------------------------------------------------------------------
 #define _bytes ((size_t)(_bits + 7) / 8)
 template
@@ -283,13 +281,39 @@ class Blob {
 
     static FORCE_INLINE uint32_t _highzerobits( const uint8_t * bytes, const size_t len ) {
         uint32_t zb = 0;
+        size_t i = len; // honour the caller-supplied length, as the old loop did
 
-        for (ssize_t i = len - 1; i >= 0; i--) {
-            zb += hzb[bytes[i]];
-            if (bytes[i] != 0) {
-                break;
+        while (i >= 8) {
+            uint64_t a;
+            i -= 8;
+            memcpy(&a, &bytes[i], 8); a = COND_BSWAP(a, isBE());
+            if (a != 0) {
+                zb += clz8(a);
+                return zb;
             }
+            zb += 64;
         }
+        while (i >= 4) {
+            uint32_t a;
+            i -= 4;
+            memcpy(&a, &bytes[i], 4); a = COND_BSWAP(a, isBE());
+            if (a != 0) {
+                zb += clz4(a);
+                return zb;
+            }
+            zb += 32;
+        }
+        while (i >= 1) {
+            uint32_t a;
+            i -= 1;
+            a = bytes[i];
+            if (a != 0) {
+                zb += clz4(a) - 24; // a is a zero-extended byte: drop the 24 pad bits
+                return zb;
+            }
+            zb += 8;
+        }
+
         return zb;
     }
 