From 6cb4b332180b4f21ed26123b3479c9ad0c38437c Mon Sep 17 00:00:00 2001 From: "Frank J. T. Wojcik" Date: Wed, 20 Jul 2022 19:50:08 -0700 Subject: [PATCH] Bulk code reformatting (NFC) This is mostly automatic, but some manual changes have been done. --- hashes/EXAMPLE-mit.cpp | 34 +- hashes/EXAMPLE.cpp | 34 +- hashes/aesnihash.cpp | 64 +- hashes/aesrng.cpp | 278 +- hashes/ascon.cpp | 671 ++-- hashes/badhash.cpp | 122 +- hashes/beamsplitter.cpp | 644 ++-- hashes/blake2.cpp | 603 ++-- hashes/blake2/compress-portable.h | 193 +- hashes/blake2/compress-sse2-plus.h | 860 +++--- hashes/blake3.cpp | 992 +++--- hashes/blake3/compress-portable.h | 277 +- hashes/blake3/compress-sse2.h | 992 +++--- hashes/blake3/compress-sse41.h | 981 +++--- hashes/blockpearson.cpp | 314 +- hashes/chaskey.cpp | 529 ++-- hashes/cityhash.cpp | 1304 ++++---- hashes/clhash.cpp | 393 +-- hashes/crap.cpp | 172 +- hashes/crc.cpp | 225 +- hashes/discohash.cpp | 266 +- hashes/donothing.cpp | 164 +- hashes/falcon_oaat.cpp | 103 +- hashes/falkhash.cpp | 253 +- hashes/farmhash.cpp | 3093 ++++++++++--------- hashes/farsh.cpp | 378 +-- hashes/fasthash.cpp | 88 +- hashes/fletcher.cpp | 236 +- hashes/floppsyhash.cpp | 112 +- hashes/fnv.cpp | 412 +-- hashes/halftimehash.cpp | 2240 +++++++------- hashes/hasshe2.cpp | 282 +- hashes/jodyhash.cpp | 110 +- hashes/khash.cpp | 110 +- hashes/komihash.cpp | 191 +- hashes/lookup3.cpp | 144 +- hashes/md5.cpp | 345 +-- hashes/meowhash.cpp | 307 +- hashes/metrohash.cpp | 541 ++-- hashes/multiply_shift.cpp | 247 +- hashes/mum_mir.cpp | 1625 +++++----- hashes/murmur_oaat.cpp | 41 +- hashes/murmurhash1.cpp | 67 +- hashes/murmurhash2.cpp | 297 +- hashes/murmurhash3.cpp | 516 ++-- hashes/mx3.cpp | 108 +- hashes/nmhash.cpp | 491 +-- hashes/o1hash.cpp | 61 +- hashes/pearson.cpp | 849 ++--- hashes/pengyhash.cpp | 45 +- hashes/perlhashes.cpp | 216 +- hashes/pmp_multilinear.cpp | 4629 ++++++++++++++-------------- hashes/poly_mersenne.cpp | 197 +- hashes/prvhash.cpp | 255 +- 
hashes/rmd.cpp | 1336 ++++---- hashes/seahash.cpp | 222 +- hashes/sha1.cpp | 734 ++--- hashes/sha2.cpp | 1257 ++++---- hashes/sha3.cpp | 375 +-- hashes/siphash.cpp | 608 ++-- hashes/spookyhash.cpp | 454 ++- hashes/superfasthash.cpp | 83 +- hashes/t1ha.cpp | 2027 ++++++------ hashes/tabulation.cpp | 485 +-- hashes/umash.cpp | 912 +++--- hashes/vmac.cpp | 756 ++--- hashes/wyhash.cpp | 345 ++- hashes/x17.cpp | 41 +- hashes/xxhash.cpp | 860 +++--- hashes/xxhash/xxh3-arm.h | 101 +- hashes/xxhash/xxh3-avx2.h | 126 +- hashes/xxhash/xxh3-avx512.h | 85 +- hashes/xxhash/xxh3-ppc.h | 84 +- hashes/xxhash/xxh3-sse2.h | 106 +- include/common/Hashinfo.h | 209 +- include/common/Intrinsics.h | 232 +- include/hashlib/AES-aesni.h | 73 +- include/hashlib/AES-arm.h | 34 +- include/hashlib/AES-portable.h | 531 ++-- include/hashlib/AES-ppc.h | 24 +- include/hashlib/AES.h | 34 +- include/hashlib/Hashlib.h | 59 +- include/hashlib/Mathmult.h | 364 +-- lib/Hashinfo.cpp | 100 +- lib/Hashlib.cpp | 228 +- lib/Mathmult.cpp | 320 +- main.cpp | 969 +++--- misc/exactcoll.c | 39 +- tests/AvalancheTest.cpp | 370 ++- tests/AvalancheTest.h | 4 +- tests/BadSeedsTest.cpp | 202 +- tests/BadSeedsTest.h | 4 +- tests/BitIndependenceTest.cpp | 483 ++- tests/BitIndependenceTest.h | 4 +- tests/CyclicKeysetTest.cpp | 95 +- tests/CyclicKeysetTest.h | 4 +- tests/DiffDistributionTest.cpp | 133 +- tests/DiffDistributionTest.h | 4 +- tests/DifferentialTest.cpp | 251 +- tests/DifferentialTest.h | 4 +- tests/HashMapTest.cpp | 363 ++- tests/HashMapTest.h | 4 +- tests/PRNGTest.cpp | 56 +- tests/PRNGTest.h | 4 +- tests/PerlinNoiseTest.cpp | 93 +- tests/PerlinNoiseTest.h | 4 +- tests/PermutationKeysetTest.cpp | 370 ++- tests/PermutationKeysetTest.h | 4 +- tests/PopcountTest.cpp | 439 +-- tests/PopcountTest.h | 4 +- tests/SanityTest.cpp | 329 +- tests/SanityTest.h | 4 +- tests/SeedTest.cpp | 148 +- tests/SeedTest.h | 4 +- tests/SparseKeysetTest.cpp | 125 +- tests/SparseKeysetTest.h | 4 +- tests/SpeedTest.cpp | 343 +-- 
tests/SpeedTest.h | 6 +- tests/TextKeysetTest.cpp | 250 +- tests/TextKeysetTest.h | 4 +- tests/TwoBytesKeysetTest.cpp | 49 +- tests/TwoBytesKeysetTest.h | 4 +- tests/WindowedKeysetTest.cpp | 112 +- tests/WindowedKeysetTest.h | 4 +- tests/ZeroesKeysetTest.cpp | 46 +- tests/ZeroesKeysetTest.h | 4 +- util/Analyze.cpp | 1288 ++++---- util/Analyze.h | 22 +- util/Blob.h | 642 ++-- util/Blobsort.cpp | 279 +- util/Blobsort.h | 292 +- util/Instantiate.h | 14 +- util/Platform.cpp | 6 +- util/Random.h | 177 +- util/Stats.cpp | 1304 ++++---- util/Stats.h | 38 +- util/TestGlobals.h | 58 +- util/VCode.cpp | 1848 +++++------ util/VCode.h | 141 +- 139 files changed, 27085 insertions(+), 26273 deletions(-) diff --git a/hashes/EXAMPLE-mit.cpp b/hashes/EXAMPLE-mit.cpp index 248eedfc..ae78eb85 100644 --- a/hashes/EXAMPLE-mit.cpp +++ b/hashes/EXAMPLE-mit.cpp @@ -33,29 +33,29 @@ // hashes/Hashsrc.cmake, keeping the list sorted by size! //------------------------------------------------------------ -//###YOURHASHCODE +// ###YOURHASHCODE //------------------------------------------------------------ -template < bool bswap > -static void ###YOURHASHNAMEHash(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void ###YOURHASHNAMEHash( const void * in, const size_t len, const seed_t seed, void * out ) { PUT_U64(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(###YOURHASHFAMILYNAME, - $.src_url = "###YOURREPOSITORYURL", - $.src_status = HashFamilyInfo::SRC_###YOURSRCSTATUS -); + $.src_url = "###YOURREPOSITORYURL", + $.src_status = HashFamilyInfo::SRC_###YOURSRCSTATUS + ); REGISTER_HASH(###YOURHASHNAME, - $.desc = "", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x0, - $.verification_BE = 0x0, - $.hashfn_native = ###YOURHASHNAMEHash, - $.hashfn_bswap = ###YOURHASHNAMEHash -); + $.desc = "", + $.hash_flags = + 0, + $.impl_flags = + 
FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x0, + $.verification_BE = 0x0, + $.hashfn_native = ###YOURHASHNAMEHash, + $.hashfn_bswap = ###YOURHASHNAMEHash + ); diff --git a/hashes/EXAMPLE.cpp b/hashes/EXAMPLE.cpp index a63d1021..42b494ae 100644 --- a/hashes/EXAMPLE.cpp +++ b/hashes/EXAMPLE.cpp @@ -14,29 +14,29 @@ // hashes/Hashsrc.cmake, keeping the list sorted by size! //------------------------------------------------------------ -//###YOURHASHCODE +// ###YOURHASHCODE //------------------------------------------------------------ -template < bool bswap > -static void ###YOURHASHNAMEHash(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void ###YOURHASHNAMEHash( const void * in, const size_t len, const seed_t seed, void * out ) { PUT_U64(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(###YOURHASHFAMILYNAME, - $.src_url = "###YOURREPOSITORYURL", - $.src_status = HashFamilyInfo::SRC_###YOURSRCSTATUS -); + $.src_url = "###YOURREPOSITORYURL", + $.src_status = HashFamilyInfo::SRC_###YOURSRCSTATUS + ); REGISTER_HASH(###YOURHASHNAME, - $.desc = "", - $.hash_flags = - 0, - $.impl_flags = - 0, - $.bits = 32, - $.verification_LE = 0x0, - $.verification_BE = 0x0, - $.hashfn_native = ###YOURHASHNAMEHash, - $.hashfn_bswap = ###YOURHASHNAMEHash -); + $.desc = "", + $.hash_flags = + 0, + $.impl_flags = + 0, + $.bits = 32, + $.verification_LE = 0x0, + $.verification_BE = 0x0, + $.hashfn_native = ###YOURHASHNAMEHash, + $.hashfn_bswap = ###YOURHASHNAMEHash + ); diff --git a/hashes/aesnihash.cpp b/hashes/aesnihash.cpp index 62af7825..54ae7ef0 100644 --- a/hashes/aesnihash.cpp +++ b/hashes/aesnihash.cpp @@ -36,29 +36,29 @@ #include "Hashlib.h" #if defined(HAVE_X86_64_AES) -#include "Intrinsics.h" + #include "Intrinsics.h" -template < bool bswap > -static void aesnihash(const void * inv, const size_t len, const seed_t seed, void * out) { - const uint8_t * in = (uint8_t *)inv; - 
uint64_t src_sz = len; +template +static void aesnihash( const void * inv, const size_t len, const seed_t seed, void * out ) { + const uint8_t * in = (uint8_t *)inv; + uint64_t src_sz = len; - uint8_t tmp_buf[16] = {0}; - __m128i rk0 = {UINT64_C(0x736f6d6570736575), UINT64_C(0x646f72616e646f6d)}; - __m128i rk1 = {UINT64_C(0x1231236570743245), UINT64_C(0x126f12321321456d)}; + uint8_t tmp_buf[16] = { 0 }; + __m128i rk0 = { UINT64_C(0x736f6d6570736575), UINT64_C(0x646f72616e646f6d) }; + __m128i rk1 = { UINT64_C(0x1231236570743245), UINT64_C(0x126f12321321456d) }; // Homegrown seeding for SMHasher3 - __m128i seed128 = {(int64_t)seed, 0}; - __m128i hash = _mm_xor_si128(rk0, seed128); + __m128i seed128 = { (int64_t)seed, 0 }; + __m128i hash = _mm_xor_si128(rk0, seed128); while (src_sz >= 16) { - onemoretry: + onemoretry: __m128i piece = _mm_loadu_si128((__m128i *)in); // Arbitrarily chose 64-bit wordlen if (bswap) { piece = mm_bswap64(piece); } - in += 16; + in += 16; src_sz -= 16; - hash = _mm_aesenc_si128(_mm_xor_si128(hash, piece), rk0); - hash = _mm_aesenc_si128(hash, rk1); + hash = _mm_aesenc_si128(_mm_xor_si128(hash, piece), rk0); + hash = _mm_aesenc_si128(hash, rk1); } if (src_sz > 0) { @@ -67,7 +67,7 @@ static void aesnihash(const void * inv, const size_t len, const seed_t seed, voi tmp_buf[i] = in[i]; } src_sz = 16; - in = &tmp_buf[0]; + in = &tmp_buf[0]; goto onemoretry; } @@ -80,25 +80,25 @@ static void aesnihash(const void * inv, const size_t len, const seed_t seed, voi } REGISTER_FAMILY(aesnihash, - $.src_url = "https://gist.github.com/majek/96dd615ed6c8aa64f60aac14e3f6ab5a", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://gist.github.com/majek/96dd615ed6c8aa64f60aac14e3f6ab5a", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(aesnihash, - $.desc = "majek's aesnihash", - $.hash_flags = - FLAG_HASH_NO_SEED | - FLAG_HASH_AES_BASED, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_LICENSE_BSD, - $.bits = 64, - 
$.verification_LE = 0xA68E0D42, - $.verification_BE = 0xEBC48EDA, - $.hashfn_native = aesnihash, - $.hashfn_bswap = aesnihash, - $.badseeds = {0x70736575} -); + $.desc = "majek's aesnihash", + $.hash_flags = + FLAG_HASH_NO_SEED | + FLAG_HASH_AES_BASED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_LICENSE_BSD, + $.bits = 64, + $.verification_LE = 0xA68E0D42, + $.verification_BE = 0xEBC48EDA, + $.hashfn_native = aesnihash, + $.hashfn_bswap = aesnihash, + $.badseeds = { 0x70736575 } + ); #else REGISTER_FAMILY(aesnihash); diff --git a/hashes/aesrng.cpp b/hashes/aesrng.cpp index 9b88ba8d..1e8181b0 100644 --- a/hashes/aesrng.cpp +++ b/hashes/aesrng.cpp @@ -31,12 +31,12 @@ #include -// ------------------------------------------------------------ +//------------------------------------------------------------ // This is not strictly AES CTR mode, it is based on that plus the ARS // RNG constructions. static thread_local uint64_t ctr[2], oldctr[2]; -static const uint64_t incr[2] = {UINT64_C(1), UINT64_C(-1)}; +static const uint64_t incr[2] = { UINT64_C(1), UINT64_C(-1) }; static uint32_t round_keys[44]; // only modified on main thread // A little ugly... 
@@ -45,8 +45,10 @@ extern seed_t g_seed; /* K1 is golden ratio - 1, K2 is sqrt(3) - 1 */ #define K1 UINT64_C(0x9E3779B97F4A7C15) #define K2 UINT64_C(0xBB67AE8584CAA73B) -static bool aesrng_init(void) { + +static bool aesrng_init( void ) { uint8_t key[16]; + if (isLE()) { PUT_U64(g_seed + K2, key, 0); PUT_U64(g_seed + K1, key, 8); @@ -59,8 +61,9 @@ static bool aesrng_init(void) { return true; } -static uint64_t rnd64(void) { +static uint64_t rnd64( void ) { uint8_t result[16]; + if (isLE()) { PUT_U64(ctr[0], result, 0); PUT_U64(ctr[1], result, 8); @@ -75,16 +78,17 @@ static uint64_t rnd64(void) { return GET_U64(result, 0); } -static void rng_ffwd(int64_t ffwd) { +static void rng_ffwd( int64_t ffwd ) { ctr[0] += ffwd; ctr[1] -= ffwd; } -static void rng_setctr(uint64_t stream, uint64_t seq) { +static void rng_setctr( uint64_t stream, uint64_t seq ) { ctr[0] = seq; ctr[1] = stream; } // This variable is _not_ thread-local static uint64_t hash_mode; + // These complications are intended to make this "hash" return the // same results if threading is enabled or not. It makes the following // assumptions about the rest of the code: @@ -110,14 +114,14 @@ static uint64_t hash_mode; // thread's results should be unaffected if threading is enabled or // disabled, or if the possibly-threaded tests are skipped, and the // per-thread results should be unaffected by the number of threads. -static seed_t aesrng_seedfix(const HashInfo * hinfo, const seed_t hint) { +static seed_t aesrng_seedfix( const HashInfo * hinfo, const seed_t hint ) { if (hash_mode == hint) { oldctr[0] = ctr[0]; oldctr[1] = ctr[1]; } else { hash_mode = hint; - ctr[0] = oldctr[0]; - ctr[1] = oldctr[1]; + ctr[0] = oldctr[0]; + ctr[1] = oldctr[1]; } return 0; } @@ -131,7 +135,8 @@ static seed_t aesrng_seedfix(const HashInfo * hinfo, const seed_t hint) { // Hash_mode 2 is for Avalanche, which is very hard to fool in a // consistent way, so we have some magic knowledge of how it calls us. 
static thread_local uint64_t callcount; -static void rng_keyseq(const void * key, size_t len, uint64_t seed) { + +static void rng_keyseq( const void * key, size_t len, uint64_t seed ) { if (hash_mode == 2) { if (callcount-- != 0) { return; @@ -140,15 +145,15 @@ static void rng_keyseq(const void * key, size_t len, uint64_t seed) { } uint64_t s = 0; memcpy(&s, key, len > 8 ? 8 : len); - s = COND_BSWAP(s, isBE()); - s ^= len * K2; + s = COND_BSWAP(s, isBE()); + s ^= len * K2; seed ^= s * K1; - s ^= seed * K2; + s ^= seed * K2; rng_setctr(s, seed); } -template < uint32_t nbytes > -static void rng_impl(void * out) { +template +static void rng_impl( void * out ) { assert((nbytes >= 0) && (nbytes <= 39)); uint8_t * result = (uint8_t *)out; if (nbytes >= 8) { @@ -177,140 +182,141 @@ static void rng_impl(void * out) { } } -template < uint32_t hashbits > -static void aesrng(const void * in, const size_t len, const seed_t seed, void * out) { - if (hash_mode != 0) - rng_keyseq(in, len, seed); +template +static void aesrng( const void * in, const size_t len, const seed_t seed, void * out ) { + if (hash_mode != 0) { + rng_keyseq(in, len, seed); + } rng_impl<(hashbits >> 3)>(out); } REGISTER_FAMILY(aesrng, - $.src_url = "https://gitlab.com/fwojcik/smhasher3/-/blob/main/hashes/aesrng.cpp", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://gitlab.com/fwojcik/smhasher3/-/blob/main/hashes/aesrng.cpp", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); REGISTER_HASH(aesrng_32, - $.desc = "32-bit RNG using AES in CTR mode; not a hash", - $.hash_flags = - FLAG_HASH_MOCK | - FLAG_HASH_AES_BASED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_SEED_WITH_HINT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0xED1590AC, - $.verification_BE = 0xED1590AC, - $.hashfn_native = aesrng<32>, - $.hashfn_bswap = aesrng<32>, - $.initfn = aesrng_init, - $.seedfixfn = aesrng_seedfix, - 
$.sort_order = 50 -); + $.desc = "32-bit RNG using AES in CTR mode; not a hash", + $.hash_flags = + FLAG_HASH_MOCK | + FLAG_HASH_AES_BASED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_SEED_WITH_HINT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0xED1590AC, + $.verification_BE = 0xED1590AC, + $.hashfn_native = aesrng<32>, + $.hashfn_bswap = aesrng<32>, + $.initfn = aesrng_init, + $.seedfixfn = aesrng_seedfix, + $.sort_order = 50 + ); REGISTER_HASH(aesrng_64, - $.desc = "64-bit RNG using AES in CTR mode; not a hash", - $.hash_flags = - FLAG_HASH_MOCK | - FLAG_HASH_AES_BASED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_SEED_WITH_HINT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xAE36B667, - $.verification_BE = 0xAE36B667, - $.hashfn_native = aesrng<64>, - $.hashfn_bswap = aesrng<64>, - $.initfn = aesrng_init, - $.seedfixfn = aesrng_seedfix, - $.sort_order = 50 -); + $.desc = "64-bit RNG using AES in CTR mode; not a hash", + $.hash_flags = + FLAG_HASH_MOCK | + FLAG_HASH_AES_BASED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_SEED_WITH_HINT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xAE36B667, + $.verification_BE = 0xAE36B667, + $.hashfn_native = aesrng<64>, + $.hashfn_bswap = aesrng<64>, + $.initfn = aesrng_init, + $.seedfixfn = aesrng_seedfix, + $.sort_order = 50 + ); REGISTER_HASH(aesrng_128, - $.desc = "128-bit RNG using AES in CTR mode; not a hash", - $.hash_flags = - FLAG_HASH_MOCK | - FLAG_HASH_AES_BASED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_SEED_WITH_HINT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x2D1A1DB5, - $.verification_BE = 0x2D1A1DB5, - $.hashfn_native = aesrng<128>, - $.hashfn_bswap = 
aesrng<128>, - $.initfn = aesrng_init, - $.seedfixfn = aesrng_seedfix, - $.sort_order = 50 -); + $.desc = "128-bit RNG using AES in CTR mode; not a hash", + $.hash_flags = + FLAG_HASH_MOCK | + FLAG_HASH_AES_BASED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_SEED_WITH_HINT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x2D1A1DB5, + $.verification_BE = 0x2D1A1DB5, + $.hashfn_native = aesrng<128>, + $.hashfn_bswap = aesrng<128>, + $.initfn = aesrng_init, + $.seedfixfn = aesrng_seedfix, + $.sort_order = 50 + ); REGISTER_HASH(aesrng_160, - $.desc = "160-bit RNG using AES in CTR mode; not a hash", - $.hash_flags = - FLAG_HASH_MOCK | - FLAG_HASH_AES_BASED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_SEED_WITH_HINT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 160, - $.verification_LE = 0x3FC284C3, - $.verification_BE = 0x3FC284C3, - $.hashfn_native = aesrng<160>, - $.hashfn_bswap = aesrng<160>, - $.initfn = aesrng_init, - $.seedfixfn = aesrng_seedfix, - $.sort_order = 50 -); + $.desc = "160-bit RNG using AES in CTR mode; not a hash", + $.hash_flags = + FLAG_HASH_MOCK | + FLAG_HASH_AES_BASED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_SEED_WITH_HINT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 160, + $.verification_LE = 0x3FC284C3, + $.verification_BE = 0x3FC284C3, + $.hashfn_native = aesrng<160>, + $.hashfn_bswap = aesrng<160>, + $.initfn = aesrng_init, + $.seedfixfn = aesrng_seedfix, + $.sort_order = 50 + ); REGISTER_HASH(aesrng_224, - $.desc = "224-bit RNG using AES in CTR mode; not a hash", - $.hash_flags = - FLAG_HASH_MOCK | - FLAG_HASH_AES_BASED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_SEED_WITH_HINT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 224, - $.verification_LE = 0x9288A516, 
- $.verification_BE = 0x9288A516, - $.hashfn_native = aesrng<224>, - $.hashfn_bswap = aesrng<224>, - $.initfn = aesrng_init, - $.seedfixfn = aesrng_seedfix, - $.sort_order = 50 -); + $.desc = "224-bit RNG using AES in CTR mode; not a hash", + $.hash_flags = + FLAG_HASH_MOCK | + FLAG_HASH_AES_BASED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_SEED_WITH_HINT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 224, + $.verification_LE = 0x9288A516, + $.verification_BE = 0x9288A516, + $.hashfn_native = aesrng<224>, + $.hashfn_bswap = aesrng<224>, + $.initfn = aesrng_init, + $.seedfixfn = aesrng_seedfix, + $.sort_order = 50 + ); REGISTER_HASH(aesrng_256, - $.desc = "256-bit RNG using AES in CTR mode; not a hash", - $.hash_flags = - FLAG_HASH_MOCK | - FLAG_HASH_AES_BASED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_SEED_WITH_HINT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 256, - $.verification_LE = 0x2816EEC1, - $.verification_BE = 0x2816EEC1, - $.hashfn_native = aesrng<256>, - $.hashfn_bswap = aesrng<256>, - $.initfn = aesrng_init, - $.seedfixfn = aesrng_seedfix, - $.sort_order = 50 -); + $.desc = "256-bit RNG using AES in CTR mode; not a hash", + $.hash_flags = + FLAG_HASH_MOCK | + FLAG_HASH_AES_BASED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_SEED_WITH_HINT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 256, + $.verification_LE = 0x2816EEC1, + $.verification_BE = 0x2816EEC1, + $.hashfn_native = aesrng<256>, + $.hashfn_bswap = aesrng<256>, + $.initfn = aesrng_init, + $.seedfixfn = aesrng_seedfix, + $.sort_order = 50 + ); diff --git a/hashes/ascon.cpp b/hashes/ascon.cpp index 5fabc4d5..c79d490d 100644 --- a/hashes/ascon.cpp +++ b/hashes/ascon.cpp @@ -134,11 +134,11 @@ #include "Platform.h" #include "Hashlib.h" -//#define CRYPTO_VERSION "1.2.6" +// #define CRYPTO_VERSION "1.2.6" 
//------------------------------------------------------------ typedef struct { - uint64_t x[5]; + uint64_t x[5]; } state_t; #define ASCON_HASH_RATE 8 @@ -147,38 +147,39 @@ typedef struct { #define P_ROUNDS_XOF 12 #define P_ROUNDS_XOFA 8 -static FORCE_INLINE void ROUND(state_t * s, uint8_t C) { +static FORCE_INLINE void ROUND( state_t * s, uint8_t C ) { state_t t; + /* round constant */ s->x[2] ^= C; /* s-box layer */ s->x[0] ^= s->x[4]; s->x[4] ^= s->x[3]; s->x[2] ^= s->x[1]; - t.x[0] = s->x[0] ^ (~s->x[1] & s->x[2]); - t.x[2] = s->x[2] ^ (~s->x[3] & s->x[4]); - t.x[4] = s->x[4] ^ (~s->x[0] & s->x[1]); - t.x[1] = s->x[1] ^ (~s->x[2] & s->x[3]); - t.x[3] = s->x[3] ^ (~s->x[4] & s->x[0]); - t.x[1] ^= t.x[0]; - t.x[3] ^= t.x[2]; - t.x[0] ^= t.x[4]; + t.x[0] = s->x[0] ^ (~s->x[1] & s->x[2]); + t.x[2] = s->x[2] ^ (~s->x[3] & s->x[4]); + t.x[4] = s->x[4] ^ (~s->x[0] & s->x[1]); + t.x[1] = s->x[1] ^ (~s->x[2] & s->x[3]); + t.x[3] = s->x[3] ^ (~s->x[4] & s->x[0]); + t.x[1] ^= t.x[0]; + t.x[3] ^= t.x[2]; + t.x[0] ^= t.x[4]; /* linear layer */ - s->x[2] = t.x[2] ^ ROTR64(t.x[2], 6 - 1); - s->x[3] = t.x[3] ^ ROTR64(t.x[3], 17 - 10); - s->x[4] = t.x[4] ^ ROTR64(t.x[4], 41 - 7); - s->x[0] = t.x[0] ^ ROTR64(t.x[0], 28 - 19); - s->x[1] = t.x[1] ^ ROTR64(t.x[1], 61 - 39); - s->x[2] = t.x[2] ^ ROTR64(s->x[2], 1); - s->x[3] = t.x[3] ^ ROTR64(s->x[3], 10); - s->x[4] = t.x[4] ^ ROTR64(s->x[4], 7); - s->x[0] = t.x[0] ^ ROTR64(s->x[0], 19); - s->x[1] = t.x[1] ^ ROTR64(s->x[1], 39); - s->x[2] = ~s->x[2]; + s->x[2] = t.x[2] ^ ROTR64(t.x [2], 6 - 1); + s->x[3] = t.x[3] ^ ROTR64(t.x [3], 17 - 10); + s->x[4] = t.x[4] ^ ROTR64(t.x [4], 41 - 7); + s->x[0] = t.x[0] ^ ROTR64(t.x [0], 28 - 19); + s->x[1] = t.x[1] ^ ROTR64(t.x [1], 61 - 39); + s->x[2] = t.x[2] ^ ROTR64(s->x[2], 1); + s->x[3] = t.x[3] ^ ROTR64(s->x[3], 10); + s->x[4] = t.x[4] ^ ROTR64(s->x[4], 7); + s->x[0] = t.x[0] ^ ROTR64(s->x[0], 19); + s->x[1] = t.x[1] ^ ROTR64(s->x[1], 39); + s->x[2] = ~s->x[2]; } -template < uint32_t rounds 
> -static FORCE_INLINE void P(state_t * s) { +template +static FORCE_INLINE void P( state_t * s ) { if (rounds > MAX_P_ROUNDS) { return; } const uint8_t RC[MAX_P_ROUNDS] = { @@ -192,8 +193,8 @@ static FORCE_INLINE void P(state_t * s) { } // Homegrown seeding for SMHasher3 -template < bool XOFa > -static FORCE_INLINE void ascon_initxof(state_t * s, uint64_t seed) { +template +static FORCE_INLINE void ascon_initxof( state_t * s, uint64_t seed ) { if (XOFa) { s->x[0] = UINT64_C(0x44906568b77b9832); s->x[1] = UINT64_C(0xcd8d6cae53455532); @@ -209,14 +210,14 @@ static FORCE_INLINE void ascon_initxof(state_t * s, uint64_t seed) { } } -template < bool XOFa, bool bswap > -static FORCE_INLINE void ascon_absorb(state_t* s, const uint8_t* in, uint64_t inlen) { +template +static FORCE_INLINE void ascon_absorb( state_t * s, const uint8_t * in, uint64_t inlen ) { /* absorb full plaintext blocks */ while (inlen >= ASCON_HASH_RATE) { s->x[0] ^= GET_U64(in, 0); P(s); - in += ASCON_HASH_RATE; - inlen -= ASCON_HASH_RATE; + in += ASCON_HASH_RATE; + inlen -= ASCON_HASH_RATE; } /* absorb final plaintext block */ if (inlen) { @@ -228,12 +229,12 @@ static FORCE_INLINE void ascon_absorb(state_t* s, const uint8_t* in, uint64_t in s->x[0] ^= UINT64_C(0x80) << (56 - 8 * inlen); } -template < bool XOFa, bool bswap > -static void ascon_squeeze(state_t * s, uint8_t * out, uint64_t outlen) { +template +static void ascon_squeeze( state_t * s, uint8_t * out, uint64_t outlen ) { while (outlen > ASCON_HASH_RATE) { PUT_U64(s->x[0], out, 0); P(s); - out += ASCON_HASH_RATE; + out += ASCON_HASH_RATE; outlen -= ASCON_HASH_RATE; } uint8_t buf[8]; @@ -242,9 +243,10 @@ static void ascon_squeeze(state_t * s, uint8_t * out, uint64_t outlen) { } //------------------------------------------------------------ -template < uint64_t outbits, bool XOFa, bool bswap > -static void ascon_xof(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void ascon_xof( const void * in, const 
size_t len, const seed_t seed, void * out ) { state_t s; + ascon_initxof(&s, seed); ascon_absorb(&s, (const uint8_t *)in, (uint64_t)len); P(&s); // Always! Never P_ROUNDS_XOFA @@ -263,84 +265,187 @@ static void ascon_xof(const void * in, const size_t len, const seed_t seed, void // ascon using `./genkat_crypto_hash_asconxofv12_opt64` and // `./genkat_crypto_hash_asconxofav12_opt64`. #define KAT_NUM 17 -static const uint8_t KAT[KAT_NUM][2][256/8] = { +static const uint8_t KAT[KAT_NUM][2][256 / 8] = { { - { 0x5D, 0x4C, 0xBD, 0xE6, 0x35, 0x0E, 0xA4, 0xC1, 0x74, 0xBD, 0x65, 0xB5, 0xB3, 0x32, 0xF8, 0x40, 0x8F, 0x99, 0x74, 0x0B, 0x81, 0xAA, 0x02, 0x73, 0x5E, 0xAE, 0xFB, 0xCF, 0x0B, 0xA0, 0x33, 0x9E, }, - { 0x7C, 0x10, 0xDF, 0xFD, 0x6B, 0xB0, 0x3B, 0xE2, 0x62, 0xD7, 0x2F, 0xBE, 0x1B, 0x0F, 0x53, 0x00, 0x13, 0xC6, 0xC4, 0xEA, 0xDA, 0xAB, 0xDE, 0x27, 0x8D, 0x6F, 0x29, 0xD5, 0x79, 0xE3, 0x90, 0x8D, }, + { + 0x5D, 0x4C, 0xBD, 0xE6, 0x35, 0x0E, 0xA4, 0xC1, 0x74, 0xBD, 0x65, 0xB5, 0xB3, 0x32, 0xF8, 0x40, + 0x8F, 0x99, 0x74, 0x0B, 0x81, 0xAA, 0x02, 0x73, 0x5E, 0xAE, 0xFB, 0xCF, 0x0B, 0xA0, 0x33, 0x9E, + }, + { + 0x7C, 0x10, 0xDF, 0xFD, 0x6B, 0xB0, 0x3B, 0xE2, 0x62, 0xD7, 0x2F, 0xBE, 0x1B, 0x0F, 0x53, 0x00, + 0x13, 0xC6, 0xC4, 0xEA, 0xDA, 0xAB, 0xDE, 0x27, 0x8D, 0x6F, 0x29, 0xD5, 0x79, 0xE3, 0x90, 0x8D, + }, }, { - { 0xB2, 0xED, 0xBB, 0x27, 0xAC, 0x83, 0x97, 0xA5, 0x5B, 0xC8, 0x3D, 0x13, 0x7C, 0x15, 0x1D, 0xE9, 0xED, 0xE0, 0x48, 0x33, 0x8F, 0xE9, 0x07, 0xF0, 0xD3, 0x62, 0x9E, 0x71, 0x78, 0x46, 0xFE, 0xDC, }, - { 0x96, 0x54, 0x45, 0xC4, 0x6C, 0x8E, 0x9B, 0x94, 0x8E, 0xDF, 0xEF, 0x7B, 0x58, 0x79, 0xE0, 0x6A, 0xB5, 0xF0, 0x23, 0x77, 0x0E, 0xA8, 0x92, 0xFA, 0x4B, 0x54, 0x52, 0x50, 0x08, 0x46, 0x7E, 0xA3, }, + { + 0xB2, 0xED, 0xBB, 0x27, 0xAC, 0x83, 0x97, 0xA5, 0x5B, 0xC8, 0x3D, 0x13, 0x7C, 0x15, 0x1D, 0xE9, + 0xED, 0xE0, 0x48, 0x33, 0x8F, 0xE9, 0x07, 0xF0, 0xD3, 0x62, 0x9E, 0x71, 0x78, 0x46, 0xFE, 0xDC, + }, + { + 0x96, 0x54, 0x45, 0xC4, 0x6C, 0x8E, 0x9B, 0x94, 0x8E, 0xDF, 0xEF, 
0x7B, 0x58, 0x79, 0xE0, 0x6A, + 0xB5, 0xF0, 0x23, 0x77, 0x0E, 0xA8, 0x92, 0xFA, 0x4B, 0x54, 0x52, 0x50, 0x08, 0x46, 0x7E, 0xA3, + }, }, { - { 0xD1, 0x96, 0x46, 0x1C, 0x29, 0x9D, 0xB7, 0x14, 0xD7, 0x8C, 0x26, 0x79, 0x24, 0xB5, 0x78, 0x6E, 0xE2, 0x6F, 0xC4, 0x3B, 0x3E, 0x64, 0x0D, 0xAA, 0x53, 0x97, 0xE3, 0x8E, 0x39, 0xD3, 0x9D, 0xC6, }, - { 0x48, 0xEB, 0x41, 0xB7, 0xA4, 0x35, 0x2A, 0xFB, 0x89, 0x43, 0xB7, 0x65, 0x65, 0x48, 0x55, 0xB1, 0xD7, 0x10, 0x4B, 0x22, 0xE9, 0x81, 0xE5, 0x12, 0x0D, 0xA9, 0x96, 0x25, 0x79, 0xA7, 0xBA, 0xE6, }, + { + 0xD1, 0x96, 0x46, 0x1C, 0x29, 0x9D, 0xB7, 0x14, 0xD7, 0x8C, 0x26, 0x79, 0x24, 0xB5, 0x78, 0x6E, + 0xE2, 0x6F, 0xC4, 0x3B, 0x3E, 0x64, 0x0D, 0xAA, 0x53, 0x97, 0xE3, 0x8E, 0x39, 0xD3, 0x9D, 0xC6, + }, + { + 0x48, 0xEB, 0x41, 0xB7, 0xA4, 0x35, 0x2A, 0xFB, 0x89, 0x43, 0xB7, 0x65, 0x65, 0x48, 0x55, 0xB1, + 0xD7, 0x10, 0x4B, 0x22, 0xE9, 0x81, 0xE5, 0x12, 0x0D, 0xA9, 0x96, 0x25, 0x79, 0xA7, 0xBA, 0xE6, + }, }, { - { 0x1D, 0x18, 0xB9, 0xDD, 0x8F, 0xF9, 0xA1, 0xBF, 0x59, 0x75, 0x1B, 0x88, 0xD3, 0x27, 0x66, 0xC5, 0xE0, 0x54, 0x91, 0x0F, 0x49, 0x7B, 0xFF, 0x40, 0x92, 0xAF, 0xC4, 0x7F, 0x58, 0x85, 0x52, 0x3B, }, - { 0x5C, 0xFD, 0x8A, 0xCE, 0x65, 0x3E, 0x21, 0x27, 0x57, 0xD4, 0xA4, 0xAC, 0x3B, 0x6F, 0xAD, 0x31, 0xAB, 0xCB, 0xFA, 0x3F, 0x9E, 0x0F, 0x92, 0x24, 0x46, 0xF7, 0x6A, 0xF3, 0x72, 0xC5, 0x3E, 0xED, }, + { + 0x1D, 0x18, 0xB9, 0xDD, 0x8F, 0xF9, 0xA1, 0xBF, 0x59, 0x75, 0x1B, 0x88, 0xD3, 0x27, 0x66, 0xC5, + 0xE0, 0x54, 0x91, 0x0F, 0x49, 0x7B, 0xFF, 0x40, 0x92, 0xAF, 0xC4, 0x7F, 0x58, 0x85, 0x52, 0x3B, + }, + { + 0x5C, 0xFD, 0x8A, 0xCE, 0x65, 0x3E, 0x21, 0x27, 0x57, 0xD4, 0xA4, 0xAC, 0x3B, 0x6F, 0xAD, 0x31, + 0xAB, 0xCB, 0xFA, 0x3F, 0x9E, 0x0F, 0x92, 0x24, 0x46, 0xF7, 0x6A, 0xF3, 0x72, 0xC5, 0x3E, 0xED, + }, }, { - { 0x66, 0xFB, 0x74, 0x17, 0x47, 0x82, 0xAF, 0xED, 0x89, 0x84, 0x78, 0xAA, 0x72, 0x90, 0x58, 0xD5, 0xC3, 0x0A, 0xF1, 0x9A, 0xF2, 0xF5, 0xD4, 0xE1, 0xCE, 0x65, 0xCD, 0x32, 0x05, 0x94, 0xEF, 0x66, }, - { 0xE2, 0xFE, 0xE1, 0x11, 0xA8, 
0xE4, 0xB6, 0x22, 0x46, 0x2F, 0x89, 0x7D, 0xA4, 0x8C, 0x02, 0xB8, 0x07, 0xCA, 0xDD, 0xC2, 0x80, 0x17, 0x18, 0x6D, 0xC8, 0x56, 0xD8, 0xCF, 0x3D, 0xC2, 0x02, 0x48, }, + { + 0x66, 0xFB, 0x74, 0x17, 0x47, 0x82, 0xAF, 0xED, 0x89, 0x84, 0x78, 0xAA, 0x72, 0x90, 0x58, 0xD5, + 0xC3, 0x0A, 0xF1, 0x9A, 0xF2, 0xF5, 0xD4, 0xE1, 0xCE, 0x65, 0xCD, 0x32, 0x05, 0x94, 0xEF, 0x66, + }, + { + 0xE2, 0xFE, 0xE1, 0x11, 0xA8, 0xE4, 0xB6, 0x22, 0x46, 0x2F, 0x89, 0x7D, 0xA4, 0x8C, 0x02, 0xB8, + 0x07, 0xCA, 0xDD, 0xC2, 0x80, 0x17, 0x18, 0x6D, 0xC8, 0x56, 0xD8, 0xCF, 0x3D, 0xC2, 0x02, 0x48, + }, }, { - { 0xF4, 0x73, 0xC7, 0xA7, 0xD9, 0xF1, 0x40, 0xAA, 0x1A, 0xFB, 0x2D, 0xD0, 0xA0, 0xEC, 0xC2, 0x63, 0x5B, 0x01, 0x74, 0x94, 0x2A, 0x70, 0x94, 0xEC, 0x34, 0xF4, 0xD8, 0x02, 0x5B, 0x9F, 0xC3, 0x91, }, - { 0x05, 0x2E, 0xA9, 0x65, 0x27, 0x96, 0xB2, 0xD7, 0xBA, 0x5B, 0x63, 0x05, 0xAD, 0x3E, 0x42, 0x91, 0x27, 0x71, 0x30, 0x25, 0x29, 0xBA, 0xDF, 0x73, 0x51, 0x7C, 0x54, 0xC7, 0xDA, 0xD9, 0x5F, 0xDF, }, + { + 0xF4, 0x73, 0xC7, 0xA7, 0xD9, 0xF1, 0x40, 0xAA, 0x1A, 0xFB, 0x2D, 0xD0, 0xA0, 0xEC, 0xC2, 0x63, + 0x5B, 0x01, 0x74, 0x94, 0x2A, 0x70, 0x94, 0xEC, 0x34, 0xF4, 0xD8, 0x02, 0x5B, 0x9F, 0xC3, 0x91, + }, + { + 0x05, 0x2E, 0xA9, 0x65, 0x27, 0x96, 0xB2, 0xD7, 0xBA, 0x5B, 0x63, 0x05, 0xAD, 0x3E, 0x42, 0x91, + 0x27, 0x71, 0x30, 0x25, 0x29, 0xBA, 0xDF, 0x73, 0x51, 0x7C, 0x54, 0xC7, 0xDA, 0xD9, 0x5F, 0xDF, + }, }, { - { 0xD7, 0x65, 0x8B, 0x24, 0xB9, 0x88, 0x60, 0x57, 0xB8, 0x82, 0x75, 0x18, 0xA2, 0xA3, 0x67, 0x15, 0xA1, 0xB7, 0x32, 0x56, 0xE6, 0x5D, 0x04, 0x93, 0xDD, 0x0A, 0xF3, 0xE2, 0x73, 0x87, 0xDF, 0x40, }, - { 0x30, 0xBC, 0x8D, 0x20, 0xC4, 0xAA, 0x4D, 0xF5, 0x39, 0xE9, 0xE6, 0xB5, 0x8A, 0x45, 0x2C, 0xAC, 0x9E, 0x5E, 0x98, 0xF9, 0x4C, 0x6C, 0x90, 0xBF, 0x6C, 0x3B, 0xC9, 0xCF, 0x57, 0x3E, 0xB9, 0xED, }, + { + 0xD7, 0x65, 0x8B, 0x24, 0xB9, 0x88, 0x60, 0x57, 0xB8, 0x82, 0x75, 0x18, 0xA2, 0xA3, 0x67, 0x15, + 0xA1, 0xB7, 0x32, 0x56, 0xE6, 0x5D, 0x04, 0x93, 0xDD, 0x0A, 0xF3, 0xE2, 0x73, 0x87, 0xDF, 0x40, + }, + { 
+ 0x30, 0xBC, 0x8D, 0x20, 0xC4, 0xAA, 0x4D, 0xF5, 0x39, 0xE9, 0xE6, 0xB5, 0x8A, 0x45, 0x2C, 0xAC, + 0x9E, 0x5E, 0x98, 0xF9, 0x4C, 0x6C, 0x90, 0xBF, 0x6C, 0x3B, 0xC9, 0xCF, 0x57, 0x3E, 0xB9, 0xED, + }, }, { - { 0x1D, 0xB7, 0x47, 0x6C, 0xD7, 0x20, 0x64, 0xC6, 0x8E, 0x73, 0x6D, 0x82, 0x1E, 0xA6, 0xF0, 0xC9, 0x36, 0x10, 0xFE, 0x22, 0x32, 0x67, 0x54, 0xF5, 0x36, 0x68, 0x36, 0x87, 0x1A, 0x6F, 0x5A, 0x10, }, - { 0x00, 0x75, 0x5B, 0x9D, 0x72, 0xB2, 0x63, 0x2D, 0x88, 0xCB, 0x69, 0x45, 0xD5, 0x36, 0x38, 0x2C, 0x1E, 0x0B, 0x49, 0x57, 0xB4, 0xA4, 0x4B, 0xB5, 0x1C, 0x14, 0x88, 0x6A, 0x6F, 0xB3, 0x1A, 0x45, }, + { + 0x1D, 0xB7, 0x47, 0x6C, 0xD7, 0x20, 0x64, 0xC6, 0x8E, 0x73, 0x6D, 0x82, 0x1E, 0xA6, 0xF0, 0xC9, + 0x36, 0x10, 0xFE, 0x22, 0x32, 0x67, 0x54, 0xF5, 0x36, 0x68, 0x36, 0x87, 0x1A, 0x6F, 0x5A, 0x10, + }, + { + 0x00, 0x75, 0x5B, 0x9D, 0x72, 0xB2, 0x63, 0x2D, 0x88, 0xCB, 0x69, 0x45, 0xD5, 0x36, 0x38, 0x2C, + 0x1E, 0x0B, 0x49, 0x57, 0xB4, 0xA4, 0x4B, 0xB5, 0x1C, 0x14, 0x88, 0x6A, 0x6F, 0xB3, 0x1A, 0x45, + }, }, { - { 0x18, 0x42, 0x7D, 0x2D, 0x29, 0xDF, 0x1E, 0x02, 0x02, 0x64, 0x9F, 0x03, 0x2F, 0x20, 0x80, 0x36, 0x3F, 0xEC, 0x5D, 0xE7, 0x2E, 0xCA, 0xE1, 0x1B, 0x4F, 0x98, 0xCC, 0xC7, 0x58, 0x43, 0xE7, 0xCC, }, - { 0x91, 0xC7, 0x2F, 0x62, 0x73, 0xB6, 0xED, 0x44, 0x4B, 0xF5, 0x60, 0xF2, 0xFA, 0xC9, 0x9E, 0x8F, 0xED, 0xDD, 0xF3, 0x01, 0x62, 0x68, 0x8B, 0x86, 0x55, 0x3E, 0xB5, 0x7F, 0x1C, 0x98, 0xC2, 0x0E, }, + { + 0x18, 0x42, 0x7D, 0x2D, 0x29, 0xDF, 0x1E, 0x02, 0x02, 0x64, 0x9F, 0x03, 0x2F, 0x20, 0x80, 0x36, + 0x3F, 0xEC, 0x5D, 0xE7, 0x2E, 0xCA, 0xE1, 0x1B, 0x4F, 0x98, 0xCC, 0xC7, 0x58, 0x43, 0xE7, 0xCC, + }, + { + 0x91, 0xC7, 0x2F, 0x62, 0x73, 0xB6, 0xED, 0x44, 0x4B, 0xF5, 0x60, 0xF2, 0xFA, 0xC9, 0x9E, 0x8F, + 0xED, 0xDD, 0xF3, 0x01, 0x62, 0x68, 0x8B, 0x86, 0x55, 0x3E, 0xB5, 0x7F, 0x1C, 0x98, 0xC2, 0x0E, + }, }, { - { 0xCE, 0x60, 0x6E, 0x3F, 0xFC, 0xEE, 0x53, 0xB1, 0x13, 0xAA, 0x5A, 0x5C, 0xA3, 0xA1, 0x63, 0x76, 0xA3, 0xDE, 0x36, 0x43, 0x52, 0x87, 0x5D, 0x33, 0x60, 0xE1, 0x31, 
0x66, 0x6A, 0x56, 0x72, 0x48, }, - { 0x7E, 0x79, 0x76, 0x8F, 0x37, 0xD2, 0x13, 0xB1, 0x1B, 0x41, 0x93, 0xE1, 0xD6, 0x2D, 0x33, 0x99, 0x54, 0xA3, 0xB9, 0xE1, 0x6C, 0xCE, 0xF0, 0x5F, 0xD5, 0x74, 0xE1, 0x33, 0x06, 0x68, 0xB6, 0x28, }, + { + 0xCE, 0x60, 0x6E, 0x3F, 0xFC, 0xEE, 0x53, 0xB1, 0x13, 0xAA, 0x5A, 0x5C, 0xA3, 0xA1, 0x63, 0x76, + 0xA3, 0xDE, 0x36, 0x43, 0x52, 0x87, 0x5D, 0x33, 0x60, 0xE1, 0x31, 0x66, 0x6A, 0x56, 0x72, 0x48, + }, + { + 0x7E, 0x79, 0x76, 0x8F, 0x37, 0xD2, 0x13, 0xB1, 0x1B, 0x41, 0x93, 0xE1, 0xD6, 0x2D, 0x33, 0x99, + 0x54, 0xA3, 0xB9, 0xE1, 0x6C, 0xCE, 0xF0, 0x5F, 0xD5, 0x74, 0xE1, 0x33, 0x06, 0x68, 0xB6, 0x28, + }, }, { - { 0xAA, 0x1F, 0x11, 0xB1, 0x73, 0x85, 0xCC, 0xEB, 0xDC, 0x06, 0x5F, 0x20, 0xA6, 0x19, 0x5A, 0xB6, 0x54, 0x0D, 0x98, 0xA1, 0xCA, 0xBE, 0x6D, 0xBB, 0x35, 0x81, 0x33, 0x3E, 0x70, 0x32, 0xD0, 0xDB, }, - { 0xB1, 0x9D, 0x75, 0xF2, 0x26, 0x60, 0x8F, 0xBB, 0x58, 0x30, 0x72, 0x44, 0x49, 0x0A, 0xC6, 0x7E, 0x96, 0x3A, 0x66, 0x44, 0x43, 0x94, 0x1F, 0xD6, 0xB1, 0xEE, 0x03, 0x71, 0xB7, 0x6F, 0x45, 0xF3, }, + { + 0xAA, 0x1F, 0x11, 0xB1, 0x73, 0x85, 0xCC, 0xEB, 0xDC, 0x06, 0x5F, 0x20, 0xA6, 0x19, 0x5A, 0xB6, + 0x54, 0x0D, 0x98, 0xA1, 0xCA, 0xBE, 0x6D, 0xBB, 0x35, 0x81, 0x33, 0x3E, 0x70, 0x32, 0xD0, 0xDB, + }, + { + 0xB1, 0x9D, 0x75, 0xF2, 0x26, 0x60, 0x8F, 0xBB, 0x58, 0x30, 0x72, 0x44, 0x49, 0x0A, 0xC6, 0x7E, + 0x96, 0x3A, 0x66, 0x44, 0x43, 0x94, 0x1F, 0xD6, 0xB1, 0xEE, 0x03, 0x71, 0xB7, 0x6F, 0x45, 0xF3, + }, }, { - { 0xB7, 0x4A, 0xC0, 0x1F, 0xBE, 0xCE, 0xA5, 0x2A, 0x80, 0x11, 0xDD, 0x6F, 0x94, 0x71, 0x47, 0x39, 0x56, 0x03, 0x4D, 0xF5, 0x47, 0xA7, 0x81, 0x13, 0x92, 0x4D, 0x73, 0x69, 0xB6, 0xB1, 0xDC, 0x0D, }, - { 0x1C, 0x93, 0xD3, 0xA4, 0x48, 0xEC, 0x29, 0x44, 0xCC, 0x74, 0x05, 0x60, 0x08, 0xE5, 0x2B, 0x1D, 0x8F, 0xCC, 0xA9, 0x78, 0x4C, 0x80, 0x63, 0x3B, 0xCB, 0xF5, 0x74, 0x5B, 0x57, 0xA2, 0xFD, 0x58, }, + { + 0xB7, 0x4A, 0xC0, 0x1F, 0xBE, 0xCE, 0xA5, 0x2A, 0x80, 0x11, 0xDD, 0x6F, 0x94, 0x71, 0x47, 0x39, + 0x56, 0x03, 0x4D, 0xF5, 0x47, 0xA7, 
0x81, 0x13, 0x92, 0x4D, 0x73, 0x69, 0xB6, 0xB1, 0xDC, 0x0D, + }, + { + 0x1C, 0x93, 0xD3, 0xA4, 0x48, 0xEC, 0x29, 0x44, 0xCC, 0x74, 0x05, 0x60, 0x08, 0xE5, 0x2B, 0x1D, + 0x8F, 0xCC, 0xA9, 0x78, 0x4C, 0x80, 0x63, 0x3B, 0xCB, 0xF5, 0x74, 0x5B, 0x57, 0xA2, 0xFD, 0x58, + }, }, { - { 0x46, 0x50, 0xC5, 0x70, 0x93, 0x29, 0x66, 0x08, 0x25, 0xA9, 0xA5, 0xDA, 0xED, 0x9F, 0xA5, 0x0B, 0xE5, 0xAB, 0xAB, 0xAA, 0x9D, 0x37, 0x32, 0x71, 0x9A, 0x01, 0xBF, 0x29, 0xD7, 0xBF, 0xE5, 0x43, }, - { 0x20, 0x91, 0x42, 0xD4, 0xB9, 0x49, 0xBF, 0xFA, 0xC2, 0x8D, 0xB9, 0x79, 0xAF, 0x84, 0xC9, 0xC2, 0x91, 0xF8, 0x75, 0x40, 0x41, 0x0F, 0x2C, 0xC6, 0xBF, 0x96, 0xAA, 0x63, 0x7B, 0x45, 0x85, 0x64, }, + { + 0x46, 0x50, 0xC5, 0x70, 0x93, 0x29, 0x66, 0x08, 0x25, 0xA9, 0xA5, 0xDA, 0xED, 0x9F, 0xA5, 0x0B, + 0xE5, 0xAB, 0xAB, 0xAA, 0x9D, 0x37, 0x32, 0x71, 0x9A, 0x01, 0xBF, 0x29, 0xD7, 0xBF, 0xE5, 0x43, + }, + { + 0x20, 0x91, 0x42, 0xD4, 0xB9, 0x49, 0xBF, 0xFA, 0xC2, 0x8D, 0xB9, 0x79, 0xAF, 0x84, 0xC9, 0xC2, + 0x91, 0xF8, 0x75, 0x40, 0x41, 0x0F, 0x2C, 0xC6, 0xBF, 0x96, 0xAA, 0x63, 0x7B, 0x45, 0x85, 0x64, + }, }, { - { 0x6E, 0x68, 0x23, 0xD3, 0xC0, 0x4E, 0xA3, 0xBC, 0x20, 0xB4, 0x3B, 0xEC, 0xEB, 0x5B, 0x42, 0x85, 0x4E, 0xF8, 0x40, 0xEE, 0x47, 0x7B, 0x58, 0x70, 0x94, 0x49, 0xBB, 0x8D, 0x8F, 0x63, 0xEE, 0x78, }, - { 0xF8, 0x4E, 0x89, 0xA3, 0xE9, 0x07, 0x0A, 0xAE, 0xFE, 0x86, 0x0D, 0x49, 0x83, 0x80, 0x7E, 0x07, 0xD1, 0xFB, 0xF6, 0x5D, 0xAB, 0x2F, 0x1B, 0x81, 0x51, 0x34, 0x7F, 0x82, 0x8C, 0x9F, 0x0F, 0xC0, }, + { + 0x6E, 0x68, 0x23, 0xD3, 0xC0, 0x4E, 0xA3, 0xBC, 0x20, 0xB4, 0x3B, 0xEC, 0xEB, 0x5B, 0x42, 0x85, + 0x4E, 0xF8, 0x40, 0xEE, 0x47, 0x7B, 0x58, 0x70, 0x94, 0x49, 0xBB, 0x8D, 0x8F, 0x63, 0xEE, 0x78, + }, + { + 0xF8, 0x4E, 0x89, 0xA3, 0xE9, 0x07, 0x0A, 0xAE, 0xFE, 0x86, 0x0D, 0x49, 0x83, 0x80, 0x7E, 0x07, + 0xD1, 0xFB, 0xF6, 0x5D, 0xAB, 0x2F, 0x1B, 0x81, 0x51, 0x34, 0x7F, 0x82, 0x8C, 0x9F, 0x0F, 0xC0, + }, }, { - { 0x3D, 0x02, 0xF6, 0x79, 0xEF, 0x69, 0xD3, 0x3D, 0xF1, 0x7C, 0xC8, 0x04, 0x0A, 0xBC, 0xAC, 
0xDD, 0xF8, 0x13, 0x3A, 0x04, 0xE0, 0xD8, 0x9E, 0x3C, 0xF1, 0x0D, 0xAD, 0x74, 0xE0, 0x08, 0x04, 0xD9, }, - { 0x82, 0xE2, 0x74, 0x4E, 0xE7, 0xD9, 0x32, 0x76, 0xD1, 0x74, 0xE9, 0x87, 0x7A, 0x42, 0x6A, 0x83, 0x0D, 0xF9, 0x1A, 0xAE, 0x41, 0x24, 0x57, 0x6A, 0x7E, 0xC5, 0x2E, 0xE8, 0x47, 0xEB, 0x0B, 0xC0, }, + { + 0x3D, 0x02, 0xF6, 0x79, 0xEF, 0x69, 0xD3, 0x3D, 0xF1, 0x7C, 0xC8, 0x04, 0x0A, 0xBC, 0xAC, 0xDD, + 0xF8, 0x13, 0x3A, 0x04, 0xE0, 0xD8, 0x9E, 0x3C, 0xF1, 0x0D, 0xAD, 0x74, 0xE0, 0x08, 0x04, 0xD9, + }, + { + 0x82, 0xE2, 0x74, 0x4E, 0xE7, 0xD9, 0x32, 0x76, 0xD1, 0x74, 0xE9, 0x87, 0x7A, 0x42, 0x6A, 0x83, + 0x0D, 0xF9, 0x1A, 0xAE, 0x41, 0x24, 0x57, 0x6A, 0x7E, 0xC5, 0x2E, 0xE8, 0x47, 0xEB, 0x0B, 0xC0, + }, }, { - { 0x39, 0x9E, 0x6B, 0xE5, 0x84, 0xDE, 0x50, 0x91, 0xF4, 0x97, 0x11, 0xED, 0x6C, 0x19, 0x5F, 0x0D, 0xE0, 0xEE, 0x81, 0x11, 0x13, 0xC6, 0x8B, 0x37, 0x23, 0x99, 0xDB, 0xBF, 0xF2, 0x8F, 0x11, 0x73, }, - { 0x75, 0xF6, 0x13, 0x59, 0xF0, 0x4C, 0x77, 0xFF, 0x4D, 0xE5, 0x8A, 0x10, 0xF9, 0xF8, 0x7B, 0x31, 0xB5, 0xB8, 0xDA, 0x33, 0x73, 0xF6, 0x23, 0x0F, 0xE1, 0x73, 0x50, 0x33, 0x44, 0x6B, 0x99, 0x48, }, + { + 0x39, 0x9E, 0x6B, 0xE5, 0x84, 0xDE, 0x50, 0x91, 0xF4, 0x97, 0x11, 0xED, 0x6C, 0x19, 0x5F, 0x0D, + 0xE0, 0xEE, 0x81, 0x11, 0x13, 0xC6, 0x8B, 0x37, 0x23, 0x99, 0xDB, 0xBF, 0xF2, 0x8F, 0x11, 0x73, + }, + { + 0x75, 0xF6, 0x13, 0x59, 0xF0, 0x4C, 0x77, 0xFF, 0x4D, 0xE5, 0x8A, 0x10, 0xF9, 0xF8, 0x7B, 0x31, + 0xB5, 0xB8, 0xDA, 0x33, 0x73, 0xF6, 0x23, 0x0F, 0xE1, 0x73, 0x50, 0x33, 0x44, 0x6B, 0x99, 0x48, + }, }, { - { 0xC8, 0x61, 0xA8, 0x9C, 0xFB, 0x13, 0x35, 0xF2, 0x78, 0xC9, 0x6C, 0xF7, 0xFF, 0xC9, 0x75, 0x3C, 0x29, 0x0C, 0xBE, 0x1A, 0x4E, 0x18, 0x6D, 0x29, 0x23, 0xB4, 0x96, 0xBB, 0x4E, 0xA5, 0xE5, 0x19, }, - { 0x94, 0x24, 0xB7, 0xAE, 0x5F, 0xA7, 0x2D, 0x3E, 0xE4, 0xA2, 0x66, 0x11, 0x2E, 0x7A, 0xBC, 0x40, 0x92, 0xE8, 0x15, 0xAE, 0x29, 0xFA, 0xB2, 0x6D, 0xA6, 0x66, 0xC1, 0x48, 0x5B, 0xA9, 0x2B, 0xDC, }, + { + 0xC8, 0x61, 0xA8, 0x9C, 0xFB, 0x13, 0x35, 0xF2, 0x78, 0xC9, 
0x6C, 0xF7, 0xFF, 0xC9, 0x75, 0x3C, + 0x29, 0x0C, 0xBE, 0x1A, 0x4E, 0x18, 0x6D, 0x29, 0x23, 0xB4, 0x96, 0xBB, 0x4E, 0xA5, 0xE5, 0x19, + }, + { + 0x94, 0x24, 0xB7, 0xAE, 0x5F, 0xA7, 0x2D, 0x3E, 0xE4, 0xA2, 0x66, 0x11, 0x2E, 0x7A, 0xBC, 0x40, + 0x92, 0xE8, 0x15, 0xAE, 0x29, 0xFA, 0xB2, 0x6D, 0xA6, 0x66, 0xC1, 0x48, 0x5B, 0xA9, 0x2B, 0xDC, + }, }, }; -static bool ascon_xof_selftest(void) { +static bool ascon_xof_selftest( void ) { uint8_t input[KAT_NUM - 1]; + for (int i = 0; i < sizeof(input); i++) { input[i] = (uint8_t)i; } bool passed = true; for (int i = 0; i < KAT_NUM; i++) { - uint8_t output[256/8]; + uint8_t output[256 / 8]; if (isLE()) { ascon_xof<256, true, true>(input, i, 0, output); @@ -349,9 +454,9 @@ static bool ascon_xof_selftest(void) { } if (0 != memcmp(KAT[i][1], output, sizeof(output))) { printf("Mismatch with XOFa len %d\n Expected:", i); - for (int j = 0; j < 256/8; j++) { printf(" %02x", KAT[i][1][j]); } + for (int j = 0; j < 256 / 8; j++) { printf(" %02x", KAT[i][1][j]); } printf("\n Found :"); - for (int j = 0; j < 256/8; j++) { printf(" %02x", output[j]); } + for (int j = 0; j < 256 / 8; j++) { printf(" %02x", output[j]); } printf("\n\n"); passed = false; } @@ -363,9 +468,9 @@ static bool ascon_xof_selftest(void) { } if (0 != memcmp(KAT[i][0], output, sizeof(output))) { printf("Mismatch with XOF len %d\n Expected:", i); - for (int j = 0; j < 256/8; j++) { printf(" %02x", KAT[i][0][j]); } + for (int j = 0; j < 256 / 8; j++) { printf(" %02x", KAT[i][0][j]); } printf("\n Found :"); - for (int j = 0; j < 256/8; j++) { printf(" %02x", output[j]); } + for (int j = 0; j < 256 / 8; j++) { printf(" %02x", output[j]); } printf("\n\n"); passed = false; } @@ -376,234 +481,234 @@ static bool ascon_xof_selftest(void) { //------------------------------------------------------------ REGISTER_FAMILY(ascon, - $.src_url = "https://github.com/ascon/ascon-c", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://github.com/ascon/ascon-c", + 
$.src_status = HashFamilyInfo::SRC_STABLEISH + ); REGISTER_HASH(ascon_XOF_32, - $.desc = "ascon v1.2 (XOF, 32 bits)", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 32, - $.verification_LE = 0x1124BD16, - $.verification_BE = 0xED22753E, - $.initfn = ascon_xof_selftest, - $.hashfn_native = ascon_xof<32,false,false>, - $.hashfn_bswap = ascon_xof<32,false,true> -); + $.desc = "ascon v1.2 (XOF, 32 bits)", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 32, + $.verification_LE = 0x1124BD16, + $.verification_BE = 0xED22753E, + $.initfn = ascon_xof_selftest, + $.hashfn_native = ascon_xof<32, false, false>, + $.hashfn_bswap = ascon_xof<32, false, true> + ); REGISTER_HASH(ascon_XOFa_32, - $.desc = "ascon v1.2 (XOFa, 32 bits)", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 32, - $.verification_LE = 0x8F5BB129, - $.verification_BE = 0x44EBDFB6, - $.initfn = ascon_xof_selftest, - $.hashfn_native = ascon_xof<32,true,false>, - $.hashfn_bswap = ascon_xof<32,true,true> -); + $.desc = "ascon v1.2 (XOFa, 32 bits)", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 32, + $.verification_LE = 0x8F5BB129, + $.verification_BE = 0x44EBDFB6, + $.initfn = ascon_xof_selftest, + $.hashfn_native = ascon_xof<32, true, false>, + $.hashfn_bswap = ascon_xof<32, true, 
true> + ); REGISTER_HASH(ascon_XOF_64, - $.desc = "ascon v1.2 (XOF, 64 bits)", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 64, - $.verification_LE = 0xCDAAB40E, - $.verification_BE = 0xAC65EB36, - $.initfn = ascon_xof_selftest, - $.hashfn_native = ascon_xof<64,false,false>, - $.hashfn_bswap = ascon_xof<64,false,true> -); + $.desc = "ascon v1.2 (XOF, 64 bits)", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 64, + $.verification_LE = 0xCDAAB40E, + $.verification_BE = 0xAC65EB36, + $.initfn = ascon_xof_selftest, + $.hashfn_native = ascon_xof<64, false, false>, + $.hashfn_bswap = ascon_xof<64, false, true> + ); REGISTER_HASH(ascon_XOFa_64, - $.desc = "ascon v1.2 (XOFa, 64 bits)", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 64, - $.verification_LE = 0x43ACD116, - $.verification_BE = 0xACFB3C9F, - $.initfn = ascon_xof_selftest, - $.hashfn_native = ascon_xof<64,true,false>, - $.hashfn_bswap = ascon_xof<64,true,true> -); + $.desc = "ascon v1.2 (XOFa, 64 bits)", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 64, + $.verification_LE = 0x43ACD116, + $.verification_BE = 0xACFB3C9F, + $.initfn = ascon_xof_selftest, + $.hashfn_native = ascon_xof<64, true, false>, + $.hashfn_bswap = ascon_xof<64, true, true> + ); 
REGISTER_HASH(ascon_XOF_128, - $.desc = "ascon v1.2 (XOF, 128 bits)", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 128, - $.verification_LE = 0x9B2F9305, - $.verification_BE = 0x6C15FBDF, - $.initfn = ascon_xof_selftest, - $.hashfn_native = ascon_xof<128,false,false>, - $.hashfn_bswap = ascon_xof<128,false,true> -); + $.desc = "ascon v1.2 (XOF, 128 bits)", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 128, + $.verification_LE = 0x9B2F9305, + $.verification_BE = 0x6C15FBDF, + $.initfn = ascon_xof_selftest, + $.hashfn_native = ascon_xof<128, false, false>, + $.hashfn_bswap = ascon_xof<128, false, true> + ); REGISTER_HASH(ascon_XOFa_128, - $.desc = "ascon v1.2 (XOFa, 128 bits)", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 128, - $.verification_LE = 0x5701888C, - $.verification_BE = 0x10B381AE, - $.initfn = ascon_xof_selftest, - $.hashfn_native = ascon_xof<128,true,false>, - $.hashfn_bswap = ascon_xof<128,true,true> -); + $.desc = "ascon v1.2 (XOFa, 128 bits)", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 128, + $.verification_LE = 0x5701888C, + $.verification_BE = 0x10B381AE, + $.initfn = ascon_xof_selftest, + $.hashfn_native = ascon_xof<128, true, false>, + $.hashfn_bswap = ascon_xof<128, true, true> + ); 
REGISTER_HASH(ascon_XOF_160, - $.desc = "ascon v1.2 (XOF, 160 bits)", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 160, - $.verification_LE = 0x3B726110, - $.verification_BE = 0x3215F456, - $.initfn = ascon_xof_selftest, - $.hashfn_native = ascon_xof<160,false,false>, - $.hashfn_bswap = ascon_xof<160,false,true> -); + $.desc = "ascon v1.2 (XOF, 160 bits)", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 160, + $.verification_LE = 0x3B726110, + $.verification_BE = 0x3215F456, + $.initfn = ascon_xof_selftest, + $.hashfn_native = ascon_xof<160, false, false>, + $.hashfn_bswap = ascon_xof<160, false, true> + ); REGISTER_HASH(ascon_XOFa_160, - $.desc = "ascon v1.2 (XOFa, 160 bits)", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 160, - $.verification_LE = 0xA4E9A794, - $.verification_BE = 0x387FC024, - $.initfn = ascon_xof_selftest, - $.hashfn_native = ascon_xof<160,true,false>, - $.hashfn_bswap = ascon_xof<160,true,true> -); + $.desc = "ascon v1.2 (XOFa, 160 bits)", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 160, + $.verification_LE = 0xA4E9A794, + $.verification_BE = 0x387FC024, + $.initfn = ascon_xof_selftest, + $.hashfn_native = ascon_xof<160, true, false>, + $.hashfn_bswap = ascon_xof<160, true, true> + ); 
REGISTER_HASH(ascon_XOF_224, - $.desc = "ascon v1.2 (XOF, 224 bits)", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 224, - $.verification_LE = 0x83EAEBCC, - $.verification_BE = 0x9929AC99, - $.initfn = ascon_xof_selftest, - $.hashfn_native = ascon_xof<224,false,false>, - $.hashfn_bswap = ascon_xof<224,false,true> -); + $.desc = "ascon v1.2 (XOF, 224 bits)", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 224, + $.verification_LE = 0x83EAEBCC, + $.verification_BE = 0x9929AC99, + $.initfn = ascon_xof_selftest, + $.hashfn_native = ascon_xof<224, false, false>, + $.hashfn_bswap = ascon_xof<224, false, true> + ); REGISTER_HASH(ascon_XOFa_224, - $.desc = "ascon v1.2 (XOFa, 224 bits)", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 224, - $.verification_LE = 0x618744B2, - $.verification_BE = 0x2D9AFDE5, - $.initfn = ascon_xof_selftest, - $.hashfn_native = ascon_xof<224,true,false>, - $.hashfn_bswap = ascon_xof<224,true,true> -); + $.desc = "ascon v1.2 (XOFa, 224 bits)", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 224, + $.verification_LE = 0x618744B2, + $.verification_BE = 0x2D9AFDE5, + $.initfn = ascon_xof_selftest, + $.hashfn_native = ascon_xof<224, true, false>, + $.hashfn_bswap = ascon_xof<224, true, true> + ); 
REGISTER_HASH(ascon_XOF_256, - $.desc = "ascon v1.2 (XOF, 256 bits)", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 256, - $.verification_LE = 0xC6629453, - $.verification_BE = 0x6D8F406F, - $.initfn = ascon_xof_selftest, - $.hashfn_native = ascon_xof<256,false,false>, - $.hashfn_bswap = ascon_xof<256,false,true> -); + $.desc = "ascon v1.2 (XOF, 256 bits)", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 256, + $.verification_LE = 0xC6629453, + $.verification_BE = 0x6D8F406F, + $.initfn = ascon_xof_selftest, + $.hashfn_native = ascon_xof<256, false, false>, + $.hashfn_bswap = ascon_xof<256, false, true> + ); REGISTER_HASH(ascon_XOFa_256, - $.desc = "ascon v1.2 (XOFa, 256 bits)", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 256, - $.verification_LE = 0x2ACF11FE, - $.verification_BE = 0xE5CD2E9B, - $.initfn = ascon_xof_selftest, - $.hashfn_native = ascon_xof<256,true,false>, - $.hashfn_bswap = ascon_xof<256,true,true> -); + $.desc = "ascon v1.2 (XOFa, 256 bits)", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 256, + $.verification_LE = 0x2ACF11FE, + $.verification_BE = 0xE5CD2E9B, + $.initfn = ascon_xof_selftest, + $.hashfn_native = ascon_xof<256, true, false>, + $.hashfn_bswap = ascon_xof<256, true, true> + ); diff --git 
a/hashes/badhash.cpp b/hashes/badhash.cpp index 8e215e4d..660055e2 100644 --- a/hashes/badhash.cpp +++ b/hashes/badhash.cpp @@ -27,11 +27,11 @@ #include "Platform.h" #include "Hashlib.h" -template < bool bswap > -static void BadHash(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void BadHash( const void * in, const size_t len, const seed_t seed, void * out ) { const uint8_t * data = (const uint8_t *)in; const uint8_t * const end = &data[len]; - uint32_t h = seed; + uint32_t h = seed; while (data < end) { h ^= h >> 3; @@ -42,11 +42,11 @@ static void BadHash(const void * in, const size_t len, const seed_t seed, void * PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void sumhash8(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void sumhash8( const void * in, const size_t len, const seed_t seed, void * out ) { const uint8_t * data = (const uint8_t *)in; const uint8_t * const end = &data[len]; - uint32_t h = seed; + uint32_t h = seed; while (data < end) { h += *data++; @@ -55,11 +55,11 @@ static void sumhash8(const void * in, const size_t len, const seed_t seed, void PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void sumhash32(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void sumhash32( const void * in, const size_t len, const seed_t seed, void * out ) { const uint32_t * data = (const uint32_t *)in; - const uint32_t * const end = &data[len/4]; - uint32_t h = seed; + const uint32_t * const end = &data[len / 4]; + uint32_t h = seed; while (data < end) { h += GET_U32((const uint8_t *)data, 0); @@ -67,8 +67,8 @@ static void sumhash32(const void * in, const size_t len, const seed_t seed, void } if (len & 3) { - uint8_t * dc = (uint8_t*)data; //byte stepper - const uint8_t * const endc = &((const uint8_t*)in)[len]; + uint8_t * dc = (uint8_t *)data; // byte stepper + const uint8_t * const endc = &((const uint8_t *)in)[len]; 
while (dc < endc) { h += *dc++ * UINT64_C(11400714819323198485); } @@ -78,58 +78,58 @@ static void sumhash32(const void * in, const size_t len, const seed_t seed, void } REGISTER_FAMILY(badhash, - $.src_url = "https://github.com/rurban/smhasher/blob/master/Hashes.cpp", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/rurban/smhasher/blob/master/Hashes.cpp", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(badhash, - $.desc = "very simple XOR shift", - $.hash_flags = - FLAG_HASH_MOCK, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0xAB432E23, - $.verification_BE = 0x241F49BE, - $.hashfn_native = BadHash, - $.hashfn_bswap = BadHash, - $.seedfixfn = excludeBadseeds, - $.badseeds = { 0 }, - $.sort_order = 20 -); + $.desc = "very simple XOR shift", + $.hash_flags = + FLAG_HASH_MOCK, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0xAB432E23, + $.verification_BE = 0x241F49BE, + $.hashfn_native = BadHash, + $.hashfn_bswap = BadHash, + $.seedfixfn = excludeBadseeds, + $.badseeds = { 0 }, + $.sort_order = 20 + ); REGISTER_HASH(sum8hash, - $.desc = "sum all 8-bit bytes", - $.hash_flags = - FLAG_HASH_MOCK, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_SANITY_FAILS , - $.bits = 32, - $.verification_LE = 0x0000A9AC, - $.verification_BE = 0xACA90000, - $.hashfn_native = sumhash8, - $.hashfn_bswap = sumhash8, - $.seedfixfn = excludeBadseeds, - $.badseeds = { 0 }, - $.sort_order = 30 -); + $.desc = "sum all 8-bit bytes", + $.hash_flags = + FLAG_HASH_MOCK, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_SANITY_FAILS, + $.bits = 32, + $.verification_LE = 0x0000A9AC, + $.verification_BE = 0xACA90000, + $.hashfn_native = sumhash8, + $.hashfn_bswap = sumhash8, + $.seedfixfn = excludeBadseeds, + $.badseeds = { 0 }, + $.sort_order = 30 + ); REGISTER_HASH(sum32hash, - $.desc = "sum all 32-bit words", - 
$.hash_flags = - FLAG_HASH_MOCK, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_MULTIPLY, - $.bits = 32, - $.verification_LE = 0x3D6DC280, - $.verification_BE = 0x00A10D9E, - $.hashfn_native = sumhash32, - $.hashfn_bswap = sumhash32, - $.seedfixfn = excludeZeroSeed, - $.badseeds = { UINT64_C(0x9e3779b97f4a7c15) }, - $.sort_order = 31 -); + $.desc = "sum all 32-bit words", + $.hash_flags = + FLAG_HASH_MOCK, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_MULTIPLY, + $.bits = 32, + $.verification_LE = 0x3D6DC280, + $.verification_BE = 0x00A10D9E, + $.hashfn_native = sumhash32, + $.hashfn_bswap = sumhash32, + $.seedfixfn = excludeZeroSeed, + $.badseeds = { UINT64_C (0x9e3779b97f4a7c15) }, + $.sort_order = 31 + ); diff --git a/hashes/beamsplitter.cpp b/hashes/beamsplitter.cpp index b3926a80..3368b55a 100644 --- a/hashes/beamsplitter.cpp +++ b/hashes/beamsplitter.cpp @@ -30,309 +30,310 @@ // gotten from random.org // as hex bytes that I formatted into 64-bit values static const uint64_t T[1024] = { - UINT64_C(0x6fa74b1b15047628), UINT64_C(0xa2b5ee64e9e8f629), UINT64_C(0xd0937853bdd0edca), UINT64_C(0x4e9fb2b2b0a637a6), - UINT64_C(0x26ac5a8fac69497e), UINT64_C(0x51e127f0db14aa48), UINT64_C(0xea5b9f512d8d6a09), UINT64_C(0xf3af1406a87de6a9), - UINT64_C(0x3b36e2ed14818955), UINT64_C(0xb0ac19ef2dde986c), UINT64_C(0xd34ed04929f8f66d), UINT64_C(0xe99978cff2b324ea), - UINT64_C(0x4032cb3ecff8cb38), UINT64_C(0xfa52274072d86042), UINT64_C(0x27437346dec26105), UINT64_C(0xec1cbf04b76aec71), - UINT64_C(0x6dd57b3dac56cd39), UINT64_C(0x34e9021797e95aad), UINT64_C(0xdc8d3363540c5999), UINT64_C(0x773d283eeeabf4ab), - UINT64_C(0x373c522657461aaf), UINT64_C(0x154cfe0f497d7f78), UINT64_C(0x6d377183b5ca6550), UINT64_C(0x614da5f6055e904b), - UINT64_C(0xd77b66b34896f00e), UINT64_C(0x122538125d6adaef), UINT64_C(0x1021e161206d9091), UINT64_C(0x38407c4313aefdfa), - UINT64_C(0xd941cc5dafc66162), UINT64_C(0xfc2432a6ea885315), 
UINT64_C(0x5576dc02b68b10ed), UINT64_C(0xd8449f9d4ab139a2), - UINT64_C(0xd333cbcd49cbacba), UINT64_C(0x700d20430e06eeb8), UINT64_C(0xdeb34810d6d0320a), UINT64_C(0x6743363d6cc8ba68), - UINT64_C(0xbd183cb526e6e936), UINT64_C(0xee62bf5ee97de5ea), UINT64_C(0xf6b855e743e76853), UINT64_C(0x83ac16a35d132df9), - UINT64_C(0x2046f2c70c2130b1), UINT64_C(0xaadd5007102b5ee4), UINT64_C(0x8eedac842e63cdac), UINT64_C(0xba02956e43c18608), - UINT64_C(0xd2688af010adbeaf), UINT64_C(0x4aaa5295377c17be), UINT64_C(0x83792382ba198f10), UINT64_C(0x6fc42849961a25b6), - UINT64_C(0x3501677f06fb1311), UINT64_C(0x1e18b89705c224dd), UINT64_C(0xa0a0b8684aa2e12d), UINT64_C(0x30d19aac3d40898e), - UINT64_C(0x41dd335a29272e9b), UINT64_C(0x5c5d445a07426e3f), UINT64_C(0x6f13080e67946fdc), UINT64_C(0x3ddabae21609bf08), - UINT64_C(0x8e6146d3cde11ca5), UINT64_C(0x9eff76a4c39eacf4), UINT64_C(0x71c66d0a423a21b7), UINT64_C(0x68515c0b712bbc4f), - UINT64_C(0x5edd17cec412a735), UINT64_C(0xa444f487c96f896c), UINT64_C(0xc161d16d4e54041a), UINT64_C(0x3a2d84d3e09bafb9), - UINT64_C(0x63a406b157a5f2f1), UINT64_C(0x18292d6007f839ba), UINT64_C(0xcaac5789618f2aac), UINT64_C(0x6f516d95f749dd97), - UINT64_C(0xb5784409560e219f), UINT64_C(0x12f0f0d6fbdcb81c), UINT64_C(0x993d6c2a47089679), UINT64_C(0xcc9247b35870aebf), - UINT64_C(0xa1ca8eff8b1bca70), UINT64_C(0x7a1d015397e558cc), UINT64_C(0xc504a4d4815f8722), UINT64_C(0x3e44258e93472b26), - UINT64_C(0x11bd0578a36c8044), UINT64_C(0x84c7087603a0a6ea), UINT64_C(0x457d0c59e84c9ac8), UINT64_C(0x32129275ee63dd95), - UINT64_C(0x66269220e943024d), UINT64_C(0x197de12f9d6e5c72), UINT64_C(0x06fdd09a4d6157dd), UINT64_C(0xf8c1a8b51fe95716), - UINT64_C(0x41eeb6129149f6cf), UINT64_C(0x42f510887a61de1b), UINT64_C(0xf3d2aa6e4fe5949d), UINT64_C(0xc0799007b85373aa), - UINT64_C(0x81577b167de515c3), UINT64_C(0x01f424fc6b856270), UINT64_C(0xff6247ed0658caa8), UINT64_C(0x63ad005e620fe4bb), - UINT64_C(0xdb919b9f63c93174), UINT64_C(0x5693dbd6c76c7683), UINT64_C(0xdaa9b82e85e0355a), 
UINT64_C(0x424c5c4e5672fc73), - UINT64_C(0x9de3ca332ba818f1), UINT64_C(0xb28f375a58bc6c1e), UINT64_C(0xef0af1e6041b9cd4), UINT64_C(0x0418afb53ef5408f), - UINT64_C(0x9a37634585d3330a), UINT64_C(0x3ab5aec014b097cd), UINT64_C(0x384a0739a3ff7dc8), UINT64_C(0x0ff31c11226e5d5a), - UINT64_C(0x71070735f1c16bb4), UINT64_C(0xc4f78905f49a3840), UINT64_C(0x561f68d6a5f44a81), UINT64_C(0xb09bd8cd8d932357), - UINT64_C(0xf270b47652354fdb), UINT64_C(0x47d6ca7bba50c2c7), UINT64_C(0x2720590d7b2b7b54), UINT64_C(0xcaac35df08cab300), - UINT64_C(0xd05759dee169d9fd), UINT64_C(0xdb8d0d0403a6aafb), UINT64_C(0xcd3ab85684ba537c), UINT64_C(0xad69c4e5240c158f), - UINT64_C(0x65427c4ff3637db2), UINT64_C(0x085ecbbf903a45ae), UINT64_C(0xeafed57a94384c62), UINT64_C(0xc99972367cd21eba), - UINT64_C(0xc1e2cf52270b20eb), UINT64_C(0x825dad5142681653), UINT64_C(0x47e99edc5e141d94), UINT64_C(0x125813bc26e42e07), - UINT64_C(0x06f41d2441b172ca), UINT64_C(0x5e9e640ed911730e), UINT64_C(0x5900403342f0f362), UINT64_C(0x57a600d157ee9945), - UINT64_C(0xbcc5d702f02dc7e0), UINT64_C(0x8258cf5a1a6435ab), UINT64_C(0xdf885b6a0343a3e0), UINT64_C(0xadd74c04a503b09a), - UINT64_C(0x0ea210122eeef589), UINT64_C(0x5217fd50f3ecaf85), UINT64_C(0xd0c39849df6b4756), UINT64_C(0xf66d9e1c91bd0981), - UINT64_C(0x0f355b00f40e3e6b), UINT64_C(0xc01dabcd14518520), UINT64_C(0x58691b4fa9e7d327), UINT64_C(0x357616c77c22fffe), - UINT64_C(0xb9fbf8de2ed23303), UINT64_C(0x0195932bc205c466), UINT64_C(0xef0763590a08a50d), UINT64_C(0xf546866c0028a938), - UINT64_C(0x41cc8732eaad496a), UINT64_C(0xadc61f16374896c6), UINT64_C(0x5eb8f93f25ad0457), UINT64_C(0x240f00f5db3fae25), - UINT64_C(0xcc48503596dc01ef), UINT64_C(0x351baaa904a306d5), UINT64_C(0x7111179ae328bb19), UINT64_C(0x6789a31719d5d453), - UINT64_C(0xf5318492c9613de6), UINT64_C(0xa0e8c24f3f0da716), UINT64_C(0xac15d68d54401b9d), UINT64_C(0xadafb35cf63092ee), - UINT64_C(0xceb5f8d63c7fec4c), UINT64_C(0x1ae71929b980fc9d), UINT64_C(0x6efdc5693ef4ee2a), UINT64_C(0xbedd8334cade7855), - 
UINT64_C(0x06f1b768b476a249), UINT64_C(0x9e614bedf41dd639), UINT64_C(0x9eca9c6c9e389a5d), UINT64_C(0x76999bf01b912df2), - UINT64_C(0x04d52fb2ac70ab31), UINT64_C(0xe467ea8172f5d066), UINT64_C(0x356ed51bb0e094ae), UINT64_C(0xab2047c21b54d8ba), - UINT64_C(0x21dbbfa0a6157474), UINT64_C(0x7de36edec62f1997), UINT64_C(0x306ef59f5204a58c), UINT64_C(0x954135a769d5b72e), - UINT64_C(0x9d7774a0c2d29380), UINT64_C(0xc03acfd63ac6b88c), UINT64_C(0x9989d5ee565322e6), UINT64_C(0x19d1a58324bdd145), - UINT64_C(0xe74685383cc6b27c), UINT64_C(0xf9edffe1c4d81108), UINT64_C(0x94950b5b6247cb43), UINT64_C(0xe3fa8c6468d419eb), - UINT64_C(0x29981bd802f77ac5), UINT64_C(0x6cf1a6cab28c1c36), UINT64_C(0x1d34a382a5d48973), UINT64_C(0xcd1d5d546e5e4d3d), - UINT64_C(0x4ad78b4a37e52322), UINT64_C(0x24da17671ab463f2), UINT64_C(0x527504b7c7bc5537), UINT64_C(0x7ba1d92e1969b2b5), - UINT64_C(0x53a130812c49d64a), UINT64_C(0x503af48d9510f1d7), UINT64_C(0x719db8a348dee165), UINT64_C(0xa85e4fad1f343e67), - UINT64_C(0xdafc1fa9203d2d45), UINT64_C(0x7730f245c903a407), UINT64_C(0xb7c04e53f913aeae), UINT64_C(0x39ed817e1e039153), - UINT64_C(0xf415ea2b3efc7606), UINT64_C(0x15e3c53fe43f104d), UINT64_C(0x1b71e4d83ccba83c), UINT64_C(0xfe088f4c90812841), - UINT64_C(0x1ff8e2ee0a04b6ae), UINT64_C(0xf4f4a23612b9eed2), UINT64_C(0xc596a66051b8aca1), UINT64_C(0xbc898edd3370a8dd), - UINT64_C(0xce7638a7a2f9152e), UINT64_C(0xd99192635c0d5c92), UINT64_C(0x62038c87c094a1ff), UINT64_C(0xa73f1bcaac7343af), - UINT64_C(0x93c797804faa5ff3), UINT64_C(0x9da7407c705da1f0), UINT64_C(0xa52cde7d37fef9f0), UINT64_C(0xb93a7db97e3fa7ff), - UINT64_C(0x75ee91392c60fb6b), UINT64_C(0x4d7f8e3db9383ae0), UINT64_C(0xe0aec397d5290d06), UINT64_C(0x159a20f22d740d81), - UINT64_C(0x231416cff9a9b014), UINT64_C(0x71ed3a6e513b4795), UINT64_C(0x190b08ebcb87f3bc), UINT64_C(0x36bb0bcb0e8df593), - UINT64_C(0xc1e63cdc4d78dfb3), UINT64_C(0x36e2c57ba6799460), UINT64_C(0x280c0618b19f63dc), UINT64_C(0xca2b8e49d6c71d2d), - UINT64_C(0xc881e59705270f09), 
UINT64_C(0x26fdf0dbb5f2f451), UINT64_C(0xc6d1a3697ca86855), UINT64_C(0xd00755a203980eb5), - UINT64_C(0xa85962163dd7de95), UINT64_C(0x622b7a1d2531d00e), UINT64_C(0xb6c1cfba74436ef7), UINT64_C(0x9578891a720bf317), - UINT64_C(0x5e325058bd3a343a), UINT64_C(0x9a468a5a888a475f), UINT64_C(0xa57f0edb414a0589), UINT64_C(0xa044aef7ea680f8c), - UINT64_C(0x2036717cee9b991a), UINT64_C(0x3925631ec66cb8aa), UINT64_C(0xdcb6a5da6b2fc78f), UINT64_C(0x17a8cd724b7b5e26), - UINT64_C(0x1c704c6a48a2dae0), UINT64_C(0x87d8f6738a0c30bc), UINT64_C(0xd8580262a4801240), UINT64_C(0x5812cea521ffaeaf), - UINT64_C(0x21b6ff923871f14c), UINT64_C(0x922dbd45c2b307d1), UINT64_C(0x5c67ecbaace24d31), UINT64_C(0xb90f5e3acfaeff9b), - UINT64_C(0xea5aa9f2f14efeb1), UINT64_C(0x08003af95ab5ce92), UINT64_C(0x5a39361e05692622), UINT64_C(0xd4b8cddc309e44da), - UINT64_C(0xe20bfe5f0a1343d9), UINT64_C(0x13848357d100b2b3), UINT64_C(0x912a1b220fa678f5), UINT64_C(0x7631242b7f6d6365), - UINT64_C(0x5a9f9a3284d95674), UINT64_C(0x0d5b02c98afd4279), UINT64_C(0xede70dbc04a7a3d9), UINT64_C(0xadb3f72865ba580e), - UINT64_C(0xc4a3c11163562e90), UINT64_C(0x482e567c69b6b128), UINT64_C(0x38ec96bfcb4d965d), UINT64_C(0x923fe02a6b4bdabe), - UINT64_C(0x0ae0ca91a2be0579), UINT64_C(0x137401e7f2acf3e8), UINT64_C(0xfdad100e85bc5622), UINT64_C(0x9c07483343c8030f), - UINT64_C(0x71872f8555dbd0a8), UINT64_C(0x8de5873dbfa538e0), UINT64_C(0x2922d0d9a2d9eb02), UINT64_C(0x2744006cfc375d0c), - UINT64_C(0xa82c09537574f583), UINT64_C(0x2ab2d255e73f6f83), UINT64_C(0x6cc5f73b682b3701), UINT64_C(0x6e59fc51ee28845d), - UINT64_C(0xe536b381533cc4cf), UINT64_C(0xfd2ac9f30025e109), UINT64_C(0xc26cdfa60b8be153), UINT64_C(0x62da136e08f0f885), - UINT64_C(0xeb6a7a065b640357), UINT64_C(0x7462b101e2adb3ff), UINT64_C(0x996ec340bf52ea07), UINT64_C(0xf0aa2a872333e60c), - UINT64_C(0x222884f9c4632341), UINT64_C(0x32b5289d94dac82e), UINT64_C(0x7cdd99055bd35f17), UINT64_C(0x92d3d262aefe21bc), - UINT64_C(0xc6c1b1029eb0dd4c), UINT64_C(0x28f046ec80f3c975), 
UINT64_C(0xc1f0c2d9745c5cb7), UINT64_C(0x92ada28cf6f7fe0b), - UINT64_C(0xdfb215a8df753a03), UINT64_C(0x942ecdad535f962d), UINT64_C(0x7d739b8c0b7a1669), UINT64_C(0xee95286e88be8510), - UINT64_C(0x4ae71aa9d3c3d36f), UINT64_C(0x2bd6d5d12452cc38), UINT64_C(0x16fa1504fbedf267), UINT64_C(0x4b835f8377f3937d), - UINT64_C(0x0004374053160cb7), UINT64_C(0xe44a676c90906fe8), UINT64_C(0x2389c459f53fbdcd), UINT64_C(0x4a7031455481da9e), - UINT64_C(0xb72c293d969a40cc), UINT64_C(0xd9b72ee09dde404d), UINT64_C(0xa31f4f98c5aabc97), UINT64_C(0x56f240ad0aea491c), - UINT64_C(0x86264ebf858d67bf), UINT64_C(0x93fd3b332948fd87), UINT64_C(0x79899120e2d72215), UINT64_C(0x36dedea1a614643e), - UINT64_C(0x1c5e947b88cba0f6), UINT64_C(0x20ec77907c771a4f), UINT64_C(0x587a65fe2c8f5487), UINT64_C(0x9b5431d881ff3b4a), - UINT64_C(0x8f55b2fd967902d7), UINT64_C(0xebd59a640fee9b7e), UINT64_C(0xd5a77b39543d5bef), UINT64_C(0x5dbf440d204f5d0f), - UINT64_C(0x4e22065f53ba213e), UINT64_C(0x4611a2d169ad5a0b), UINT64_C(0x41ea9888cb5be7d1), UINT64_C(0xf8a661f2359be997), - UINT64_C(0xde83a9e3a6562631), UINT64_C(0xd66dedc223dad775), UINT64_C(0x162e54732874a52a), UINT64_C(0xf6d91b1963c23d56), - UINT64_C(0x56d3c9a025a95772), UINT64_C(0x92ddff0a1caeb05c), UINT64_C(0x6cbeb9f263443bd7), UINT64_C(0xb4ad540e1b11894b), - UINT64_C(0xcfa573f2f78d8b29), UINT64_C(0xad477ed16d45543f), UINT64_C(0x0d0283973ed3423a), UINT64_C(0x5307f93f3654f284), - UINT64_C(0xbc9b362f504b145b), UINT64_C(0x5661193dc5bcb5ff), UINT64_C(0x151c9b1c7c0f246a), UINT64_C(0xad25cfcfd5e399d2), - UINT64_C(0xc5855adf08226db2), UINT64_C(0x5a027c03c078be13), UINT64_C(0xc2465bfb0dc5b99c), UINT64_C(0x8aaa55a9eca79b60), - UINT64_C(0x797a7c2608c23d9e), UINT64_C(0x692b8d7da8c7f748), UINT64_C(0xc23c7b1ab3e883e1), UINT64_C(0xe1ebb866f32ac6cf), - UINT64_C(0xca6be5075b5046f9), UINT64_C(0x3105a0555f6a3bac), UINT64_C(0x525b7cc4839ea6c5), UINT64_C(0xce1dd2aad7e83cf1), - UINT64_C(0xb4a9105674d79be6), UINT64_C(0x667eb8384834f7db), UINT64_C(0xb200a7a30f789150), 
UINT64_C(0x4ba4d2c780055821), - UINT64_C(0xb48a01ad5f7474c6), UINT64_C(0x3310ba4a1e25aab8), UINT64_C(0x64379d2408fd5735), UINT64_C(0xf11e9788704e5e0d), - UINT64_C(0xe9866ab0a8e90f4e), UINT64_C(0xaa344ffe50f7a934), UINT64_C(0xcce37a15b3870924), UINT64_C(0xe22135597a867f1c), - UINT64_C(0x8770a58d7fe57f99), UINT64_C(0xcafbbc8d2024bcbc), UINT64_C(0x2307e7f0fcdb1909), UINT64_C(0xdd016550b9ed2b2a), - UINT64_C(0xd0bcf0e9dee7df90), UINT64_C(0xe82d2e7daeab325c), UINT64_C(0x721a2aba71709aa7), UINT64_C(0x38cfabc260602614), - UINT64_C(0x3099ccb02b73b4c8), UINT64_C(0x00250ce48fd67df0), UINT64_C(0xcace64d8984b19cf), UINT64_C(0xee305dcbae8615ca), - UINT64_C(0xd187da55485b86ef), UINT64_C(0xebea32b2455e6486), UINT64_C(0x77cb912fa927d5c5), UINT64_C(0x911002ac8b62cbd8), - UINT64_C(0x70730c24c32c5870), UINT64_C(0x0a7cb6f89e988a83), UINT64_C(0x6b5e00839b7db787), UINT64_C(0xecae9f4cfd9ce924), - UINT64_C(0xae09926b714019a5), UINT64_C(0xbc1b2c59bc5ce769), UINT64_C(0x592756761e90349f), UINT64_C(0x95c9a69a21936de3), - UINT64_C(0x192b2119ee48eb9a), UINT64_C(0xcd8d11ebcd8a71c2), UINT64_C(0x34de8d4cad3151d6), UINT64_C(0x0fc4f3baf540eb1c), - UINT64_C(0x88bd85e02b2ec0e2), UINT64_C(0x5b65423e815dafb6), UINT64_C(0x66ec6fadd29f273e), UINT64_C(0xc3622fbc1f1c7bd0), - UINT64_C(0x50cc102827ff1acf), UINT64_C(0xe73cab705018a55f), UINT64_C(0xcd552b588a227f38), UINT64_C(0xc462735f28a9c597), - UINT64_C(0x3e3ccb00a16906e1), UINT64_C(0x79bdf5d7e7dfa593), UINT64_C(0xb333b6942d5db3a9), UINT64_C(0x3566edd901f25f20), - UINT64_C(0x8c5fe3e063253c7b), UINT64_C(0x9f0aa4160fb652ee), UINT64_C(0x2361d9bca2c92f43), UINT64_C(0x2d6a0339fe1de8ee), - UINT64_C(0x389b1bd9476b0470), UINT64_C(0xd7fa2522f0da451e), UINT64_C(0x43e6a01d67c62b2d), UINT64_C(0x5bdc15971dc0d5b3), - UINT64_C(0x38a0a80acbadf021), UINT64_C(0x2c66125ec66e1fad), UINT64_C(0xb58f61bb53b6a9ff), UINT64_C(0x492142919b2d61d6), - UINT64_C(0xd905263cc927ebd9), UINT64_C(0xca15f966e2279122), UINT64_C(0xf9dc67f8101119c9), UINT64_C(0x7f6755699c23d8c9), - 
UINT64_C(0x26146d38a23b0bdf), UINT64_C(0x0166c70bc773d9aa), UINT64_C(0x5b3317113904ec75), UINT64_C(0x5d3c4311b21e44d1), - UINT64_C(0x479c13c75df8cf18), UINT64_C(0x75a880dd38a8a4ff), UINT64_C(0xdf378e2eb432708d), UINT64_C(0xca1cb0f76b1c5f04), - UINT64_C(0x06c76e876516eb46), UINT64_C(0x965c10e60ec202ad), UINT64_C(0x67b18e2140e0aad3), UINT64_C(0x203ca38572b212b8), - UINT64_C(0x72adad835dd333c6), UINT64_C(0xdd02aa349680a96a), UINT64_C(0x69ab0df01d4b3eab), UINT64_C(0xfebfd83a2c43afd1), - UINT64_C(0x0dcd90c392b9fae4), UINT64_C(0x8a87b8033e4cd8cc), UINT64_C(0x3902150c36e99880), UINT64_C(0xb5b655e071474ebc), - UINT64_C(0x6c2dc9eeaffbd8d8), UINT64_C(0x3cf62bfa4986f0fe), UINT64_C(0xa68eaf0719a9afbc), UINT64_C(0xde1f4e9a4b190aef), - UINT64_C(0x7fbc9e8538999e56), UINT64_C(0xf6d5e9db2208a40c), UINT64_C(0x93b13abaddf4554c), UINT64_C(0xd8b5e4ad9911629f), - UINT64_C(0x6fdb9d7376488e52), UINT64_C(0xee604a7ce20d75ad), UINT64_C(0x94ec4abbaa9c2c1d), UINT64_C(0xdbd148c4fcd05ec1), - UINT64_C(0x0865c7c3b380a005), UINT64_C(0xa6da59a56992f211), UINT64_C(0x2eb1dc9f941c83ef), UINT64_C(0x3bf5ccf06910fae7), - UINT64_C(0x23a70e117e1f29f0), UINT64_C(0x4273791acbf6c4e5), UINT64_C(0x338414ec6b5e5d60), UINT64_C(0xa5873517e3d057d9), - UINT64_C(0xea88400a890764f6), UINT64_C(0xc0569d573ca5364f), UINT64_C(0x4c3fc02fc93316e0), UINT64_C(0x76597f718657e577), - UINT64_C(0x17052b8440c7d824), UINT64_C(0x9a7ec0a30be21a00), UINT64_C(0xab0453ac2173dac9), UINT64_C(0xb6f3706820512809), - UINT64_C(0xef44f0b07d46180a), UINT64_C(0x5e9aa12e99509a72), UINT64_C(0x6231337efc0182ca), UINT64_C(0x0963321a419da89b), - UINT64_C(0xfda3e7ad51f82b5e), UINT64_C(0x1ab8790c2f5bf1a3), UINT64_C(0x9ef177b8a59f28c0), UINT64_C(0x27d1c87da66c1652), - UINT64_C(0x1bd6bdf27c49d109), UINT64_C(0xc151e2a66994d599), UINT64_C(0x5e1b8d826b8c12a9), UINT64_C(0x39f41d57213261b5), - UINT64_C(0x16a57bd0bc78aada), UINT64_C(0x0127e7f9699b55c7), UINT64_C(0xd79eccc9f9d703be), UINT64_C(0xb41b81c61ba66d7d), - UINT64_C(0xcf8b79dcb95dce93), 
UINT64_C(0x5ca102a7743a6e0d), UINT64_C(0xf422a0c3a2ad7b28), UINT64_C(0x4a9137b4a0f03724), - UINT64_C(0x907dcf6425c829c2), UINT64_C(0x15551fd4432261fb), UINT64_C(0xa057dfbd55ef436c), UINT64_C(0x8b2541b9e0e0fa7e), - UINT64_C(0x7262166dcdf4b67e), UINT64_C(0xcf6533e5c608aaeb), UINT64_C(0xd6763d3967359786), UINT64_C(0x1f6b0228d257c676), - UINT64_C(0xc268c1064d2b458a), UINT64_C(0x6d8b2f6e75d2b613), UINT64_C(0xfaaf5adc43d72807), UINT64_C(0xb6376765e344f9f8), - UINT64_C(0xa8e18dd16a4bd501), UINT64_C(0xa71aa12a8ec11351), UINT64_C(0x1daaf130b537ebe0), UINT64_C(0x2e8aa415959d5d8f), - UINT64_C(0x2813ff3a3e5cbcfb), UINT64_C(0xf0fdd1d6d16a7c23), UINT64_C(0xbf2b55d2ecf0ee55), UINT64_C(0xbd4e9bec299381d0), - UINT64_C(0xac8827ab807eb180), UINT64_C(0x8514d75ac3b43b0b), UINT64_C(0xc9b5c78e45fb38a8), UINT64_C(0x4b66e6e7b797cd8f), - UINT64_C(0x1a482ffa6870a2d3), UINT64_C(0x98f55f701d4bf919), UINT64_C(0x7c0fda20e7e26ef8), UINT64_C(0x6ef795976fca3b54), - UINT64_C(0x79801cd422fa95cd), UINT64_C(0xce8a72301dbbe230), UINT64_C(0x5e79f4c925bdd0e0), UINT64_C(0x5729e93c99cc12b3), - UINT64_C(0x76d022747522392a), UINT64_C(0xb9d7652e917a6bc4), UINT64_C(0xc2978462dfa9551b), UINT64_C(0xac081b4a7528b0ce), - UINT64_C(0x5b7799fe02443b33), UINT64_C(0x6676e5687742e76a), UINT64_C(0x3e9836e33caf452b), UINT64_C(0x96ff93e427173943), - UINT64_C(0x30fa2f987359e0f6), UINT64_C(0xfaa730326c478363), UINT64_C(0x2bb0560d8986947e), UINT64_C(0x9f7c01d35aefc68f), - UINT64_C(0x6b81189bd90a0e45), UINT64_C(0xd592d2ad2df04128), UINT64_C(0xbcd0e0fe02816ec6), UINT64_C(0x1d6d84e5c1f8df0f), - UINT64_C(0xc4b55a73da2f8713), UINT64_C(0xdbd6510e7ad24d26), UINT64_C(0x7e3452b770e259bd), UINT64_C(0xd5fe716f2c3ee835), - UINT64_C(0x63a6d74ef78acd1d), UINT64_C(0x3bd673b27d5aa140), UINT64_C(0xe394f3a2a4f6d465), UINT64_C(0xf02f642cda7fee7e), - UINT64_C(0xe17ee2617b3d366a), UINT64_C(0x41cdb92402dce780), UINT64_C(0x4e5c54024fd18f6b), UINT64_C(0x6f45dd1c7c5a3f12), - UINT64_C(0xf6fd2b3f9ccda563), UINT64_C(0xe7628d358d971e26), 
UINT64_C(0x4dabc984370ed105), UINT64_C(0xec05f7d5c53cb70b), - UINT64_C(0xf48eccbc216dcf71), UINT64_C(0x8a571d0cb256f131), UINT64_C(0x4c05466392e32549), UINT64_C(0x91d3f9324ef03c3e), - UINT64_C(0xec0591069697e868), UINT64_C(0xa77da4079db8ffd8), UINT64_C(0x287335de3951784f), UINT64_C(0xe7afb90b4adbbf33), - UINT64_C(0x96e785b0c621dbbf), UINT64_C(0xc7f54753a5e1d81b), UINT64_C(0x4a3a42229fc7491e), UINT64_C(0xc9560ea788a62881), - UINT64_C(0xe34b9ee97b5bef12), UINT64_C(0xfae309a9fbff0656), UINT64_C(0xbc23f738a0bf4c58), UINT64_C(0xc6dd1ed9a7a706de), - UINT64_C(0x3473045c7f760007), UINT64_C(0x89b5f0a2e0ace69b), UINT64_C(0x7433c584785f3321), UINT64_C(0xa38220fab7357fc0), - UINT64_C(0x04e1d70ec8db6456), UINT64_C(0xa86065368c31fd72), UINT64_C(0x926cee3a66885fb3), UINT64_C(0xc09c39dbdb8240bc), - UINT64_C(0x1ee291407a9ac9db), UINT64_C(0xa6120818b86fd032), UINT64_C(0xa4c3a1cbf6a6666f), UINT64_C(0xb34ce856697db755), - UINT64_C(0xe3ef1a7123649d75), UINT64_C(0x814ea4e8549f30bc), UINT64_C(0xc8c12f327c1ee0a3), UINT64_C(0xc4ad0d22dbe77043), - UINT64_C(0x608451fb3ab06a00), UINT64_C(0x2e1141be52867cb9), UINT64_C(0x04b92abd9485965f), UINT64_C(0xcf91f012eb16b951), - UINT64_C(0xacc0a45db481b3b3), UINT64_C(0x523f65d99013b4d9), UINT64_C(0xf333b8f8613fae1f), UINT64_C(0x8b651a304f1c80b0), - UINT64_C(0xa91ecd6f061480d2), UINT64_C(0xbd01125685871081), UINT64_C(0x9933950983b6d41e), UINT64_C(0x1f4130fd7912c3e6), - UINT64_C(0x333230fc9385a4ba), UINT64_C(0x9d2d764680fb1581), UINT64_C(0x277e6bb16761eabf), UINT64_C(0x1829af028f40b602), - UINT64_C(0x9783144e64561566), UINT64_C(0x410d30cd66cb4e92), UINT64_C(0xce0e0df02a7ac717), UINT64_C(0xdbfc28dabb65c1e2), - UINT64_C(0x5a83f419f0610b35), UINT64_C(0xb0706efb6f56176b), UINT64_C(0x684148ee29c2a3d6), UINT64_C(0xc47213009755db33), - UINT64_C(0x2600f460fbea3831), UINT64_C(0x7037ec48a50dc3ec), UINT64_C(0xa761879a39764433), UINT64_C(0xcfd6983de3381424), - UINT64_C(0xfdc2524f5d605fc4), UINT64_C(0xbe84a33131a412c9), UINT64_C(0x1bd73706e51699b5), 
UINT64_C(0x7aea62c60dffb5ab), - UINT64_C(0x010fec687da2bbf4), UINT64_C(0x56aa74a28e54f75c), UINT64_C(0xba52dd2bb4019afe), UINT64_C(0x6ae298d992a98093), - UINT64_C(0xdbfc6eddb2348c70), UINT64_C(0xeab81b5b034b7836), UINT64_C(0x692b0fc00c8986ba), UINT64_C(0x02adf5476f927b39), - UINT64_C(0x0173c9bb282a94e7), UINT64_C(0x1e617773e554c877), UINT64_C(0x241d5db92d0aa39e), UINT64_C(0x902c43c4be589249), - UINT64_C(0x0b817ad8f9617273), UINT64_C(0x43508b7fb53d5d1f), UINT64_C(0xaf1d845886eeb50c), UINT64_C(0xc645d0758b0a08f2), - UINT64_C(0x3d1339390783be12), UINT64_C(0x376e4919f2fc41c9), UINT64_C(0x392c5bb8475370e6), UINT64_C(0x5e891f54eec6c015), - UINT64_C(0x16a12880b9ac0923), UINT64_C(0x6437af0453c57f36), UINT64_C(0x8dd1ec0ee82c5835), UINT64_C(0xc4738296f5085ef5), - UINT64_C(0x68c5d2b2d2d06381), UINT64_C(0x8a4627fb8fbef8df), UINT64_C(0x9d56ea18dd2590b3), UINT64_C(0x8dbdd1fd0ca96586), - UINT64_C(0x9c17bd827cc151ab), UINT64_C(0xdddb70eb24c36775), UINT64_C(0xb56277dfd02a9c4d), UINT64_C(0x5a8388d255264a83), - UINT64_C(0xcb7207a0b0155fa4), UINT64_C(0x2bbc2967864dd11a), UINT64_C(0x19fb91190adfc85a), UINT64_C(0xed562d76a7e244c3), - UINT64_C(0xf5438c5585588610), UINT64_C(0xbc16ff713cde2e48), UINT64_C(0x42248c858cf837cb), UINT64_C(0x59c8eeb9769cf08a), - UINT64_C(0x0f5260cc1dc624b7), UINT64_C(0x6b880672b5ebfdd5), UINT64_C(0x2e6d6cf57e3365cf), UINT64_C(0xe994b274628cdb20), - UINT64_C(0x939e00fbb43765d8), UINT64_C(0x093150ef5c7cd883), UINT64_C(0x8ae15f57f13b42f1), UINT64_C(0x3af5014a74f18355), - UINT64_C(0x7e1a2d0c860bcd23), UINT64_C(0x796312eee1445e38), UINT64_C(0x1cbde8ef8bdfee3d), UINT64_C(0x207592ed0910de04), - UINT64_C(0x150e839a79142012), UINT64_C(0xb920f5ff40de84a6), UINT64_C(0x0c05b146a932213b), UINT64_C(0x7406c434e2d92546), - UINT64_C(0x19376004d1fc67aa), UINT64_C(0x82f3677fcf0dd552), UINT64_C(0xd9daf63e3aa745a9), UINT64_C(0x8e1e09d0a9676fdf), - UINT64_C(0x2cb86571c0289958), UINT64_C(0x4c4c12eb3a97b760), UINT64_C(0x1e3468d9bf56d00c), UINT64_C(0x11f90498f14cb4a4), - 
UINT64_C(0x251664b4422a7c58), UINT64_C(0xad10e44d41c2b7c5), UINT64_C(0x663cf17121b6d221), UINT64_C(0x3fe40cdc49c541b8), - UINT64_C(0xb1b1a8b2a941f9c7), UINT64_C(0x83ffae6e34d4eb78), UINT64_C(0xa4564673c6728fbf), UINT64_C(0xe1499f6bd812a4b9), - UINT64_C(0xfb5507a915ed36a3), UINT64_C(0xe055a829c62de53c), UINT64_C(0x1ea06fc53acba653), UINT64_C(0xce0f8c15fd8f2258), - UINT64_C(0x7dd42e43e5ef6f4b), UINT64_C(0x0c55aecd7e1adc10), UINT64_C(0xc31b0e4d3a4e8b1c), UINT64_C(0x1205469d91599780), - UINT64_C(0xbba5d6df94390b83), UINT64_C(0xc97925cae2f17697), UINT64_C(0x3b98f3dc9e15ea08), UINT64_C(0x878203758954cd36), - UINT64_C(0x818deaef5ba91f77), UINT64_C(0x6f8f1786214acb89), UINT64_C(0x26c5c2162849ece8), UINT64_C(0xaf1c297b73471dd3), - UINT64_C(0x415c497c9fa7e936), UINT64_C(0xc1804e923aa3cce6), UINT64_C(0xdd7ca8ffb78dc68c), UINT64_C(0x5b912445ed7ba89a), - UINT64_C(0x95dec0af89a1f157), UINT64_C(0x7041c032d1fa5266), UINT64_C(0xc569835beabc20df), UINT64_C(0xcc662c0dbb7baaef), - UINT64_C(0x20d5d2c1383ff75c), UINT64_C(0x7efdaae3e1c4eaaf), UINT64_C(0x3575fad9533be200), UINT64_C(0xfb0fb500836d48dd), - UINT64_C(0xd211a5090e6d53e2), UINT64_C(0x34afe4050a01467c), UINT64_C(0x63457fe7bfe187c3), UINT64_C(0xc3ee000cb474d925), - UINT64_C(0x4fd32cbbb8326e22), UINT64_C(0xc2abcd1fc9bf14c2), UINT64_C(0xf34b534e55f28258), UINT64_C(0x094ff2a11972ddec), - UINT64_C(0x9744b26f181926a9), UINT64_C(0xa7fe6a0982135b29), UINT64_C(0x0f8d9e7a0de7d61b), UINT64_C(0x4bcd12d1b5d3d8a6), - UINT64_C(0x706e34dbac81bd39), UINT64_C(0xefea01605e9304c6), UINT64_C(0xee3bb6d1e510efe1), UINT64_C(0x84a094db3f4620f8), - UINT64_C(0xf1752fc679d6aeb3), UINT64_C(0x54921e5d6949a43f), UINT64_C(0xd3616f81f2ff8c55), UINT64_C(0x8bd9584eb62232bd), - UINT64_C(0xa990035eef6e7b13), UINT64_C(0xd4c56de5c11dcdda), UINT64_C(0x8048c23ec8bd072b), UINT64_C(0x407539904d984e51), - UINT64_C(0xeaf5a1d46eb3779b), UINT64_C(0x4b06e5769362f357), UINT64_C(0x931f75e21bc0d143), UINT64_C(0x9369439b81c92fc4), - UINT64_C(0x059fccc0d4afbb45), 
UINT64_C(0xd072671b3c927118), UINT64_C(0x61b6803f95c41115), UINT64_C(0xacb4b2c4381da3f5), - UINT64_C(0xd73bf897ee871c72), UINT64_C(0x241c9d52c953d3c0), UINT64_C(0x083c079e704d7b96), UINT64_C(0x8c431ee43e5171a5), - UINT64_C(0x66079596998b96b6), UINT64_C(0x041ea35d207b478e), UINT64_C(0xbe698683cf7b258e), UINT64_C(0x5457365cf6cbc5bb), - UINT64_C(0xc166c3ef7006b02d), UINT64_C(0x27789ff1e5365132), UINT64_C(0xae4a02397d308867), UINT64_C(0x0388704d03d7b613), - UINT64_C(0xf5c9d782d3fd58e3), UINT64_C(0xb51c3fe53965624e), UINT64_C(0xf785b86e7fe0adec), UINT64_C(0x19f72a9ef3a215e8), - UINT64_C(0x19db58361e6633d9), UINT64_C(0xf1fe7a08693d64ab), UINT64_C(0x07c3310adc3bbf03), UINT64_C(0x742e87d333077816), - UINT64_C(0xe817529af0f04970), UINT64_C(0xe7f343c941a044ff), UINT64_C(0xf9693fb4f37b4d2c), UINT64_C(0xb99da4a0b6ccb1ed), - UINT64_C(0x4eef654d39c7f631), UINT64_C(0xd06badd9354befc8), UINT64_C(0x3dea38b48a4fb6cf), UINT64_C(0xf6551a2de11ec63d), - UINT64_C(0xf0dd7ca2d08731e5), UINT64_C(0xfbbac6e989684aff), UINT64_C(0xe2b65b698f6ea652), UINT64_C(0x679e2fc32595fb51), - UINT64_C(0x6547fdc240571414), UINT64_C(0x6809f663de2d0466), UINT64_C(0x6c6b7a0a40a5e48f), UINT64_C(0xe5f43660d891606e), - UINT64_C(0xa44f283a5a5c10fd), UINT64_C(0x95635b53a60083be), UINT64_C(0x7e0f003a2698a45c), UINT64_C(0x2fd0eb2a3cb4db79), - UINT64_C(0x7416380640ad33c7), UINT64_C(0x988de04a8bfe794b), UINT64_C(0x6d00569ebd6839ff), UINT64_C(0x22ddd7d3d0efa384), - UINT64_C(0x20f9c1ae73b1a651), UINT64_C(0x32386da97bb626af), UINT64_C(0x263c358b8e1975fe), UINT64_C(0x32bd1e4fdb3e7f7c), - UINT64_C(0x2ebb53af95ab07db), UINT64_C(0xeccc526f7e6aca61), UINT64_C(0x186fd1f3ad161e28), UINT64_C(0xf96dd58eca026372), - UINT64_C(0x0403c8572fee3bf3), UINT64_C(0x2598261d29b22e84), UINT64_C(0xa4027ffeed481ae0), UINT64_C(0xe2f690ddcdb0fdaf), - UINT64_C(0x95d11d0d60c528fd), UINT64_C(0x0cc242f0eeae1d6c), UINT64_C(0xfa3440087835377f), UINT64_C(0x3d8fad475b8139e4), - UINT64_C(0x8e92fce862d8a97e), UINT64_C(0xc53bc4cb5ed50eb4), 
UINT64_C(0xc8f91ece0194e8d4), UINT64_C(0xf78d7c6b5cff07e1), - UINT64_C(0x3163d8458b924665), UINT64_C(0xc2ae6dc185c739bf), UINT64_C(0x2943e3eae337c6c6), UINT64_C(0x96bd36f0da4a49f7), - UINT64_C(0x98753f33282f27bf), UINT64_C(0xd5c33455bf0f69fd), UINT64_C(0x78cc9f69e0286682), UINT64_C(0x0631fadc21ec437c), - UINT64_C(0x521c3db58b6b1170), UINT64_C(0x2333f0f70e46b5cf), UINT64_C(0x87be027b8d434ac6), UINT64_C(0xba4c26796c582e4c), - UINT64_C(0x35d52e4f85db73e4), UINT64_C(0x8ac3723b24e99436), UINT64_C(0x4a2b6ce4b7a97a02), UINT64_C(0xcb8017cc584b287d), - UINT64_C(0x1ca3610bc2f30e9f), UINT64_C(0xc1c2dafdd385b283), UINT64_C(0xa812778eceff9a2b), UINT64_C(0x91b8429959ef5359), - UINT64_C(0xa2750c665bcab7d2), UINT64_C(0x9212f5d704b5320b), UINT64_C(0xfa46bb7a213be57f), UINT64_C(0xd20cbd122dce6c1d), - UINT64_C(0x82868b5aee7a4776), UINT64_C(0xf49ec5ddf8cec096), UINT64_C(0xa4fc2bf71ac9dcc2), UINT64_C(0x9d8b8f462bd2f17b), - UINT64_C(0x452703fe91008332), UINT64_C(0x919a288ada854bef), UINT64_C(0x75d2b2eb0f4eeed7), UINT64_C(0xd64885293558a96f), - UINT64_C(0x098d7efb4f8d5b31), UINT64_C(0x7ee77eef93a3928e), UINT64_C(0xb28eebae28b63dc8), UINT64_C(0x0f01129fc90af970), - UINT64_C(0xf3d5b92900d45181), UINT64_C(0xb9d8a408ea6715c0), UINT64_C(0xe44424fb8ca9e22e), UINT64_C(0xd81135834c1aaf96), - UINT64_C(0x445b3d67398e888b), UINT64_C(0x0dad43784fe36cda), UINT64_C(0xe6d1bd75c5d81518), UINT64_C(0x662f0e924150c5cb), - UINT64_C(0x78179f80df6e0709), UINT64_C(0xdd8fc687a741289c), UINT64_C(0x710873d7f5ab060e), UINT64_C(0xa1961d2b538f497c), - UINT64_C(0xb36bbf75bc8b8761), UINT64_C(0x675c608353017307), UINT64_C(0xade6b1aa0ec59bbe), UINT64_C(0xc803a2c9426b3c5f), - UINT64_C(0x48a8210409b5ffac), UINT64_C(0xc3d58389ce5f3b13), UINT64_C(0xa23ceb0e71b08443), UINT64_C(0xd9d192cd9c5e9a05), - UINT64_C(0x20d9cd878b94147d), UINT64_C(0x22329c7695f6df46), UINT64_C(0xaebdcdc2c2cbc0d9), UINT64_C(0xe95ae3d514f6f94b), - UINT64_C(0x59152e1f5715e782), UINT64_C(0xb3280d75a8134f15), UINT64_C(0x5bce3379e1fcb7b4), 
UINT64_C(0x437d9c3238c4169f), - UINT64_C(0x77db7e5ebd5125bd), UINT64_C(0x0dd3aef40336d438), UINT64_C(0x4a496a56bac81428), UINT64_C(0x72a128c3875dc93d), - UINT64_C(0x8eb605e5bef1747d), UINT64_C(0x666d4546567a4eef), UINT64_C(0xad5ad003399d2296), UINT64_C(0x19c74366682b52a0), - UINT64_C(0xb3c35c5a0e259420), UINT64_C(0xf98340503eb93d6d), UINT64_C(0xa51985b0bb7f81e8), UINT64_C(0x2a21510c6c7ca42f), - UINT64_C(0x3c1ac0b52c230998), UINT64_C(0x4e1d572a2d77000b), UINT64_C(0x8dd3adff3bfdec71), UINT64_C(0xdfb3a4a23e43d035), - UINT64_C(0xe12f748421173e62), UINT64_C(0x2f356145d2f72758), UINT64_C(0x31c13682374c445c), UINT64_C(0x09240a1f409fab88), - UINT64_C(0xa346e2d2f72fd5e8), UINT64_C(0x2c5b53bfc05f9f77), UINT64_C(0x0a9f7ab218574f6e), UINT64_C(0xc3fcb9b977f0cceb), - UINT64_C(0xac26889eb86459b9), UINT64_C(0x1082f785bc3dac21), UINT64_C(0x3c8c337a4c67ef18), UINT64_C(0x118e48d0e8a66e02), - UINT64_C(0xb777cef85278f2dc), UINT64_C(0x12a268a3dcda05bc), UINT64_C(0x75f5f7d3fde0bd9e), UINT64_C(0x62f5f1650ec91670), - UINT64_C(0x81fcf9e3e1c3adec), UINT64_C(0xf0b5e35ace23349c), UINT64_C(0xde7d514d058e53a4), UINT64_C(0x52a625e5f06242c7), - UINT64_C(0x3cc1346eda6a430a), UINT64_C(0x165bd737e851f6a1), UINT64_C(0xe52c53d745f1b49a), UINT64_C(0x15513074f676fafc), - UINT64_C(0xcb8797dbb29e6710), UINT64_C(0x27b92c8190fd679d), UINT64_C(0x0b39384ac668b176), UINT64_C(0x11341e6d7adad0e9), - UINT64_C(0x491b5b5390b70f94), UINT64_C(0x1f5eccf586d03746), UINT64_C(0x6502ca945646feae), UINT64_C(0x3abb5466229ef7d8), - UINT64_C(0x535b4effbe0ce5f6), UINT64_C(0x6575eefef9e916f5), UINT64_C(0x77a76fbf3c76f2d7), UINT64_C(0x1cc63124152994a7), - UINT64_C(0x6e33f80e95d4323d), UINT64_C(0xd711791d9b2e1d65), UINT64_C(0x7c766cd52013ae49), UINT64_C(0x08bc15230d2ef477), - UINT64_C(0xb751fa3b942ab063), UINT64_C(0xfe99a8b170a11941), UINT64_C(0x731979294908218a), UINT64_C(0x32166899c12f3097), - UINT64_C(0x8318df8e3823dd3d), UINT64_C(0x940e81f0b4ece3d8), UINT64_C(0x81ea0f12130235ea), UINT64_C(0x36603dfef356d752), - 
UINT64_C(0x409eeb16b992d793), UINT64_C(0xf4c675cca09e229a), UINT64_C(0x0ef989d732dae818), UINT64_C(0x269b4385573ad2f6), - UINT64_C(0x53df04584157173c), UINT64_C(0x260c347bedc5ce82), UINT64_C(0xb9fbfba9b58c1b09), UINT64_C(0x20115df9d0693a14), - UINT64_C(0x8c0fb27588303369), UINT64_C(0x3a9450974a66eaaf), UINT64_C(0x805f0d515d715679), UINT64_C(0x10f4b52a09898972), - UINT64_C(0x20e9c3449e84718e), UINT64_C(0x9eed8745b4e234e2), UINT64_C(0x946c3083bf840def), UINT64_C(0xb18de02e626f7dd9), - UINT64_C(0x9e8b496b1d035ed8), UINT64_C(0x6ef3891e7c690f77), UINT64_C(0xd62269e5ad1c07f5), UINT64_C(0x7117ed7eddc2883e), - UINT64_C(0x260f1d08457dfcca), UINT64_C(0xe0759189d723da9d), UINT64_C(0xd6d40adb9c9f94d7), UINT64_C(0x7c47c4b4a670b77e), - UINT64_C(0xb2b5179563a2abe1), UINT64_C(0x62118cb60f121507), UINT64_C(0x22c3a4a74379ceb1), UINT64_C(0xd5904c844fbfed74), - UINT64_C(0xa0afa38c06d50d92), UINT64_C(0xd6223dbbcfcf73f4), UINT64_C(0xf19623e7ec6f83dd), UINT64_C(0xd08c12de2b6265f6), - UINT64_C(0xc487d5dc19489db6), UINT64_C(0x759283ffd06fc796), UINT64_C(0xd61a735ad1cd7ccc), UINT64_C(0x32084ba3ca8fa3ee), - UINT64_C(0x17530308a1204968), UINT64_C(0x80328582a1eb8d8f), UINT64_C(0xd4c873deec7fb3d7), UINT64_C(0x11c825cc4bc8b181), - UINT64_C(0x0137fa50576b21eb), UINT64_C(0xc5ea2f958a3ddb53), UINT64_C(0x6ae611d92b67c9bc), UINT64_C(0xb798b3e1f9c3a851), - UINT64_C(0x22a42679fa4b013f), UINT64_C(0x2071f22dae8de629), UINT64_C(0x3faa3a80e45cbca6), UINT64_C(0xb0418f45808009ec), - UINT64_C(0x446063013dd5a0f4), UINT64_C(0x932445b680ef71ec), UINT64_C(0x2bc9a2d9ab8e2662), UINT64_C(0x8ebd57fbc56a6154), - UINT64_C(0xa28f3d2264ad0f10), UINT64_C(0xffff84df76a10c15), UINT64_C(0xac5c9b0e78fbee81), UINT64_C(0xc1f08e08982b237c), - UINT64_C(0x5907b7fa41daa2b8), UINT64_C(0xbed3856320d9c3c2), UINT64_C(0x500a342c1902f015), UINT64_C(0x0c3a5d539c71b7d6), - UINT64_C(0xa706750b1c3e5604), UINT64_C(0x1543ab593a8c824c), UINT64_C(0xbdfd9d26f151d83c), UINT64_C(0x1603bb40537de208), - UINT64_C(0x1501b0ba802daa2d), 
UINT64_C(0xdcbcc803f3c11f3c), UINT64_C(0x2bb283a389ec2f35), UINT64_C(0x3a27513ef9d14bf4), - UINT64_C(0xcb7c4fd02a39d8af), UINT64_C(0xcc6f61a03488e43f), UINT64_C(0xfdddf2b5fd6c4b05), UINT64_C(0xa015987625b9755d), - UINT64_C(0x14c5a9b03c63b253), UINT64_C(0x413f7d2608bf939e), UINT64_C(0x8bdb68c7176407e5), UINT64_C(0x436de64d8a614c32), - UINT64_C(0xc2aca4b10ff0bf8e), UINT64_C(0x3b56cc9c1df797e4), UINT64_C(0xb1750cce6cca57bb), UINT64_C(0x8c80e2303509012a), - UINT64_C(0x7f25bae3c4fea8af), UINT64_C(0xecf8ed9dac1367b8), UINT64_C(0x1a49274e39668f4e), UINT64_C(0xca4a0ae881c7dc39) + UINT64_C(0x6fa74b1b15047628), UINT64_C(0xa2b5ee64e9e8f629), UINT64_C(0xd0937853bdd0edca), UINT64_C(0x4e9fb2b2b0a637a6), + UINT64_C(0x26ac5a8fac69497e), UINT64_C(0x51e127f0db14aa48), UINT64_C(0xea5b9f512d8d6a09), UINT64_C(0xf3af1406a87de6a9), + UINT64_C(0x3b36e2ed14818955), UINT64_C(0xb0ac19ef2dde986c), UINT64_C(0xd34ed04929f8f66d), UINT64_C(0xe99978cff2b324ea), + UINT64_C(0x4032cb3ecff8cb38), UINT64_C(0xfa52274072d86042), UINT64_C(0x27437346dec26105), UINT64_C(0xec1cbf04b76aec71), + UINT64_C(0x6dd57b3dac56cd39), UINT64_C(0x34e9021797e95aad), UINT64_C(0xdc8d3363540c5999), UINT64_C(0x773d283eeeabf4ab), + UINT64_C(0x373c522657461aaf), UINT64_C(0x154cfe0f497d7f78), UINT64_C(0x6d377183b5ca6550), UINT64_C(0x614da5f6055e904b), + UINT64_C(0xd77b66b34896f00e), UINT64_C(0x122538125d6adaef), UINT64_C(0x1021e161206d9091), UINT64_C(0x38407c4313aefdfa), + UINT64_C(0xd941cc5dafc66162), UINT64_C(0xfc2432a6ea885315), UINT64_C(0x5576dc02b68b10ed), UINT64_C(0xd8449f9d4ab139a2), + UINT64_C(0xd333cbcd49cbacba), UINT64_C(0x700d20430e06eeb8), UINT64_C(0xdeb34810d6d0320a), UINT64_C(0x6743363d6cc8ba68), + UINT64_C(0xbd183cb526e6e936), UINT64_C(0xee62bf5ee97de5ea), UINT64_C(0xf6b855e743e76853), UINT64_C(0x83ac16a35d132df9), + UINT64_C(0x2046f2c70c2130b1), UINT64_C(0xaadd5007102b5ee4), UINT64_C(0x8eedac842e63cdac), UINT64_C(0xba02956e43c18608), + UINT64_C(0xd2688af010adbeaf), UINT64_C(0x4aaa5295377c17be), 
UINT64_C(0x83792382ba198f10), UINT64_C(0x6fc42849961a25b6), + UINT64_C(0x3501677f06fb1311), UINT64_C(0x1e18b89705c224dd), UINT64_C(0xa0a0b8684aa2e12d), UINT64_C(0x30d19aac3d40898e), + UINT64_C(0x41dd335a29272e9b), UINT64_C(0x5c5d445a07426e3f), UINT64_C(0x6f13080e67946fdc), UINT64_C(0x3ddabae21609bf08), + UINT64_C(0x8e6146d3cde11ca5), UINT64_C(0x9eff76a4c39eacf4), UINT64_C(0x71c66d0a423a21b7), UINT64_C(0x68515c0b712bbc4f), + UINT64_C(0x5edd17cec412a735), UINT64_C(0xa444f487c96f896c), UINT64_C(0xc161d16d4e54041a), UINT64_C(0x3a2d84d3e09bafb9), + UINT64_C(0x63a406b157a5f2f1), UINT64_C(0x18292d6007f839ba), UINT64_C(0xcaac5789618f2aac), UINT64_C(0x6f516d95f749dd97), + UINT64_C(0xb5784409560e219f), UINT64_C(0x12f0f0d6fbdcb81c), UINT64_C(0x993d6c2a47089679), UINT64_C(0xcc9247b35870aebf), + UINT64_C(0xa1ca8eff8b1bca70), UINT64_C(0x7a1d015397e558cc), UINT64_C(0xc504a4d4815f8722), UINT64_C(0x3e44258e93472b26), + UINT64_C(0x11bd0578a36c8044), UINT64_C(0x84c7087603a0a6ea), UINT64_C(0x457d0c59e84c9ac8), UINT64_C(0x32129275ee63dd95), + UINT64_C(0x66269220e943024d), UINT64_C(0x197de12f9d6e5c72), UINT64_C(0x06fdd09a4d6157dd), UINT64_C(0xf8c1a8b51fe95716), + UINT64_C(0x41eeb6129149f6cf), UINT64_C(0x42f510887a61de1b), UINT64_C(0xf3d2aa6e4fe5949d), UINT64_C(0xc0799007b85373aa), + UINT64_C(0x81577b167de515c3), UINT64_C(0x01f424fc6b856270), UINT64_C(0xff6247ed0658caa8), UINT64_C(0x63ad005e620fe4bb), + UINT64_C(0xdb919b9f63c93174), UINT64_C(0x5693dbd6c76c7683), UINT64_C(0xdaa9b82e85e0355a), UINT64_C(0x424c5c4e5672fc73), + UINT64_C(0x9de3ca332ba818f1), UINT64_C(0xb28f375a58bc6c1e), UINT64_C(0xef0af1e6041b9cd4), UINT64_C(0x0418afb53ef5408f), + UINT64_C(0x9a37634585d3330a), UINT64_C(0x3ab5aec014b097cd), UINT64_C(0x384a0739a3ff7dc8), UINT64_C(0x0ff31c11226e5d5a), + UINT64_C(0x71070735f1c16bb4), UINT64_C(0xc4f78905f49a3840), UINT64_C(0x561f68d6a5f44a81), UINT64_C(0xb09bd8cd8d932357), + UINT64_C(0xf270b47652354fdb), UINT64_C(0x47d6ca7bba50c2c7), UINT64_C(0x2720590d7b2b7b54), 
UINT64_C(0xcaac35df08cab300), + UINT64_C(0xd05759dee169d9fd), UINT64_C(0xdb8d0d0403a6aafb), UINT64_C(0xcd3ab85684ba537c), UINT64_C(0xad69c4e5240c158f), + UINT64_C(0x65427c4ff3637db2), UINT64_C(0x085ecbbf903a45ae), UINT64_C(0xeafed57a94384c62), UINT64_C(0xc99972367cd21eba), + UINT64_C(0xc1e2cf52270b20eb), UINT64_C(0x825dad5142681653), UINT64_C(0x47e99edc5e141d94), UINT64_C(0x125813bc26e42e07), + UINT64_C(0x06f41d2441b172ca), UINT64_C(0x5e9e640ed911730e), UINT64_C(0x5900403342f0f362), UINT64_C(0x57a600d157ee9945), + UINT64_C(0xbcc5d702f02dc7e0), UINT64_C(0x8258cf5a1a6435ab), UINT64_C(0xdf885b6a0343a3e0), UINT64_C(0xadd74c04a503b09a), + UINT64_C(0x0ea210122eeef589), UINT64_C(0x5217fd50f3ecaf85), UINT64_C(0xd0c39849df6b4756), UINT64_C(0xf66d9e1c91bd0981), + UINT64_C(0x0f355b00f40e3e6b), UINT64_C(0xc01dabcd14518520), UINT64_C(0x58691b4fa9e7d327), UINT64_C(0x357616c77c22fffe), + UINT64_C(0xb9fbf8de2ed23303), UINT64_C(0x0195932bc205c466), UINT64_C(0xef0763590a08a50d), UINT64_C(0xf546866c0028a938), + UINT64_C(0x41cc8732eaad496a), UINT64_C(0xadc61f16374896c6), UINT64_C(0x5eb8f93f25ad0457), UINT64_C(0x240f00f5db3fae25), + UINT64_C(0xcc48503596dc01ef), UINT64_C(0x351baaa904a306d5), UINT64_C(0x7111179ae328bb19), UINT64_C(0x6789a31719d5d453), + UINT64_C(0xf5318492c9613de6), UINT64_C(0xa0e8c24f3f0da716), UINT64_C(0xac15d68d54401b9d), UINT64_C(0xadafb35cf63092ee), + UINT64_C(0xceb5f8d63c7fec4c), UINT64_C(0x1ae71929b980fc9d), UINT64_C(0x6efdc5693ef4ee2a), UINT64_C(0xbedd8334cade7855), + UINT64_C(0x06f1b768b476a249), UINT64_C(0x9e614bedf41dd639), UINT64_C(0x9eca9c6c9e389a5d), UINT64_C(0x76999bf01b912df2), + UINT64_C(0x04d52fb2ac70ab31), UINT64_C(0xe467ea8172f5d066), UINT64_C(0x356ed51bb0e094ae), UINT64_C(0xab2047c21b54d8ba), + UINT64_C(0x21dbbfa0a6157474), UINT64_C(0x7de36edec62f1997), UINT64_C(0x306ef59f5204a58c), UINT64_C(0x954135a769d5b72e), + UINT64_C(0x9d7774a0c2d29380), UINT64_C(0xc03acfd63ac6b88c), UINT64_C(0x9989d5ee565322e6), UINT64_C(0x19d1a58324bdd145), + 
UINT64_C(0xe74685383cc6b27c), UINT64_C(0xf9edffe1c4d81108), UINT64_C(0x94950b5b6247cb43), UINT64_C(0xe3fa8c6468d419eb), + UINT64_C(0x29981bd802f77ac5), UINT64_C(0x6cf1a6cab28c1c36), UINT64_C(0x1d34a382a5d48973), UINT64_C(0xcd1d5d546e5e4d3d), + UINT64_C(0x4ad78b4a37e52322), UINT64_C(0x24da17671ab463f2), UINT64_C(0x527504b7c7bc5537), UINT64_C(0x7ba1d92e1969b2b5), + UINT64_C(0x53a130812c49d64a), UINT64_C(0x503af48d9510f1d7), UINT64_C(0x719db8a348dee165), UINT64_C(0xa85e4fad1f343e67), + UINT64_C(0xdafc1fa9203d2d45), UINT64_C(0x7730f245c903a407), UINT64_C(0xb7c04e53f913aeae), UINT64_C(0x39ed817e1e039153), + UINT64_C(0xf415ea2b3efc7606), UINT64_C(0x15e3c53fe43f104d), UINT64_C(0x1b71e4d83ccba83c), UINT64_C(0xfe088f4c90812841), + UINT64_C(0x1ff8e2ee0a04b6ae), UINT64_C(0xf4f4a23612b9eed2), UINT64_C(0xc596a66051b8aca1), UINT64_C(0xbc898edd3370a8dd), + UINT64_C(0xce7638a7a2f9152e), UINT64_C(0xd99192635c0d5c92), UINT64_C(0x62038c87c094a1ff), UINT64_C(0xa73f1bcaac7343af), + UINT64_C(0x93c797804faa5ff3), UINT64_C(0x9da7407c705da1f0), UINT64_C(0xa52cde7d37fef9f0), UINT64_C(0xb93a7db97e3fa7ff), + UINT64_C(0x75ee91392c60fb6b), UINT64_C(0x4d7f8e3db9383ae0), UINT64_C(0xe0aec397d5290d06), UINT64_C(0x159a20f22d740d81), + UINT64_C(0x231416cff9a9b014), UINT64_C(0x71ed3a6e513b4795), UINT64_C(0x190b08ebcb87f3bc), UINT64_C(0x36bb0bcb0e8df593), + UINT64_C(0xc1e63cdc4d78dfb3), UINT64_C(0x36e2c57ba6799460), UINT64_C(0x280c0618b19f63dc), UINT64_C(0xca2b8e49d6c71d2d), + UINT64_C(0xc881e59705270f09), UINT64_C(0x26fdf0dbb5f2f451), UINT64_C(0xc6d1a3697ca86855), UINT64_C(0xd00755a203980eb5), + UINT64_C(0xa85962163dd7de95), UINT64_C(0x622b7a1d2531d00e), UINT64_C(0xb6c1cfba74436ef7), UINT64_C(0x9578891a720bf317), + UINT64_C(0x5e325058bd3a343a), UINT64_C(0x9a468a5a888a475f), UINT64_C(0xa57f0edb414a0589), UINT64_C(0xa044aef7ea680f8c), + UINT64_C(0x2036717cee9b991a), UINT64_C(0x3925631ec66cb8aa), UINT64_C(0xdcb6a5da6b2fc78f), UINT64_C(0x17a8cd724b7b5e26), + UINT64_C(0x1c704c6a48a2dae0), 
UINT64_C(0x87d8f6738a0c30bc), UINT64_C(0xd8580262a4801240), UINT64_C(0x5812cea521ffaeaf), + UINT64_C(0x21b6ff923871f14c), UINT64_C(0x922dbd45c2b307d1), UINT64_C(0x5c67ecbaace24d31), UINT64_C(0xb90f5e3acfaeff9b), + UINT64_C(0xea5aa9f2f14efeb1), UINT64_C(0x08003af95ab5ce92), UINT64_C(0x5a39361e05692622), UINT64_C(0xd4b8cddc309e44da), + UINT64_C(0xe20bfe5f0a1343d9), UINT64_C(0x13848357d100b2b3), UINT64_C(0x912a1b220fa678f5), UINT64_C(0x7631242b7f6d6365), + UINT64_C(0x5a9f9a3284d95674), UINT64_C(0x0d5b02c98afd4279), UINT64_C(0xede70dbc04a7a3d9), UINT64_C(0xadb3f72865ba580e), + UINT64_C(0xc4a3c11163562e90), UINT64_C(0x482e567c69b6b128), UINT64_C(0x38ec96bfcb4d965d), UINT64_C(0x923fe02a6b4bdabe), + UINT64_C(0x0ae0ca91a2be0579), UINT64_C(0x137401e7f2acf3e8), UINT64_C(0xfdad100e85bc5622), UINT64_C(0x9c07483343c8030f), + UINT64_C(0x71872f8555dbd0a8), UINT64_C(0x8de5873dbfa538e0), UINT64_C(0x2922d0d9a2d9eb02), UINT64_C(0x2744006cfc375d0c), + UINT64_C(0xa82c09537574f583), UINT64_C(0x2ab2d255e73f6f83), UINT64_C(0x6cc5f73b682b3701), UINT64_C(0x6e59fc51ee28845d), + UINT64_C(0xe536b381533cc4cf), UINT64_C(0xfd2ac9f30025e109), UINT64_C(0xc26cdfa60b8be153), UINT64_C(0x62da136e08f0f885), + UINT64_C(0xeb6a7a065b640357), UINT64_C(0x7462b101e2adb3ff), UINT64_C(0x996ec340bf52ea07), UINT64_C(0xf0aa2a872333e60c), + UINT64_C(0x222884f9c4632341), UINT64_C(0x32b5289d94dac82e), UINT64_C(0x7cdd99055bd35f17), UINT64_C(0x92d3d262aefe21bc), + UINT64_C(0xc6c1b1029eb0dd4c), UINT64_C(0x28f046ec80f3c975), UINT64_C(0xc1f0c2d9745c5cb7), UINT64_C(0x92ada28cf6f7fe0b), + UINT64_C(0xdfb215a8df753a03), UINT64_C(0x942ecdad535f962d), UINT64_C(0x7d739b8c0b7a1669), UINT64_C(0xee95286e88be8510), + UINT64_C(0x4ae71aa9d3c3d36f), UINT64_C(0x2bd6d5d12452cc38), UINT64_C(0x16fa1504fbedf267), UINT64_C(0x4b835f8377f3937d), + UINT64_C(0x0004374053160cb7), UINT64_C(0xe44a676c90906fe8), UINT64_C(0x2389c459f53fbdcd), UINT64_C(0x4a7031455481da9e), + UINT64_C(0xb72c293d969a40cc), UINT64_C(0xd9b72ee09dde404d), 
UINT64_C(0xa31f4f98c5aabc97), UINT64_C(0x56f240ad0aea491c), + UINT64_C(0x86264ebf858d67bf), UINT64_C(0x93fd3b332948fd87), UINT64_C(0x79899120e2d72215), UINT64_C(0x36dedea1a614643e), + UINT64_C(0x1c5e947b88cba0f6), UINT64_C(0x20ec77907c771a4f), UINT64_C(0x587a65fe2c8f5487), UINT64_C(0x9b5431d881ff3b4a), + UINT64_C(0x8f55b2fd967902d7), UINT64_C(0xebd59a640fee9b7e), UINT64_C(0xd5a77b39543d5bef), UINT64_C(0x5dbf440d204f5d0f), + UINT64_C(0x4e22065f53ba213e), UINT64_C(0x4611a2d169ad5a0b), UINT64_C(0x41ea9888cb5be7d1), UINT64_C(0xf8a661f2359be997), + UINT64_C(0xde83a9e3a6562631), UINT64_C(0xd66dedc223dad775), UINT64_C(0x162e54732874a52a), UINT64_C(0xf6d91b1963c23d56), + UINT64_C(0x56d3c9a025a95772), UINT64_C(0x92ddff0a1caeb05c), UINT64_C(0x6cbeb9f263443bd7), UINT64_C(0xb4ad540e1b11894b), + UINT64_C(0xcfa573f2f78d8b29), UINT64_C(0xad477ed16d45543f), UINT64_C(0x0d0283973ed3423a), UINT64_C(0x5307f93f3654f284), + UINT64_C(0xbc9b362f504b145b), UINT64_C(0x5661193dc5bcb5ff), UINT64_C(0x151c9b1c7c0f246a), UINT64_C(0xad25cfcfd5e399d2), + UINT64_C(0xc5855adf08226db2), UINT64_C(0x5a027c03c078be13), UINT64_C(0xc2465bfb0dc5b99c), UINT64_C(0x8aaa55a9eca79b60), + UINT64_C(0x797a7c2608c23d9e), UINT64_C(0x692b8d7da8c7f748), UINT64_C(0xc23c7b1ab3e883e1), UINT64_C(0xe1ebb866f32ac6cf), + UINT64_C(0xca6be5075b5046f9), UINT64_C(0x3105a0555f6a3bac), UINT64_C(0x525b7cc4839ea6c5), UINT64_C(0xce1dd2aad7e83cf1), + UINT64_C(0xb4a9105674d79be6), UINT64_C(0x667eb8384834f7db), UINT64_C(0xb200a7a30f789150), UINT64_C(0x4ba4d2c780055821), + UINT64_C(0xb48a01ad5f7474c6), UINT64_C(0x3310ba4a1e25aab8), UINT64_C(0x64379d2408fd5735), UINT64_C(0xf11e9788704e5e0d), + UINT64_C(0xe9866ab0a8e90f4e), UINT64_C(0xaa344ffe50f7a934), UINT64_C(0xcce37a15b3870924), UINT64_C(0xe22135597a867f1c), + UINT64_C(0x8770a58d7fe57f99), UINT64_C(0xcafbbc8d2024bcbc), UINT64_C(0x2307e7f0fcdb1909), UINT64_C(0xdd016550b9ed2b2a), + UINT64_C(0xd0bcf0e9dee7df90), UINT64_C(0xe82d2e7daeab325c), UINT64_C(0x721a2aba71709aa7), 
UINT64_C(0x38cfabc260602614), + UINT64_C(0x3099ccb02b73b4c8), UINT64_C(0x00250ce48fd67df0), UINT64_C(0xcace64d8984b19cf), UINT64_C(0xee305dcbae8615ca), + UINT64_C(0xd187da55485b86ef), UINT64_C(0xebea32b2455e6486), UINT64_C(0x77cb912fa927d5c5), UINT64_C(0x911002ac8b62cbd8), + UINT64_C(0x70730c24c32c5870), UINT64_C(0x0a7cb6f89e988a83), UINT64_C(0x6b5e00839b7db787), UINT64_C(0xecae9f4cfd9ce924), + UINT64_C(0xae09926b714019a5), UINT64_C(0xbc1b2c59bc5ce769), UINT64_C(0x592756761e90349f), UINT64_C(0x95c9a69a21936de3), + UINT64_C(0x192b2119ee48eb9a), UINT64_C(0xcd8d11ebcd8a71c2), UINT64_C(0x34de8d4cad3151d6), UINT64_C(0x0fc4f3baf540eb1c), + UINT64_C(0x88bd85e02b2ec0e2), UINT64_C(0x5b65423e815dafb6), UINT64_C(0x66ec6fadd29f273e), UINT64_C(0xc3622fbc1f1c7bd0), + UINT64_C(0x50cc102827ff1acf), UINT64_C(0xe73cab705018a55f), UINT64_C(0xcd552b588a227f38), UINT64_C(0xc462735f28a9c597), + UINT64_C(0x3e3ccb00a16906e1), UINT64_C(0x79bdf5d7e7dfa593), UINT64_C(0xb333b6942d5db3a9), UINT64_C(0x3566edd901f25f20), + UINT64_C(0x8c5fe3e063253c7b), UINT64_C(0x9f0aa4160fb652ee), UINT64_C(0x2361d9bca2c92f43), UINT64_C(0x2d6a0339fe1de8ee), + UINT64_C(0x389b1bd9476b0470), UINT64_C(0xd7fa2522f0da451e), UINT64_C(0x43e6a01d67c62b2d), UINT64_C(0x5bdc15971dc0d5b3), + UINT64_C(0x38a0a80acbadf021), UINT64_C(0x2c66125ec66e1fad), UINT64_C(0xb58f61bb53b6a9ff), UINT64_C(0x492142919b2d61d6), + UINT64_C(0xd905263cc927ebd9), UINT64_C(0xca15f966e2279122), UINT64_C(0xf9dc67f8101119c9), UINT64_C(0x7f6755699c23d8c9), + UINT64_C(0x26146d38a23b0bdf), UINT64_C(0x0166c70bc773d9aa), UINT64_C(0x5b3317113904ec75), UINT64_C(0x5d3c4311b21e44d1), + UINT64_C(0x479c13c75df8cf18), UINT64_C(0x75a880dd38a8a4ff), UINT64_C(0xdf378e2eb432708d), UINT64_C(0xca1cb0f76b1c5f04), + UINT64_C(0x06c76e876516eb46), UINT64_C(0x965c10e60ec202ad), UINT64_C(0x67b18e2140e0aad3), UINT64_C(0x203ca38572b212b8), + UINT64_C(0x72adad835dd333c6), UINT64_C(0xdd02aa349680a96a), UINT64_C(0x69ab0df01d4b3eab), UINT64_C(0xfebfd83a2c43afd1), + 
UINT64_C(0x0dcd90c392b9fae4), UINT64_C(0x8a87b8033e4cd8cc), UINT64_C(0x3902150c36e99880), UINT64_C(0xb5b655e071474ebc), + UINT64_C(0x6c2dc9eeaffbd8d8), UINT64_C(0x3cf62bfa4986f0fe), UINT64_C(0xa68eaf0719a9afbc), UINT64_C(0xde1f4e9a4b190aef), + UINT64_C(0x7fbc9e8538999e56), UINT64_C(0xf6d5e9db2208a40c), UINT64_C(0x93b13abaddf4554c), UINT64_C(0xd8b5e4ad9911629f), + UINT64_C(0x6fdb9d7376488e52), UINT64_C(0xee604a7ce20d75ad), UINT64_C(0x94ec4abbaa9c2c1d), UINT64_C(0xdbd148c4fcd05ec1), + UINT64_C(0x0865c7c3b380a005), UINT64_C(0xa6da59a56992f211), UINT64_C(0x2eb1dc9f941c83ef), UINT64_C(0x3bf5ccf06910fae7), + UINT64_C(0x23a70e117e1f29f0), UINT64_C(0x4273791acbf6c4e5), UINT64_C(0x338414ec6b5e5d60), UINT64_C(0xa5873517e3d057d9), + UINT64_C(0xea88400a890764f6), UINT64_C(0xc0569d573ca5364f), UINT64_C(0x4c3fc02fc93316e0), UINT64_C(0x76597f718657e577), + UINT64_C(0x17052b8440c7d824), UINT64_C(0x9a7ec0a30be21a00), UINT64_C(0xab0453ac2173dac9), UINT64_C(0xb6f3706820512809), + UINT64_C(0xef44f0b07d46180a), UINT64_C(0x5e9aa12e99509a72), UINT64_C(0x6231337efc0182ca), UINT64_C(0x0963321a419da89b), + UINT64_C(0xfda3e7ad51f82b5e), UINT64_C(0x1ab8790c2f5bf1a3), UINT64_C(0x9ef177b8a59f28c0), UINT64_C(0x27d1c87da66c1652), + UINT64_C(0x1bd6bdf27c49d109), UINT64_C(0xc151e2a66994d599), UINT64_C(0x5e1b8d826b8c12a9), UINT64_C(0x39f41d57213261b5), + UINT64_C(0x16a57bd0bc78aada), UINT64_C(0x0127e7f9699b55c7), UINT64_C(0xd79eccc9f9d703be), UINT64_C(0xb41b81c61ba66d7d), + UINT64_C(0xcf8b79dcb95dce93), UINT64_C(0x5ca102a7743a6e0d), UINT64_C(0xf422a0c3a2ad7b28), UINT64_C(0x4a9137b4a0f03724), + UINT64_C(0x907dcf6425c829c2), UINT64_C(0x15551fd4432261fb), UINT64_C(0xa057dfbd55ef436c), UINT64_C(0x8b2541b9e0e0fa7e), + UINT64_C(0x7262166dcdf4b67e), UINT64_C(0xcf6533e5c608aaeb), UINT64_C(0xd6763d3967359786), UINT64_C(0x1f6b0228d257c676), + UINT64_C(0xc268c1064d2b458a), UINT64_C(0x6d8b2f6e75d2b613), UINT64_C(0xfaaf5adc43d72807), UINT64_C(0xb6376765e344f9f8), + UINT64_C(0xa8e18dd16a4bd501), 
UINT64_C(0xa71aa12a8ec11351), UINT64_C(0x1daaf130b537ebe0), UINT64_C(0x2e8aa415959d5d8f), + UINT64_C(0x2813ff3a3e5cbcfb), UINT64_C(0xf0fdd1d6d16a7c23), UINT64_C(0xbf2b55d2ecf0ee55), UINT64_C(0xbd4e9bec299381d0), + UINT64_C(0xac8827ab807eb180), UINT64_C(0x8514d75ac3b43b0b), UINT64_C(0xc9b5c78e45fb38a8), UINT64_C(0x4b66e6e7b797cd8f), + UINT64_C(0x1a482ffa6870a2d3), UINT64_C(0x98f55f701d4bf919), UINT64_C(0x7c0fda20e7e26ef8), UINT64_C(0x6ef795976fca3b54), + UINT64_C(0x79801cd422fa95cd), UINT64_C(0xce8a72301dbbe230), UINT64_C(0x5e79f4c925bdd0e0), UINT64_C(0x5729e93c99cc12b3), + UINT64_C(0x76d022747522392a), UINT64_C(0xb9d7652e917a6bc4), UINT64_C(0xc2978462dfa9551b), UINT64_C(0xac081b4a7528b0ce), + UINT64_C(0x5b7799fe02443b33), UINT64_C(0x6676e5687742e76a), UINT64_C(0x3e9836e33caf452b), UINT64_C(0x96ff93e427173943), + UINT64_C(0x30fa2f987359e0f6), UINT64_C(0xfaa730326c478363), UINT64_C(0x2bb0560d8986947e), UINT64_C(0x9f7c01d35aefc68f), + UINT64_C(0x6b81189bd90a0e45), UINT64_C(0xd592d2ad2df04128), UINT64_C(0xbcd0e0fe02816ec6), UINT64_C(0x1d6d84e5c1f8df0f), + UINT64_C(0xc4b55a73da2f8713), UINT64_C(0xdbd6510e7ad24d26), UINT64_C(0x7e3452b770e259bd), UINT64_C(0xd5fe716f2c3ee835), + UINT64_C(0x63a6d74ef78acd1d), UINT64_C(0x3bd673b27d5aa140), UINT64_C(0xe394f3a2a4f6d465), UINT64_C(0xf02f642cda7fee7e), + UINT64_C(0xe17ee2617b3d366a), UINT64_C(0x41cdb92402dce780), UINT64_C(0x4e5c54024fd18f6b), UINT64_C(0x6f45dd1c7c5a3f12), + UINT64_C(0xf6fd2b3f9ccda563), UINT64_C(0xe7628d358d971e26), UINT64_C(0x4dabc984370ed105), UINT64_C(0xec05f7d5c53cb70b), + UINT64_C(0xf48eccbc216dcf71), UINT64_C(0x8a571d0cb256f131), UINT64_C(0x4c05466392e32549), UINT64_C(0x91d3f9324ef03c3e), + UINT64_C(0xec0591069697e868), UINT64_C(0xa77da4079db8ffd8), UINT64_C(0x287335de3951784f), UINT64_C(0xe7afb90b4adbbf33), + UINT64_C(0x96e785b0c621dbbf), UINT64_C(0xc7f54753a5e1d81b), UINT64_C(0x4a3a42229fc7491e), UINT64_C(0xc9560ea788a62881), + UINT64_C(0xe34b9ee97b5bef12), UINT64_C(0xfae309a9fbff0656), 
UINT64_C(0xbc23f738a0bf4c58), UINT64_C(0xc6dd1ed9a7a706de), + UINT64_C(0x3473045c7f760007), UINT64_C(0x89b5f0a2e0ace69b), UINT64_C(0x7433c584785f3321), UINT64_C(0xa38220fab7357fc0), + UINT64_C(0x04e1d70ec8db6456), UINT64_C(0xa86065368c31fd72), UINT64_C(0x926cee3a66885fb3), UINT64_C(0xc09c39dbdb8240bc), + UINT64_C(0x1ee291407a9ac9db), UINT64_C(0xa6120818b86fd032), UINT64_C(0xa4c3a1cbf6a6666f), UINT64_C(0xb34ce856697db755), + UINT64_C(0xe3ef1a7123649d75), UINT64_C(0x814ea4e8549f30bc), UINT64_C(0xc8c12f327c1ee0a3), UINT64_C(0xc4ad0d22dbe77043), + UINT64_C(0x608451fb3ab06a00), UINT64_C(0x2e1141be52867cb9), UINT64_C(0x04b92abd9485965f), UINT64_C(0xcf91f012eb16b951), + UINT64_C(0xacc0a45db481b3b3), UINT64_C(0x523f65d99013b4d9), UINT64_C(0xf333b8f8613fae1f), UINT64_C(0x8b651a304f1c80b0), + UINT64_C(0xa91ecd6f061480d2), UINT64_C(0xbd01125685871081), UINT64_C(0x9933950983b6d41e), UINT64_C(0x1f4130fd7912c3e6), + UINT64_C(0x333230fc9385a4ba), UINT64_C(0x9d2d764680fb1581), UINT64_C(0x277e6bb16761eabf), UINT64_C(0x1829af028f40b602), + UINT64_C(0x9783144e64561566), UINT64_C(0x410d30cd66cb4e92), UINT64_C(0xce0e0df02a7ac717), UINT64_C(0xdbfc28dabb65c1e2), + UINT64_C(0x5a83f419f0610b35), UINT64_C(0xb0706efb6f56176b), UINT64_C(0x684148ee29c2a3d6), UINT64_C(0xc47213009755db33), + UINT64_C(0x2600f460fbea3831), UINT64_C(0x7037ec48a50dc3ec), UINT64_C(0xa761879a39764433), UINT64_C(0xcfd6983de3381424), + UINT64_C(0xfdc2524f5d605fc4), UINT64_C(0xbe84a33131a412c9), UINT64_C(0x1bd73706e51699b5), UINT64_C(0x7aea62c60dffb5ab), + UINT64_C(0x010fec687da2bbf4), UINT64_C(0x56aa74a28e54f75c), UINT64_C(0xba52dd2bb4019afe), UINT64_C(0x6ae298d992a98093), + UINT64_C(0xdbfc6eddb2348c70), UINT64_C(0xeab81b5b034b7836), UINT64_C(0x692b0fc00c8986ba), UINT64_C(0x02adf5476f927b39), + UINT64_C(0x0173c9bb282a94e7), UINT64_C(0x1e617773e554c877), UINT64_C(0x241d5db92d0aa39e), UINT64_C(0x902c43c4be589249), + UINT64_C(0x0b817ad8f9617273), UINT64_C(0x43508b7fb53d5d1f), UINT64_C(0xaf1d845886eeb50c), 
UINT64_C(0xc645d0758b0a08f2), + UINT64_C(0x3d1339390783be12), UINT64_C(0x376e4919f2fc41c9), UINT64_C(0x392c5bb8475370e6), UINT64_C(0x5e891f54eec6c015), + UINT64_C(0x16a12880b9ac0923), UINT64_C(0x6437af0453c57f36), UINT64_C(0x8dd1ec0ee82c5835), UINT64_C(0xc4738296f5085ef5), + UINT64_C(0x68c5d2b2d2d06381), UINT64_C(0x8a4627fb8fbef8df), UINT64_C(0x9d56ea18dd2590b3), UINT64_C(0x8dbdd1fd0ca96586), + UINT64_C(0x9c17bd827cc151ab), UINT64_C(0xdddb70eb24c36775), UINT64_C(0xb56277dfd02a9c4d), UINT64_C(0x5a8388d255264a83), + UINT64_C(0xcb7207a0b0155fa4), UINT64_C(0x2bbc2967864dd11a), UINT64_C(0x19fb91190adfc85a), UINT64_C(0xed562d76a7e244c3), + UINT64_C(0xf5438c5585588610), UINT64_C(0xbc16ff713cde2e48), UINT64_C(0x42248c858cf837cb), UINT64_C(0x59c8eeb9769cf08a), + UINT64_C(0x0f5260cc1dc624b7), UINT64_C(0x6b880672b5ebfdd5), UINT64_C(0x2e6d6cf57e3365cf), UINT64_C(0xe994b274628cdb20), + UINT64_C(0x939e00fbb43765d8), UINT64_C(0x093150ef5c7cd883), UINT64_C(0x8ae15f57f13b42f1), UINT64_C(0x3af5014a74f18355), + UINT64_C(0x7e1a2d0c860bcd23), UINT64_C(0x796312eee1445e38), UINT64_C(0x1cbde8ef8bdfee3d), UINT64_C(0x207592ed0910de04), + UINT64_C(0x150e839a79142012), UINT64_C(0xb920f5ff40de84a6), UINT64_C(0x0c05b146a932213b), UINT64_C(0x7406c434e2d92546), + UINT64_C(0x19376004d1fc67aa), UINT64_C(0x82f3677fcf0dd552), UINT64_C(0xd9daf63e3aa745a9), UINT64_C(0x8e1e09d0a9676fdf), + UINT64_C(0x2cb86571c0289958), UINT64_C(0x4c4c12eb3a97b760), UINT64_C(0x1e3468d9bf56d00c), UINT64_C(0x11f90498f14cb4a4), + UINT64_C(0x251664b4422a7c58), UINT64_C(0xad10e44d41c2b7c5), UINT64_C(0x663cf17121b6d221), UINT64_C(0x3fe40cdc49c541b8), + UINT64_C(0xb1b1a8b2a941f9c7), UINT64_C(0x83ffae6e34d4eb78), UINT64_C(0xa4564673c6728fbf), UINT64_C(0xe1499f6bd812a4b9), + UINT64_C(0xfb5507a915ed36a3), UINT64_C(0xe055a829c62de53c), UINT64_C(0x1ea06fc53acba653), UINT64_C(0xce0f8c15fd8f2258), + UINT64_C(0x7dd42e43e5ef6f4b), UINT64_C(0x0c55aecd7e1adc10), UINT64_C(0xc31b0e4d3a4e8b1c), UINT64_C(0x1205469d91599780), + 
UINT64_C(0xbba5d6df94390b83), UINT64_C(0xc97925cae2f17697), UINT64_C(0x3b98f3dc9e15ea08), UINT64_C(0x878203758954cd36), + UINT64_C(0x818deaef5ba91f77), UINT64_C(0x6f8f1786214acb89), UINT64_C(0x26c5c2162849ece8), UINT64_C(0xaf1c297b73471dd3), + UINT64_C(0x415c497c9fa7e936), UINT64_C(0xc1804e923aa3cce6), UINT64_C(0xdd7ca8ffb78dc68c), UINT64_C(0x5b912445ed7ba89a), + UINT64_C(0x95dec0af89a1f157), UINT64_C(0x7041c032d1fa5266), UINT64_C(0xc569835beabc20df), UINT64_C(0xcc662c0dbb7baaef), + UINT64_C(0x20d5d2c1383ff75c), UINT64_C(0x7efdaae3e1c4eaaf), UINT64_C(0x3575fad9533be200), UINT64_C(0xfb0fb500836d48dd), + UINT64_C(0xd211a5090e6d53e2), UINT64_C(0x34afe4050a01467c), UINT64_C(0x63457fe7bfe187c3), UINT64_C(0xc3ee000cb474d925), + UINT64_C(0x4fd32cbbb8326e22), UINT64_C(0xc2abcd1fc9bf14c2), UINT64_C(0xf34b534e55f28258), UINT64_C(0x094ff2a11972ddec), + UINT64_C(0x9744b26f181926a9), UINT64_C(0xa7fe6a0982135b29), UINT64_C(0x0f8d9e7a0de7d61b), UINT64_C(0x4bcd12d1b5d3d8a6), + UINT64_C(0x706e34dbac81bd39), UINT64_C(0xefea01605e9304c6), UINT64_C(0xee3bb6d1e510efe1), UINT64_C(0x84a094db3f4620f8), + UINT64_C(0xf1752fc679d6aeb3), UINT64_C(0x54921e5d6949a43f), UINT64_C(0xd3616f81f2ff8c55), UINT64_C(0x8bd9584eb62232bd), + UINT64_C(0xa990035eef6e7b13), UINT64_C(0xd4c56de5c11dcdda), UINT64_C(0x8048c23ec8bd072b), UINT64_C(0x407539904d984e51), + UINT64_C(0xeaf5a1d46eb3779b), UINT64_C(0x4b06e5769362f357), UINT64_C(0x931f75e21bc0d143), UINT64_C(0x9369439b81c92fc4), + UINT64_C(0x059fccc0d4afbb45), UINT64_C(0xd072671b3c927118), UINT64_C(0x61b6803f95c41115), UINT64_C(0xacb4b2c4381da3f5), + UINT64_C(0xd73bf897ee871c72), UINT64_C(0x241c9d52c953d3c0), UINT64_C(0x083c079e704d7b96), UINT64_C(0x8c431ee43e5171a5), + UINT64_C(0x66079596998b96b6), UINT64_C(0x041ea35d207b478e), UINT64_C(0xbe698683cf7b258e), UINT64_C(0x5457365cf6cbc5bb), + UINT64_C(0xc166c3ef7006b02d), UINT64_C(0x27789ff1e5365132), UINT64_C(0xae4a02397d308867), UINT64_C(0x0388704d03d7b613), + UINT64_C(0xf5c9d782d3fd58e3), 
UINT64_C(0xb51c3fe53965624e), UINT64_C(0xf785b86e7fe0adec), UINT64_C(0x19f72a9ef3a215e8), + UINT64_C(0x19db58361e6633d9), UINT64_C(0xf1fe7a08693d64ab), UINT64_C(0x07c3310adc3bbf03), UINT64_C(0x742e87d333077816), + UINT64_C(0xe817529af0f04970), UINT64_C(0xe7f343c941a044ff), UINT64_C(0xf9693fb4f37b4d2c), UINT64_C(0xb99da4a0b6ccb1ed), + UINT64_C(0x4eef654d39c7f631), UINT64_C(0xd06badd9354befc8), UINT64_C(0x3dea38b48a4fb6cf), UINT64_C(0xf6551a2de11ec63d), + UINT64_C(0xf0dd7ca2d08731e5), UINT64_C(0xfbbac6e989684aff), UINT64_C(0xe2b65b698f6ea652), UINT64_C(0x679e2fc32595fb51), + UINT64_C(0x6547fdc240571414), UINT64_C(0x6809f663de2d0466), UINT64_C(0x6c6b7a0a40a5e48f), UINT64_C(0xe5f43660d891606e), + UINT64_C(0xa44f283a5a5c10fd), UINT64_C(0x95635b53a60083be), UINT64_C(0x7e0f003a2698a45c), UINT64_C(0x2fd0eb2a3cb4db79), + UINT64_C(0x7416380640ad33c7), UINT64_C(0x988de04a8bfe794b), UINT64_C(0x6d00569ebd6839ff), UINT64_C(0x22ddd7d3d0efa384), + UINT64_C(0x20f9c1ae73b1a651), UINT64_C(0x32386da97bb626af), UINT64_C(0x263c358b8e1975fe), UINT64_C(0x32bd1e4fdb3e7f7c), + UINT64_C(0x2ebb53af95ab07db), UINT64_C(0xeccc526f7e6aca61), UINT64_C(0x186fd1f3ad161e28), UINT64_C(0xf96dd58eca026372), + UINT64_C(0x0403c8572fee3bf3), UINT64_C(0x2598261d29b22e84), UINT64_C(0xa4027ffeed481ae0), UINT64_C(0xe2f690ddcdb0fdaf), + UINT64_C(0x95d11d0d60c528fd), UINT64_C(0x0cc242f0eeae1d6c), UINT64_C(0xfa3440087835377f), UINT64_C(0x3d8fad475b8139e4), + UINT64_C(0x8e92fce862d8a97e), UINT64_C(0xc53bc4cb5ed50eb4), UINT64_C(0xc8f91ece0194e8d4), UINT64_C(0xf78d7c6b5cff07e1), + UINT64_C(0x3163d8458b924665), UINT64_C(0xc2ae6dc185c739bf), UINT64_C(0x2943e3eae337c6c6), UINT64_C(0x96bd36f0da4a49f7), + UINT64_C(0x98753f33282f27bf), UINT64_C(0xd5c33455bf0f69fd), UINT64_C(0x78cc9f69e0286682), UINT64_C(0x0631fadc21ec437c), + UINT64_C(0x521c3db58b6b1170), UINT64_C(0x2333f0f70e46b5cf), UINT64_C(0x87be027b8d434ac6), UINT64_C(0xba4c26796c582e4c), + UINT64_C(0x35d52e4f85db73e4), UINT64_C(0x8ac3723b24e99436), 
UINT64_C(0x4a2b6ce4b7a97a02), UINT64_C(0xcb8017cc584b287d), + UINT64_C(0x1ca3610bc2f30e9f), UINT64_C(0xc1c2dafdd385b283), UINT64_C(0xa812778eceff9a2b), UINT64_C(0x91b8429959ef5359), + UINT64_C(0xa2750c665bcab7d2), UINT64_C(0x9212f5d704b5320b), UINT64_C(0xfa46bb7a213be57f), UINT64_C(0xd20cbd122dce6c1d), + UINT64_C(0x82868b5aee7a4776), UINT64_C(0xf49ec5ddf8cec096), UINT64_C(0xa4fc2bf71ac9dcc2), UINT64_C(0x9d8b8f462bd2f17b), + UINT64_C(0x452703fe91008332), UINT64_C(0x919a288ada854bef), UINT64_C(0x75d2b2eb0f4eeed7), UINT64_C(0xd64885293558a96f), + UINT64_C(0x098d7efb4f8d5b31), UINT64_C(0x7ee77eef93a3928e), UINT64_C(0xb28eebae28b63dc8), UINT64_C(0x0f01129fc90af970), + UINT64_C(0xf3d5b92900d45181), UINT64_C(0xb9d8a408ea6715c0), UINT64_C(0xe44424fb8ca9e22e), UINT64_C(0xd81135834c1aaf96), + UINT64_C(0x445b3d67398e888b), UINT64_C(0x0dad43784fe36cda), UINT64_C(0xe6d1bd75c5d81518), UINT64_C(0x662f0e924150c5cb), + UINT64_C(0x78179f80df6e0709), UINT64_C(0xdd8fc687a741289c), UINT64_C(0x710873d7f5ab060e), UINT64_C(0xa1961d2b538f497c), + UINT64_C(0xb36bbf75bc8b8761), UINT64_C(0x675c608353017307), UINT64_C(0xade6b1aa0ec59bbe), UINT64_C(0xc803a2c9426b3c5f), + UINT64_C(0x48a8210409b5ffac), UINT64_C(0xc3d58389ce5f3b13), UINT64_C(0xa23ceb0e71b08443), UINT64_C(0xd9d192cd9c5e9a05), + UINT64_C(0x20d9cd878b94147d), UINT64_C(0x22329c7695f6df46), UINT64_C(0xaebdcdc2c2cbc0d9), UINT64_C(0xe95ae3d514f6f94b), + UINT64_C(0x59152e1f5715e782), UINT64_C(0xb3280d75a8134f15), UINT64_C(0x5bce3379e1fcb7b4), UINT64_C(0x437d9c3238c4169f), + UINT64_C(0x77db7e5ebd5125bd), UINT64_C(0x0dd3aef40336d438), UINT64_C(0x4a496a56bac81428), UINT64_C(0x72a128c3875dc93d), + UINT64_C(0x8eb605e5bef1747d), UINT64_C(0x666d4546567a4eef), UINT64_C(0xad5ad003399d2296), UINT64_C(0x19c74366682b52a0), + UINT64_C(0xb3c35c5a0e259420), UINT64_C(0xf98340503eb93d6d), UINT64_C(0xa51985b0bb7f81e8), UINT64_C(0x2a21510c6c7ca42f), + UINT64_C(0x3c1ac0b52c230998), UINT64_C(0x4e1d572a2d77000b), UINT64_C(0x8dd3adff3bfdec71), 
UINT64_C(0xdfb3a4a23e43d035), + UINT64_C(0xe12f748421173e62), UINT64_C(0x2f356145d2f72758), UINT64_C(0x31c13682374c445c), UINT64_C(0x09240a1f409fab88), + UINT64_C(0xa346e2d2f72fd5e8), UINT64_C(0x2c5b53bfc05f9f77), UINT64_C(0x0a9f7ab218574f6e), UINT64_C(0xc3fcb9b977f0cceb), + UINT64_C(0xac26889eb86459b9), UINT64_C(0x1082f785bc3dac21), UINT64_C(0x3c8c337a4c67ef18), UINT64_C(0x118e48d0e8a66e02), + UINT64_C(0xb777cef85278f2dc), UINT64_C(0x12a268a3dcda05bc), UINT64_C(0x75f5f7d3fde0bd9e), UINT64_C(0x62f5f1650ec91670), + UINT64_C(0x81fcf9e3e1c3adec), UINT64_C(0xf0b5e35ace23349c), UINT64_C(0xde7d514d058e53a4), UINT64_C(0x52a625e5f06242c7), + UINT64_C(0x3cc1346eda6a430a), UINT64_C(0x165bd737e851f6a1), UINT64_C(0xe52c53d745f1b49a), UINT64_C(0x15513074f676fafc), + UINT64_C(0xcb8797dbb29e6710), UINT64_C(0x27b92c8190fd679d), UINT64_C(0x0b39384ac668b176), UINT64_C(0x11341e6d7adad0e9), + UINT64_C(0x491b5b5390b70f94), UINT64_C(0x1f5eccf586d03746), UINT64_C(0x6502ca945646feae), UINT64_C(0x3abb5466229ef7d8), + UINT64_C(0x535b4effbe0ce5f6), UINT64_C(0x6575eefef9e916f5), UINT64_C(0x77a76fbf3c76f2d7), UINT64_C(0x1cc63124152994a7), + UINT64_C(0x6e33f80e95d4323d), UINT64_C(0xd711791d9b2e1d65), UINT64_C(0x7c766cd52013ae49), UINT64_C(0x08bc15230d2ef477), + UINT64_C(0xb751fa3b942ab063), UINT64_C(0xfe99a8b170a11941), UINT64_C(0x731979294908218a), UINT64_C(0x32166899c12f3097), + UINT64_C(0x8318df8e3823dd3d), UINT64_C(0x940e81f0b4ece3d8), UINT64_C(0x81ea0f12130235ea), UINT64_C(0x36603dfef356d752), + UINT64_C(0x409eeb16b992d793), UINT64_C(0xf4c675cca09e229a), UINT64_C(0x0ef989d732dae818), UINT64_C(0x269b4385573ad2f6), + UINT64_C(0x53df04584157173c), UINT64_C(0x260c347bedc5ce82), UINT64_C(0xb9fbfba9b58c1b09), UINT64_C(0x20115df9d0693a14), + UINT64_C(0x8c0fb27588303369), UINT64_C(0x3a9450974a66eaaf), UINT64_C(0x805f0d515d715679), UINT64_C(0x10f4b52a09898972), + UINT64_C(0x20e9c3449e84718e), UINT64_C(0x9eed8745b4e234e2), UINT64_C(0x946c3083bf840def), UINT64_C(0xb18de02e626f7dd9), + 
UINT64_C(0x9e8b496b1d035ed8), UINT64_C(0x6ef3891e7c690f77), UINT64_C(0xd62269e5ad1c07f5), UINT64_C(0x7117ed7eddc2883e), + UINT64_C(0x260f1d08457dfcca), UINT64_C(0xe0759189d723da9d), UINT64_C(0xd6d40adb9c9f94d7), UINT64_C(0x7c47c4b4a670b77e), + UINT64_C(0xb2b5179563a2abe1), UINT64_C(0x62118cb60f121507), UINT64_C(0x22c3a4a74379ceb1), UINT64_C(0xd5904c844fbfed74), + UINT64_C(0xa0afa38c06d50d92), UINT64_C(0xd6223dbbcfcf73f4), UINT64_C(0xf19623e7ec6f83dd), UINT64_C(0xd08c12de2b6265f6), + UINT64_C(0xc487d5dc19489db6), UINT64_C(0x759283ffd06fc796), UINT64_C(0xd61a735ad1cd7ccc), UINT64_C(0x32084ba3ca8fa3ee), + UINT64_C(0x17530308a1204968), UINT64_C(0x80328582a1eb8d8f), UINT64_C(0xd4c873deec7fb3d7), UINT64_C(0x11c825cc4bc8b181), + UINT64_C(0x0137fa50576b21eb), UINT64_C(0xc5ea2f958a3ddb53), UINT64_C(0x6ae611d92b67c9bc), UINT64_C(0xb798b3e1f9c3a851), + UINT64_C(0x22a42679fa4b013f), UINT64_C(0x2071f22dae8de629), UINT64_C(0x3faa3a80e45cbca6), UINT64_C(0xb0418f45808009ec), + UINT64_C(0x446063013dd5a0f4), UINT64_C(0x932445b680ef71ec), UINT64_C(0x2bc9a2d9ab8e2662), UINT64_C(0x8ebd57fbc56a6154), + UINT64_C(0xa28f3d2264ad0f10), UINT64_C(0xffff84df76a10c15), UINT64_C(0xac5c9b0e78fbee81), UINT64_C(0xc1f08e08982b237c), + UINT64_C(0x5907b7fa41daa2b8), UINT64_C(0xbed3856320d9c3c2), UINT64_C(0x500a342c1902f015), UINT64_C(0x0c3a5d539c71b7d6), + UINT64_C(0xa706750b1c3e5604), UINT64_C(0x1543ab593a8c824c), UINT64_C(0xbdfd9d26f151d83c), UINT64_C(0x1603bb40537de208), + UINT64_C(0x1501b0ba802daa2d), UINT64_C(0xdcbcc803f3c11f3c), UINT64_C(0x2bb283a389ec2f35), UINT64_C(0x3a27513ef9d14bf4), + UINT64_C(0xcb7c4fd02a39d8af), UINT64_C(0xcc6f61a03488e43f), UINT64_C(0xfdddf2b5fd6c4b05), UINT64_C(0xa015987625b9755d), + UINT64_C(0x14c5a9b03c63b253), UINT64_C(0x413f7d2608bf939e), UINT64_C(0x8bdb68c7176407e5), UINT64_C(0x436de64d8a614c32), + UINT64_C(0xc2aca4b10ff0bf8e), UINT64_C(0x3b56cc9c1df797e4), UINT64_C(0xb1750cce6cca57bb), UINT64_C(0x8c80e2303509012a), + UINT64_C(0x7f25bae3c4fea8af), 
UINT64_C(0xecf8ed9dac1367b8), UINT64_C(0x1a49274e39668f4e), UINT64_C(0xca4a0ae881c7dc39) }; -static const int STATE = 32; -static const uint64_t MASK = UINT64_C(0xffffffffffffff); +static const int STATE = 32; +static const uint64_t MASK = UINT64_C(0xffffffffffffff); //-------- // State mix function -static FORCE_INLINE uint8_t beam_ROTR8(uint8_t v, int n) { +static FORCE_INLINE uint8_t beam_ROTR8( uint8_t v, int n ) { n = n & 7; - if (n) - v = (v >> n) | (v << (8-n)); + if (n) { + v = (v >> n) | (v << (8 - n)); + } return v; } -static FORCE_INLINE uint64_t beam_ROTR64(uint64_t v, int n) { +static FORCE_INLINE uint64_t beam_ROTR64( uint64_t v, int n ) { n = n & 63; - if (n) + if (n) { v = ROTR64(v, n); + } return v; } +static FORCE_INLINE void mix( uint64_t * state, const uint32_t A ) { + const uint32_t B = A + 1; + const uint32_t iv = state[A ] & 1023; + const uint64_t M = T [iv]; -static FORCE_INLINE void mix(uint64_t * state, const uint32_t A) { - const uint32_t B = A+1; - const uint32_t iv = state[A] & 1023; - const uint64_t M = T[iv]; - state[B] += M + state[A]; + state[B] += state[A] + M; - state[A] ^= state[B]; - state[B] ^= state[A]; - state[A] ^= state[B]; + state[A] ^= state[B]; + state[B] ^= state[A]; + state[A] ^= state[B]; - state[B] = beam_ROTR64(state[B], state[A]); + state[B] = beam_ROTR64(state[B], state[A]); } //--------- // Hash round function -template < bool bswap > -static FORCE_INLINE void round(uint64_t * const state, const uint8_t * m8, uint32_t len) { +template +static FORCE_INLINE void round( uint64_t * const state, const uint8_t * m8, uint32_t len ) { uint8_t * const state8 = (uint8_t *)state; - uint32_t index = 0; - uint32_t sindex = 0; + uint32_t index = 0; + uint32_t sindex = 0; for (uint32_t Len = len >> 3; index < Len; index++) { - uint64_t blk = GET_U64(m8, index*8); - state[sindex] += beam_ROTR64(blk + index + 1, - state[sindex] + index + 1); + uint64_t blk = GET_U64(m8, index * 8); + state[sindex] += beam_ROTR64(blk + index + 1, 
state[sindex] + index + 1); if (sindex == 1) { mix(state, 0); } else if (sindex == 3) { @@ -345,13 +346,12 @@ static FORCE_INLINE void round(uint64_t * const state, const uint8_t * m8, uint3 mix(state, 0); index <<= 3; - sindex = index&31; - for( ; index < len; index++) { - const uint32_t ssindex = bswap ? (sindex^7) : sindex; - state8[ssindex] += beam_ROTR8(m8[index] + index + 1, - state8[ssindex] + index + 1); + sindex = index & 31; + for (; index < len; index++) { + const uint32_t ssindex = bswap ? (sindex ^ 7) : sindex; + state8[ssindex] += beam_ROTR8(m8[index] + index + 1, state8[ssindex] + index + 1); // state+[0,1,2] - mix(state, index%3); + mix(state, index % 3); if (sindex >= 31) { sindex = -1; } @@ -365,26 +365,26 @@ static FORCE_INLINE void round(uint64_t * const state, const uint8_t * m8, uint3 //--------- // main hash function -template < bool bswap > -static void beamsplitter_64(const void * in, const size_t len, const seed_t seed, void * out) { - const uint8_t * key8Arr = (uint8_t *)in; - uint32_t seedbuf[2] = {0}; +template +static void beamsplitter_64( const void * in, const size_t len, const seed_t seed, void * out ) { + const uint8_t * key8Arr = (uint8_t *)in; + uint32_t seedbuf[2] = { 0 }; if (len >= UINT32_C(0xffffffff)) { return; } // the cali number from the Matrix (1999) uint32_t seed32 = seed; if (!bswap) { - seedbuf[0] = 0xc5550690; + seedbuf[0] = 0xc5550690; seedbuf[0] -= seed32; - seedbuf[1] = ~(1 - seed32); + seedbuf[1] = ~(1 - seed32); } else { - seedbuf[1] = 0xc5550690; + seedbuf[1] = 0xc5550690; seedbuf[1] -= seed32; - seedbuf[0] = ~(1 - seed32); + seedbuf[0] = ~(1 - seed32); } - uint64_t state[STATE/8]; + uint64_t state[STATE / 8]; // nothing up my sleeve state[0] = UINT64_C(0x123456789abcdef0); state[1] = UINT64_C(0x0fedcba987654321); @@ -396,50 +396,50 @@ static void beamsplitter_64(const void * in, const size_t len, const seed_t seed round(state, key8Arr, (uint32_t)len); round(state, key8Arr, (uint32_t)len); round(state, key8Arr, 
(uint32_t)len); - round(state, (uint8_t *)seedbuf, 8); - round(state, (uint8_t *)seedbuf, 8); + round(state, (uint8_t *)seedbuf, 8 ); + round(state, (uint8_t *)seedbuf, 8 ); round(state, key8Arr, (uint32_t)len); round(state, key8Arr, (uint32_t)len); round(state, key8Arr, (uint32_t)len); /* - //printf("state = %#018" PRIx64 " %#018" PRIx64 " %#018" PRIx64 " %#018" PRIx64 "\n", - // state[0], state[1], state[2], state[3] ); - */ + * //printf("state = %#018" PRIx64 " %#018" PRIx64 " %#018" PRIx64 " %#018" PRIx64 "\n", + * // state[0], state[1], state[2], state[3] ); + */ - //printf("state = %#018" PRIx64 " %#018" PRIx64 "\n", + // printf("state = %#018" PRIx64 " %#018" PRIx64 "\n", // state[0], state[1] ); - uint64_t h[2] = {0}; + uint64_t h[2] = { 0 }; // The new combination step - h[0] = state[2]; - h[1] = state[3]; + h[0] = state[2]; + h[1] = state[3]; - h[0] += h[1]; + h[0] += h [1]; PUT_U64(h[0], (uint8_t *)out, 0); } REGISTER_FAMILY(beamsplitter, - $.src_url = "https://github.com/crisdosyago/beamsplitter", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://github.com/crisdosyago/beamsplitter", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); // Yes, this has no bad seeds! See note at the top near "thread_local". 
REGISTER_HASH(beamsplitter, - $.desc = "A possibly universal hash made with a 10x64 s-box", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE_VARIABLE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x1BDF358B, - $.verification_BE = 0x4791907E, - $.hashfn_native = beamsplitter_64, - $.hashfn_bswap = beamsplitter_64, - $.badseeds = {} -); + $.desc = "A possibly universal hash made with a 10x64 s-box", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE_VARIABLE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x1BDF358B, + $.verification_BE = 0x4791907E, + $.hashfn_native = beamsplitter_64, + $.hashfn_bswap = beamsplitter_64, + $.badseeds = {} + ); diff --git a/hashes/blake2.cpp b/hashes/blake2.cpp index e778cdcf..54fa96f2 100644 --- a/hashes/blake2.cpp +++ b/hashes/blake2.cpp @@ -30,110 +30,107 @@ #include "Platform.h" #include "Hashlib.h" -static const uint64_t blake2b_IV[8] = -{ - UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b), - UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1), - UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), - UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) +static const uint64_t blake2b_IV [ 8] = { + UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b), + UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1), + UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f), + UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) }; -static const uint32_t blake2s_IV[8] = -{ - 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, - 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 +static const uint32_t blake2s_IV [ 8] = { + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, + 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 }; -static const uint8_t blake2_sigma[12][16] = -{ - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } , - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } , - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } , - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } , - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } , - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } , - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } , - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } , - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } , - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } , - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } +static const uint8_t blake2_sigma[12][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } }; typedef struct blake2b_context_ { - uint64_t h[8]; - uint64_t t[2]; - uint64_t f[2]; - uint8_t buf[128]; - size_t buflen; + uint64_t h[8]; + uint64_t t[2]; + uint64_t f[2]; + uint8_t buf[128]; + size_t buflen; } blake2b_context; typedef struct blake2s_context_ { - uint32_t h[8]; - uint32_t t[2]; - uint32_t f[2]; - uint8_t buf[64]; - size_t buflen; + uint32_t h[8]; + uint32_t t[2]; + uint32_t f[2]; + uint8_t buf[64]; + size_t buflen; } blake2s_context; // This layout is explicitly little-endian struct blake2_params_prefix { - uint8_t 
digest_length; /* 1 */ - uint8_t key_length; /* 2 */ - uint8_t fanout; /* 3 */ - uint8_t depth; /* 4 */ - uint32_t zero; /* 8 */ + uint8_t digest_length; /* 1 */ + uint8_t key_length; /* 2 */ + uint8_t fanout; /* 3 */ + uint8_t depth; /* 4 */ + uint32_t zero; /* 8 */ }; -template < typename T > -NEVER_INLINE static void blake2_Init(T * ctx, unsigned hashbits, uint64_t seed) { - const uint32_t seedlo = seed & 0xFFFFFFFF; - const uint32_t seedhi = (seed >> 32) & 0xFFFFFFFF; +template +NEVER_INLINE static void blake2_Init( T * ctx, unsigned hashbits, uint64_t seed ) { + const uint32_t seedlo = seed & 0xFFFFFFFF; + const uint32_t seedhi = (seed >> 32) & 0xFFFFFFFF; + + memset(ctx , 0, sizeof(*ctx) ); + for (int i = 0; i < 8; i++) { + if (sizeof(ctx->h[0]) == 8) { + ctx->h[i] = blake2b_IV[i]; + } else { + ctx->h[i] = blake2s_IV[i]; + } + } - memset(ctx, 0, sizeof(*ctx)); - for (int i = 0; i < 8; i++) { + struct blake2_params_prefix params; + memset(¶ms, 0, sizeof(params)); + params.digest_length = hashbits / 8; + params.fanout = 1; + params.depth = 1; if (sizeof(ctx->h[0]) == 8) { - ctx->h[i] = blake2b_IV[i]; + ctx->h[0] ^= isLE() ? + GET_U64((const uint8_t *)(¶ms), 0) : + GET_U64((const uint8_t *)(¶ms), 0); } else { - ctx->h[i] = blake2s_IV[i]; + ctx->h[0] ^= isLE() ? + GET_U32((const uint8_t *)(¶ms), 0) : + GET_U32((const uint8_t *)(¶ms), 0); } - } - - struct blake2_params_prefix params; - memset(¶ms, 0, sizeof(params)); - params.digest_length = hashbits/8; - params.fanout = 1; - params.depth = 1; - if (sizeof(ctx->h[0]) == 8) { - ctx->h[0] ^= isLE() ? - GET_U64((const uint8_t *)(¶ms), 0) : - GET_U64 ((const uint8_t *)(¶ms), 0); - } else { - ctx->h[0] ^= isLE() ? 
- GET_U32((const uint8_t *)(¶ms), 0) : - GET_U32 ((const uint8_t *)(¶ms), 0); - } - - // Legacy homegrown BLAKE2 seeding for SMHasher3 - ctx->h[0] ^= seedlo; - ctx->h[1] ^= seedhi; + + // Legacy homegrown BLAKE2 seeding for SMHasher3 + ctx->h[0] ^= seedlo; + ctx->h[1] ^= seedhi; } -template < typename T > +template static int blake2_is_lastblock( const T * ctx ) { - return ctx->f[0] != 0; + return ctx->f[0] != 0; } -template < typename T > +template static void blake2_set_lastblock( T * ctx ) { ctx->f[0] = 0; ctx->f[0]--; } -template < typename T > +template static void blake2_increment_counter( T * ctx, const uint64_t inc ) { - ctx->t[0] += inc; - ctx->t[1] += ( ctx->t[0] < inc ); + ctx->t[0] += inc; + ctx->t[1] += (ctx->t[0] < inc); } // @@ -144,272 +141,272 @@ static void blake2_increment_counter( T * ctx, const uint64_t inc ) { // static void blake2_compress(T * ctx, const uint8_t * in) { // } #if defined(HAVE_SSE_2) -#include "Intrinsics.h" -#include "blake2/compress-sse2-plus.h" + #include "Intrinsics.h" + #include "blake2/compress-sse2-plus.h" #else -#include "blake2/compress-portable.h" + #include "blake2/compress-portable.h" #endif -template < bool bswap, typename T > -static void blake2_Update(T * ctx, const uint8_t * in, size_t inlen) { - const uint64_t BLOCKBYTES = sizeof(ctx->buf); - - if ( inlen > 0 ) { - size_t left = ctx->buflen; - size_t fill = BLOCKBYTES - left; - if ( inlen > fill ) { - ctx->buflen = 0; - memcpy( ctx->buf + left, in, fill ); /* Fill buffer */ - blake2_increment_counter(ctx, BLOCKBYTES ); - blake2_compress(ctx, ctx->buf ); /* Compress */ - in += fill; inlen -= fill; - while(inlen > BLOCKBYTES) { - blake2_increment_counter(ctx, BLOCKBYTES); - blake2_compress(ctx,in); - in += BLOCKBYTES; - inlen -= BLOCKBYTES; - } +template +static void blake2_Update( T * ctx, const uint8_t * in, size_t inlen ) { + const uint64_t BLOCKBYTES = sizeof(ctx->buf); + + if (inlen > 0) { + size_t left = ctx->buflen; + size_t fill = BLOCKBYTES - left; + if 
(inlen > fill) { + ctx->buflen = 0; + memcpy(ctx->buf + left, in, fill); /* Fill buffer */ + blake2_increment_counter(ctx, BLOCKBYTES); + blake2_compress(ctx, ctx->buf); /* Compress */ + in += fill; inlen -= fill; + while (inlen > BLOCKBYTES) { + blake2_increment_counter(ctx, BLOCKBYTES); + blake2_compress(ctx, in); + in += BLOCKBYTES; + inlen -= BLOCKBYTES; + } + } + memcpy(ctx->buf + ctx->buflen, in, inlen); + ctx->buflen += inlen; } - memcpy( ctx->buf + ctx->buflen, in, inlen ); - ctx->buflen += inlen; - } } -template < bool bswap, typename T > -static void blake2_Finalize(T * ctx) { - const uint64_t BLOCKBYTES = sizeof(ctx->buf); +template +static void blake2_Finalize( T * ctx ) { + const uint64_t BLOCKBYTES = sizeof(ctx->buf); - if (blake2_is_lastblock(ctx)) { - return; - } + if (blake2_is_lastblock(ctx)) { + return; + } - blake2_increment_counter( ctx, ctx->buflen ); - blake2_set_lastblock( ctx ); - memset( ctx->buf + ctx->buflen, 0, BLOCKBYTES - ctx->buflen ); /* Padding */ - blake2_compress( ctx, ctx->buf ); + blake2_increment_counter(ctx, ctx->buflen); + blake2_set_lastblock(ctx); + memset(ctx->buf + ctx->buflen, 0, BLOCKBYTES - ctx->buflen); /* Padding */ + blake2_compress(ctx, ctx->buf); } -template < uint32_t hashbits, uint32_t outbits, bool bswap > -static void BLAKE2B(const void * in, const size_t len, const seed_t seed, void * out) { - blake2b_context ctx; +template +static void BLAKE2B( const void * in, const size_t len, const seed_t seed, void * out ) { + blake2b_context ctx; - blake2_Init(&ctx, hashbits, (uint64_t)seed); - blake2_Update(&ctx, (const uint8_t *)in, len); - blake2_Finalize(&ctx); + blake2_Init(&ctx, hashbits, (uint64_t)seed); + blake2_Update(&ctx, (const uint8_t *)in, len); + blake2_Finalize(&ctx); - uint8_t buf[32]; - for (int i = 0; i < 4; ++i ) { - PUT_U64(ctx.h[i], buf, i*8); - } - memcpy(out, buf, (outbits >= 256) ? 
32 : (outbits+7)/8); + uint8_t buf[32]; + for (int i = 0; i < 4; ++i) { + PUT_U64(ctx.h[i], buf, i * 8); + } + memcpy(out, buf, (outbits >= 256) ? 32 : (outbits + 7) / 8); } -template < uint32_t hashbits, uint32_t outbits, bool bswap > -static void BLAKE2S(const void * in, const size_t len, const seed_t seed, void * out) { - blake2s_context ctx; +template +static void BLAKE2S( const void * in, const size_t len, const seed_t seed, void * out ) { + blake2s_context ctx; - blake2_Init(&ctx, hashbits, (uint64_t)seed); - blake2_Update(&ctx, (const uint8_t *)in, len); - blake2_Finalize(&ctx); + blake2_Init(&ctx, hashbits, (uint64_t)seed); + blake2_Update(&ctx, (const uint8_t *)in, len); + blake2_Finalize(&ctx); - uint8_t buf[32]; - for (int i = 0; i < 8; ++i ) { - PUT_U32(ctx.h[i], buf, i*4); - } - memcpy(out, buf, (outbits >= 256) ? 32 : (outbits+7)/8); + uint8_t buf[32]; + for (int i = 0; i < 8; ++i) { + PUT_U32(ctx.h[i], buf, i * 4); + } + memcpy(out, buf, (outbits >= 256) ? 32 : (outbits + 7) / 8); } REGISTER_FAMILY(blake2, - $.src_url = "https://github.com/BLAKE2/BLAKE2", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/BLAKE2/BLAKE2", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(blake2b_256, - $.desc = "BLAKE 2b, 256-bit digest", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 256, - $.verification_LE = 0xC9D8D995, - $.verification_BE = 0xCDB3E566, - $.hashfn_native = BLAKE2B<256,256,false>, - $.hashfn_bswap = BLAKE2B<256,256,true> -); + $.desc = "BLAKE 2b, 256-bit digest", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits 
= 256, + $.verification_LE = 0xC9D8D995, + $.verification_BE = 0xCDB3E566, + $.hashfn_native = BLAKE2B<256, 256, false>, + $.hashfn_bswap = BLAKE2B<256, 256, true> + ); REGISTER_HASH(blake2b_224, - $.desc = "BLAKE 2b, 224-bit digest", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 224, - $.verification_LE = 0x101A62A4, - $.verification_BE = 0x77BE80ED, - $.hashfn_native = BLAKE2B<224,224,false>, - $.hashfn_bswap = BLAKE2B<224,224,true> -); + $.desc = "BLAKE 2b, 224-bit digest", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 224, + $.verification_LE = 0x101A62A4, + $.verification_BE = 0x77BE80ED, + $.hashfn_native = BLAKE2B<224, 224, false>, + $.hashfn_bswap = BLAKE2B<224, 224, true> + ); REGISTER_HASH(blake2b_160, - $.desc = "BLAKE 2b, 160-bit digest", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 160, - $.verification_LE = 0x28ADDA30, - $.verification_BE = 0xFF79839E, - $.hashfn_native = BLAKE2B<160,160,false>, - $.hashfn_bswap = BLAKE2B<160,160,true> -); + $.desc = "BLAKE 2b, 160-bit digest", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 160, + $.verification_LE = 0x28ADDA30, + $.verification_BE = 0xFF79839E, + $.hashfn_native = BLAKE2B<160, 160, false>, + 
$.hashfn_bswap = BLAKE2B<160, 160, true> + ); REGISTER_HASH(blake2b_128, - $.desc = "BLAKE 2b, 128-bit digest", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 128, - $.verification_LE = 0x7DC97611, - $.verification_BE = 0xDD6695FD, - $.hashfn_native = BLAKE2B<128,128,false>, - $.hashfn_bswap = BLAKE2B<128,128,true> -); + $.desc = "BLAKE 2b, 128-bit digest", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 128, + $.verification_LE = 0x7DC97611, + $.verification_BE = 0xDD6695FD, + $.hashfn_native = BLAKE2B<128, 128, false>, + $.hashfn_bswap = BLAKE2B<128, 128, true> + ); REGISTER_HASH(blake2b_256__64, - $.desc = "BLAKE 2b, 256-bit digest, bits 0-63", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 64, - $.verification_LE = 0xCF4F7EC3, - $.verification_BE = 0x0EB38190, - $.hashfn_native = BLAKE2B<256,64,false>, - $.hashfn_bswap = BLAKE2B<256,64,true> -); + $.desc = "BLAKE 2b, 256-bit digest, bits 0-63", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 64, + $.verification_LE = 0xCF4F7EC3, + $.verification_BE = 0x0EB38190, + $.hashfn_native = BLAKE2B<256, 64, false>, + $.hashfn_bswap = BLAKE2B<256, 64, true> + ); REGISTER_HASH(blake2s_256, - $.desc = "BLAKE 2s, 256-bit digest", 
- $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 256, - $.verification_LE = 0x841D6354, - $.verification_BE = 0x9F85F5C2, - $.hashfn_native = BLAKE2S<256,256,false>, - $.hashfn_bswap = BLAKE2S<256,256,true> -); + $.desc = "BLAKE 2s, 256-bit digest", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 256, + $.verification_LE = 0x841D6354, + $.verification_BE = 0x9F85F5C2, + $.hashfn_native = BLAKE2S<256, 256, false>, + $.hashfn_bswap = BLAKE2S<256, 256, true> + ); REGISTER_HASH(blake2s_224, - $.desc = "BLAKE 2s, 224-bit digest", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 224, - $.verification_LE = 0x19B36D2C, - $.verification_BE = 0xBD261F10, - $.hashfn_native = BLAKE2S<224,224,false>, - $.hashfn_bswap = BLAKE2S<224,224,true> -); + $.desc = "BLAKE 2s, 224-bit digest", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 224, + $.verification_LE = 0x19B36D2C, + $.verification_BE = 0xBD261F10, + $.hashfn_native = BLAKE2S<224, 224, false>, + $.hashfn_bswap = BLAKE2S<224, 224, true> + ); REGISTER_HASH(blake2s_160, - $.desc = "BLAKE 2s, 160-bit digest", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - 
FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 160, - $.verification_LE = 0xD50FF144, - $.verification_BE = 0xF9579BEA, - $.hashfn_native = BLAKE2S<160,160,false>, - $.hashfn_bswap = BLAKE2S<160,160,true> -); + $.desc = "BLAKE 2s, 160-bit digest", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 160, + $.verification_LE = 0xD50FF144, + $.verification_BE = 0xF9579BEA, + $.hashfn_native = BLAKE2S<160, 160, false>, + $.hashfn_bswap = BLAKE2S<160, 160, true> + ); REGISTER_HASH(blake2s_128, - $.desc = "BLAKE 2s, 128-bit digest", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 128, - $.verification_LE = 0xE8D8FCDF, - $.verification_BE = 0x9C786057, - $.hashfn_native = BLAKE2S<128,128,false>, - $.hashfn_bswap = BLAKE2S<128,128,true> -); + $.desc = "BLAKE 2s, 128-bit digest", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 128, + $.verification_LE = 0xE8D8FCDF, + $.verification_BE = 0x9C786057, + $.hashfn_native = BLAKE2S<128, 128, false>, + $.hashfn_bswap = BLAKE2S<128, 128, true> + ); REGISTER_HASH(blake2s_256__64, - $.desc = "BLAKE 2s, 256-bit digest, bits 0-63", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - 
FLAG_IMPL_VERY_SLOW, - $.bits = 64, - $.verification_LE = 0x53000BB2, - $.verification_BE = 0x901DDE1D, - $.hashfn_native = BLAKE2S<256,64,false>, - $.hashfn_bswap = BLAKE2S<256,64,true> -); + $.desc = "BLAKE 2s, 256-bit digest, bits 0-63", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 64, + $.verification_LE = 0x53000BB2, + $.verification_BE = 0x901DDE1D, + $.hashfn_native = BLAKE2S<256, 64, false>, + $.hashfn_bswap = BLAKE2S<256, 64, true> + ); diff --git a/hashes/blake2/compress-portable.h b/hashes/blake2/compress-portable.h index 0d64a865..89c16fd5 100644 --- a/hashes/blake2/compress-portable.h +++ b/hashes/blake2/compress-portable.h @@ -1,13 +1,13 @@ -#define G(r,i,a,b,c,d) \ - do { \ - a = a + b + m[blake2_sigma[r][2*i+0]]; \ - d = ROTR64(d ^ a, 32); \ - c = c + d; \ - b = ROTR64(b ^ c, 24); \ - a = a + b + m[blake2_sigma[r][2*i+1]]; \ - d = ROTR64(d ^ a, 16); \ - c = c + d; \ - b = ROTR64(b ^ c, 63); \ +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2_sigma[r][2*i+0]]; \ + d = ROTR64(d ^ a, 32); \ + c = c + d; \ + b = ROTR64(b ^ c, 24); \ + a = a + b + m[blake2_sigma[r][2*i+1]]; \ + d = ROTR64(d ^ a, 16); \ + c = c + d; \ + b = ROTR64(b ^ c, 63); \ } while(0) #define ROUND(r) \ @@ -22,100 +22,99 @@ G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ } while(0) -template < bool bswap > -static void blake2_compress(blake2b_context * ctx, const uint8_t * in) { - uint64_t m[16]; - uint64_t v[16]; - size_t i; - - for( i = 0; i < 16; ++i ) { - m[i] = GET_U64(in, i * sizeof(m[i])); - } - - for( i = 0; i < 8; ++i ) { - v[i] = ctx->h[i]; - } - - v[ 8] = blake2b_IV[0]; - v[ 9] = blake2b_IV[1]; - v[10] = blake2b_IV[2]; - v[11] = blake2b_IV[3]; - v[12] = blake2b_IV[4] ^ ctx->t[0]; - v[13] = blake2b_IV[5] ^ ctx->t[1]; - v[14] = blake2b_IV[6] ^ ctx->f[0]; - v[15] = blake2b_IV[7] ^ 
ctx->f[1]; - - ROUND( 0 ); - ROUND( 1 ); - ROUND( 2 ); - ROUND( 3 ); - ROUND( 4 ); - ROUND( 5 ); - ROUND( 6 ); - ROUND( 7 ); - ROUND( 8 ); - ROUND( 9 ); - ROUND( 10 ); - ROUND( 11 ); - - for( i = 0; i < 8; ++i ) { - ctx->h[i] = ctx->h[i] ^ v[i] ^ v[i + 8]; - } +template +static void blake2_compress( blake2b_context * ctx, const uint8_t * in ) { + uint64_t m[16]; + uint64_t v[16]; + size_t i; + + for (i = 0; i < 16; ++i) { + m[i] = GET_U64(in, i * sizeof(m[i])); + } + + for (i = 0; i < 8; ++i) { + v[i] = ctx->h[i]; + } + + v[ 8] = blake2b_IV[0]; + v[ 9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = blake2b_IV[4] ^ ctx->t[0]; + v[13] = blake2b_IV[5] ^ ctx->t[1]; + v[14] = blake2b_IV[6] ^ ctx->f[0]; + v[15] = blake2b_IV[7] ^ ctx->f[1]; + + ROUND( 0); + ROUND( 1); + ROUND( 2); + ROUND( 3); + ROUND( 4); + ROUND( 5); + ROUND( 6); + ROUND( 7); + ROUND( 8); + ROUND( 9); + ROUND(10); + ROUND(11); + + for (i = 0; i < 8; ++i) { + ctx->h[i] = ctx->h[i] ^ v[i] ^ v[i + 8]; + } } #undef G -#define G(r,i,a,b,c,d) \ - do { \ - a = a + b + m[blake2_sigma[r][2*i+0]]; \ - d = ROTR32(d ^ a, 16); \ - c = c + d; \ - b = ROTR32(b ^ c, 12); \ - a = a + b + m[blake2_sigma[r][2*i+1]]; \ - d = ROTR32(d ^ a, 8); \ - c = c + d; \ - b = ROTR32(b ^ c, 7); \ +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2_sigma[r][2*i+0]]; \ + d = ROTR32(d ^ a, 16); \ + c = c + d; \ + b = ROTR32(b ^ c, 12); \ + a = a + b + m[blake2_sigma[r][2*i+1]]; \ + d = ROTR32(d ^ a, 8); \ + c = c + d; \ + b = ROTR32(b ^ c, 7); \ } while(0) -template < bool bswap > -static void blake2_compress(blake2s_context * ctx, const uint8_t * in) { - uint32_t m[16]; - uint32_t v[16]; - size_t i; - - for( i = 0; i < 16; ++i ) { - m[i] = GET_U32(in, i * sizeof(m[i])); - } - - for( i = 0; i < 8; ++i ) { - v[i] = ctx->h[i]; - } - - v[ 8] = blake2s_IV[0]; - v[ 9] = blake2s_IV[1]; - v[10] = blake2s_IV[2]; - v[11] = blake2s_IV[3]; - v[12] = blake2s_IV[4] ^ ctx->t[0]; - v[13] = blake2s_IV[5] ^ ctx->t[1]; 
- v[14] = blake2s_IV[6] ^ ctx->f[0]; - v[15] = blake2s_IV[7] ^ ctx->f[1]; - - ROUND( 0 ); - ROUND( 1 ); - ROUND( 2 ); - ROUND( 3 ); - ROUND( 4 ); - ROUND( 5 ); - ROUND( 6 ); - ROUND( 7 ); - ROUND( 8 ); - ROUND( 9 ); - - for( i = 0; i < 8; ++i ) { - ctx->h[i] = ctx->h[i] ^ v[i] ^ v[i + 8]; - } +template +static void blake2_compress( blake2s_context * ctx, const uint8_t * in ) { + uint32_t m[16]; + uint32_t v[16]; + size_t i; + + for (i = 0; i < 16; ++i) { + m[i] = GET_U32(in, i * sizeof(m[i])); + } + + for (i = 0; i < 8; ++i) { + v[i] = ctx->h[i]; + } + + v[ 8] = blake2s_IV[0]; + v[ 9] = blake2s_IV[1]; + v[10] = blake2s_IV[2]; + v[11] = blake2s_IV[3]; + v[12] = blake2s_IV[4] ^ ctx->t[0]; + v[13] = blake2s_IV[5] ^ ctx->t[1]; + v[14] = blake2s_IV[6] ^ ctx->f[0]; + v[15] = blake2s_IV[7] ^ ctx->f[1]; + + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + + for (i = 0; i < 8; ++i) { + ctx->h[i] = ctx->h[i] ^ v[i] ^ v[i + 8]; + } } #undef G #undef ROUND - diff --git a/hashes/blake2/compress-sse2-plus.h b/hashes/blake2/compress-sse2-plus.h index 05b9c0a5..2cd1d3d1 100644 --- a/hashes/blake2/compress-sse2-plus.h +++ b/hashes/blake2/compress-sse2-plus.h @@ -5,285 +5,285 @@ // It is generally assumed that supporting a later/higher instruction // set includes support for previous/lower instruction sets. 
-#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) ) -#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) +#define LOADU(p) _mm_loadu_si128((const __m128i *)(p)) +#define STOREU(p, r) _mm_storeu_si128((__m128i *)(p), r) //----------------------------------------------------------------------------- // BLAKE2b code #if defined(HAVE_SSE_4_1) -#define LOAD_MSG_0_1(b0, b1) b0 = _mm_unpacklo_epi64(m0, m1); b1 = _mm_unpacklo_epi64(m2, m3); -#define LOAD_MSG_0_2(b0, b1) b0 = _mm_unpackhi_epi64(m0, m1); b1 = _mm_unpackhi_epi64(m2, m3); -#define LOAD_MSG_0_3(b0, b1) b0 = _mm_unpacklo_epi64(m4, m5); b1 = _mm_unpacklo_epi64(m6, m7); -#define LOAD_MSG_0_4(b0, b1) b0 = _mm_unpackhi_epi64(m4, m5); b1 = _mm_unpackhi_epi64(m6, m7); -#define LOAD_MSG_1_1(b0, b1) b0 = _mm_unpacklo_epi64(m7, m2); b1 = _mm_unpackhi_epi64(m4, m6); -#define LOAD_MSG_1_2(b0, b1) b0 = _mm_unpacklo_epi64(m5, m4); b1 = _mm_alignr_epi8(m3, m7, 8); -#define LOAD_MSG_1_3(b0, b1) b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); b1 = _mm_unpackhi_epi64(m5, m2); -#define LOAD_MSG_1_4(b0, b1) b0 = _mm_unpacklo_epi64(m6, m1); b1 = _mm_unpackhi_epi64(m3, m1); -#define LOAD_MSG_2_1(b0, b1) b0 = _mm_alignr_epi8(m6, m5, 8); b1 = _mm_unpackhi_epi64(m2, m7); -#define LOAD_MSG_2_2(b0, b1) b0 = _mm_unpacklo_epi64(m4, m0); b1 = _mm_blend_epi16(m1, m6, 0xF0); -#define LOAD_MSG_2_3(b0, b1) b0 = _mm_blend_epi16(m5, m1, 0xF0); b1 = _mm_unpackhi_epi64(m3, m4); -#define LOAD_MSG_2_4(b0, b1) b0 = _mm_unpacklo_epi64(m7, m3); b1 = _mm_alignr_epi8(m2, m0, 8); -#define LOAD_MSG_3_1(b0, b1) b0 = _mm_unpackhi_epi64(m3, m1); b1 = _mm_unpackhi_epi64(m6, m5); -#define LOAD_MSG_3_2(b0, b1) b0 = _mm_unpackhi_epi64(m4, m0); b1 = _mm_unpacklo_epi64(m6, m7); -#define LOAD_MSG_3_3(b0, b1) b0 = _mm_blend_epi16(m1, m2, 0xF0); b1 = _mm_blend_epi16(m2, m7, 0xF0); -#define LOAD_MSG_3_4(b0, b1) b0 = _mm_unpacklo_epi64(m3, m5); b1 = _mm_unpacklo_epi64(m0, m4); -#define LOAD_MSG_4_1(b0, b1) b0 = _mm_unpackhi_epi64(m4, m2); b1 = 
_mm_unpacklo_epi64(m1, m5); -#define LOAD_MSG_4_2(b0, b1) b0 = _mm_blend_epi16(m0, m3, 0xF0); b1 = _mm_blend_epi16(m2, m7, 0xF0); -#define LOAD_MSG_4_3(b0, b1) b0 = _mm_blend_epi16(m7, m5, 0xF0); b1 = _mm_blend_epi16(m3, m1, 0xF0); -#define LOAD_MSG_4_4(b0, b1) b0 = _mm_alignr_epi8(m6, m0, 8); b1 = _mm_blend_epi16(m4, m6, 0xF0); -#define LOAD_MSG_5_1(b0, b1) b0 = _mm_unpacklo_epi64(m1, m3); b1 = _mm_unpacklo_epi64(m0, m4); -#define LOAD_MSG_5_2(b0, b1) b0 = _mm_unpacklo_epi64(m6, m5); b1 = _mm_unpackhi_epi64(m5, m1); -#define LOAD_MSG_5_3(b0, b1) b0 = _mm_blend_epi16(m2, m3, 0xF0); b1 = _mm_unpackhi_epi64(m7, m0); -#define LOAD_MSG_5_4(b0, b1) b0 = _mm_unpackhi_epi64(m6, m2); b1 = _mm_blend_epi16(m7, m4, 0xF0); -#define LOAD_MSG_6_1(b0, b1) b0 = _mm_blend_epi16(m6, m0, 0xF0); b1 = _mm_unpacklo_epi64(m7, m2); -#define LOAD_MSG_6_2(b0, b1) b0 = _mm_unpackhi_epi64(m2, m7); b1 = _mm_alignr_epi8(m5, m6, 8); -#define LOAD_MSG_6_3(b0, b1) b0 = _mm_unpacklo_epi64(m0, m3); b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); -#define LOAD_MSG_6_4(b0, b1) b0 = _mm_unpackhi_epi64(m3, m1); b1 = _mm_blend_epi16(m1, m5, 0xF0); -#define LOAD_MSG_7_1(b0, b1) b0 = _mm_unpackhi_epi64(m6, m3); b1 = _mm_blend_epi16(m6, m1, 0xF0); -#define LOAD_MSG_7_2(b0, b1) b0 = _mm_alignr_epi8(m7, m5, 8); b1 = _mm_unpackhi_epi64(m0, m4); -#define LOAD_MSG_7_3(b0, b1) b0 = _mm_unpackhi_epi64(m2, m7); b1 = _mm_unpacklo_epi64(m4, m1); -#define LOAD_MSG_7_4(b0, b1) b0 = _mm_unpacklo_epi64(m0, m2); b1 = _mm_unpacklo_epi64(m3, m5); -#define LOAD_MSG_8_1(b0, b1) b0 = _mm_unpacklo_epi64(m3, m7); b1 = _mm_alignr_epi8(m0, m5, 8); -#define LOAD_MSG_8_2(b0, b1) b0 = _mm_unpackhi_epi64(m7, m4); b1 = _mm_alignr_epi8(m4, m1, 8); -#define LOAD_MSG_8_3(b0, b1) b0 = m6; b1 = _mm_alignr_epi8(m5, m0, 8); -#define LOAD_MSG_8_4(b0, b1) b0 = _mm_blend_epi16(m1, m3, 0xF0); b1 = m2; -#define LOAD_MSG_9_1(b0, b1) b0 = _mm_unpacklo_epi64(m5, m4); b1 = _mm_unpackhi_epi64(m3, m0); -#define LOAD_MSG_9_2(b0, b1) b0 = 
_mm_unpacklo_epi64(m1, m2); b1 = _mm_blend_epi16(m3, m2, 0xF0); -#define LOAD_MSG_9_3(b0, b1) b0 = _mm_unpackhi_epi64(m7, m4); b1 = _mm_unpackhi_epi64(m1, m6); -#define LOAD_MSG_9_4(b0, b1) b0 = _mm_alignr_epi8(m7, m5, 8); b1 = _mm_unpacklo_epi64(m6, m0); -#define LOAD_MSG_10_1(b0, b1) b0 = _mm_unpacklo_epi64(m0, m1); b1 = _mm_unpacklo_epi64(m2, m3); -#define LOAD_MSG_10_2(b0, b1) b0 = _mm_unpackhi_epi64(m0, m1); b1 = _mm_unpackhi_epi64(m2, m3); -#define LOAD_MSG_10_3(b0, b1) b0 = _mm_unpacklo_epi64(m4, m5); b1 = _mm_unpacklo_epi64(m6, m7); -#define LOAD_MSG_10_4(b0, b1) b0 = _mm_unpackhi_epi64(m4, m5); b1 = _mm_unpackhi_epi64(m6, m7); -#define LOAD_MSG_11_1(b0, b1) b0 = _mm_unpacklo_epi64(m7, m2); b1 = _mm_unpackhi_epi64(m4, m6); -#define LOAD_MSG_11_2(b0, b1) b0 = _mm_unpacklo_epi64(m5, m4); b1 = _mm_alignr_epi8(m3, m7, 8); -#define LOAD_MSG_11_3(b0, b1) b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); b1 = _mm_unpackhi_epi64(m5, m2); -#define LOAD_MSG_11_4(b0, b1) b0 = _mm_unpacklo_epi64(m6, m1); b1 = _mm_unpackhi_epi64(m3, m1); + #define LOAD_MSG_0_1(b0, b1) b0 = _mm_unpacklo_epi64(m0, m1); b1 = _mm_unpacklo_epi64(m2, m3); + #define LOAD_MSG_0_2(b0, b1) b0 = _mm_unpackhi_epi64(m0, m1); b1 = _mm_unpackhi_epi64(m2, m3); + #define LOAD_MSG_0_3(b0, b1) b0 = _mm_unpacklo_epi64(m4, m5); b1 = _mm_unpacklo_epi64(m6, m7); + #define LOAD_MSG_0_4(b0, b1) b0 = _mm_unpackhi_epi64(m4, m5); b1 = _mm_unpackhi_epi64(m6, m7); + #define LOAD_MSG_1_1(b0, b1) b0 = _mm_unpacklo_epi64(m7, m2); b1 = _mm_unpackhi_epi64(m4, m6); + #define LOAD_MSG_1_2(b0, b1) b0 = _mm_unpacklo_epi64(m5, m4); b1 = _mm_alignr_epi8(m3, m7, 8); + #define LOAD_MSG_1_3(b0, b1) b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); b1 = _mm_unpackhi_epi64(m5, m2); + #define LOAD_MSG_1_4(b0, b1) b0 = _mm_unpacklo_epi64(m6, m1); b1 = _mm_unpackhi_epi64(m3, m1); + #define LOAD_MSG_2_1(b0, b1) b0 = _mm_alignr_epi8(m6, m5, 8); b1 = _mm_unpackhi_epi64(m2, m7); + #define LOAD_MSG_2_2(b0, b1) b0 = 
_mm_unpacklo_epi64(m4, m0); b1 = _mm_blend_epi16(m1, m6, 0xF0); + #define LOAD_MSG_2_3(b0, b1) b0 = _mm_blend_epi16(m5, m1, 0xF0); b1 = _mm_unpackhi_epi64(m3, m4); + #define LOAD_MSG_2_4(b0, b1) b0 = _mm_unpacklo_epi64(m7, m3); b1 = _mm_alignr_epi8(m2, m0, 8); + #define LOAD_MSG_3_1(b0, b1) b0 = _mm_unpackhi_epi64(m3, m1); b1 = _mm_unpackhi_epi64(m6, m5); + #define LOAD_MSG_3_2(b0, b1) b0 = _mm_unpackhi_epi64(m4, m0); b1 = _mm_unpacklo_epi64(m6, m7); + #define LOAD_MSG_3_3(b0, b1) b0 = _mm_blend_epi16(m1, m2, 0xF0); b1 = _mm_blend_epi16(m2, m7, 0xF0); + #define LOAD_MSG_3_4(b0, b1) b0 = _mm_unpacklo_epi64(m3, m5); b1 = _mm_unpacklo_epi64(m0, m4); + #define LOAD_MSG_4_1(b0, b1) b0 = _mm_unpackhi_epi64(m4, m2); b1 = _mm_unpacklo_epi64(m1, m5); + #define LOAD_MSG_4_2(b0, b1) b0 = _mm_blend_epi16(m0, m3, 0xF0); b1 = _mm_blend_epi16(m2, m7, 0xF0); + #define LOAD_MSG_4_3(b0, b1) b0 = _mm_blend_epi16(m7, m5, 0xF0); b1 = _mm_blend_epi16(m3, m1, 0xF0); + #define LOAD_MSG_4_4(b0, b1) b0 = _mm_alignr_epi8(m6, m0, 8); b1 = _mm_blend_epi16(m4, m6, 0xF0); + #define LOAD_MSG_5_1(b0, b1) b0 = _mm_unpacklo_epi64(m1, m3); b1 = _mm_unpacklo_epi64(m0, m4); + #define LOAD_MSG_5_2(b0, b1) b0 = _mm_unpacklo_epi64(m6, m5); b1 = _mm_unpackhi_epi64(m5, m1); + #define LOAD_MSG_5_3(b0, b1) b0 = _mm_blend_epi16(m2, m3, 0xF0); b1 = _mm_unpackhi_epi64(m7, m0); + #define LOAD_MSG_5_4(b0, b1) b0 = _mm_unpackhi_epi64(m6, m2); b1 = _mm_blend_epi16(m7, m4, 0xF0); + #define LOAD_MSG_6_1(b0, b1) b0 = _mm_blend_epi16(m6, m0, 0xF0); b1 = _mm_unpacklo_epi64(m7, m2); + #define LOAD_MSG_6_2(b0, b1) b0 = _mm_unpackhi_epi64(m2, m7); b1 = _mm_alignr_epi8(m5, m6, 8); + #define LOAD_MSG_6_3(b0, b1) b0 = _mm_unpacklo_epi64(m0, m3); b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); + #define LOAD_MSG_6_4(b0, b1) b0 = _mm_unpackhi_epi64(m3, m1); b1 = _mm_blend_epi16(m1, m5, 0xF0); + #define LOAD_MSG_7_1(b0, b1) b0 = _mm_unpackhi_epi64(m6, m3); b1 = _mm_blend_epi16(m6, m1, 0xF0); + #define LOAD_MSG_7_2(b0, b1) b0 
= _mm_alignr_epi8(m7, m5, 8); b1 = _mm_unpackhi_epi64(m0, m4); + #define LOAD_MSG_7_3(b0, b1) b0 = _mm_unpackhi_epi64(m2, m7); b1 = _mm_unpacklo_epi64(m4, m1); + #define LOAD_MSG_7_4(b0, b1) b0 = _mm_unpacklo_epi64(m0, m2); b1 = _mm_unpacklo_epi64(m3, m5); + #define LOAD_MSG_8_1(b0, b1) b0 = _mm_unpacklo_epi64(m3, m7); b1 = _mm_alignr_epi8(m0, m5, 8); + #define LOAD_MSG_8_2(b0, b1) b0 = _mm_unpackhi_epi64(m7, m4); b1 = _mm_alignr_epi8(m4, m1, 8); + #define LOAD_MSG_8_3(b0, b1) b0 = m6; b1 = _mm_alignr_epi8(m5, m0, 8); + #define LOAD_MSG_8_4(b0, b1) b0 = _mm_blend_epi16(m1, m3, 0xF0); b1 = m2; + #define LOAD_MSG_9_1(b0, b1) b0 = _mm_unpacklo_epi64(m5, m4); b1 = _mm_unpackhi_epi64(m3, m0); + #define LOAD_MSG_9_2(b0, b1) b0 = _mm_unpacklo_epi64(m1, m2); b1 = _mm_blend_epi16(m3, m2, 0xF0); + #define LOAD_MSG_9_3(b0, b1) b0 = _mm_unpackhi_epi64(m7, m4); b1 = _mm_unpackhi_epi64(m1, m6); + #define LOAD_MSG_9_4(b0, b1) b0 = _mm_alignr_epi8(m7, m5, 8); b1 = _mm_unpacklo_epi64(m6, m0); + #define LOAD_MSG_10_1(b0, b1) b0 = _mm_unpacklo_epi64(m0, m1); b1 = _mm_unpacklo_epi64(m2, m3); + #define LOAD_MSG_10_2(b0, b1) b0 = _mm_unpackhi_epi64(m0, m1); b1 = _mm_unpackhi_epi64(m2, m3); + #define LOAD_MSG_10_3(b0, b1) b0 = _mm_unpacklo_epi64(m4, m5); b1 = _mm_unpacklo_epi64(m6, m7); + #define LOAD_MSG_10_4(b0, b1) b0 = _mm_unpackhi_epi64(m4, m5); b1 = _mm_unpackhi_epi64(m6, m7); + #define LOAD_MSG_11_1(b0, b1) b0 = _mm_unpacklo_epi64(m7, m2); b1 = _mm_unpackhi_epi64(m4, m6); + #define LOAD_MSG_11_2(b0, b1) b0 = _mm_unpacklo_epi64(m5, m4); b1 = _mm_alignr_epi8(m3, m7, 8); + #define LOAD_MSG_11_3(b0, b1) b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); b1 = _mm_unpackhi_epi64(m5, m2); + #define LOAD_MSG_11_4(b0, b1) b0 = _mm_unpacklo_epi64(m6, m1); b1 = _mm_unpackhi_epi64(m3, m1); #else -#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) -#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) -#define 
LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) -#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) -#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) -#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) -#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) -#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) -#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5) -#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2) -#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7) -#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1) -#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13) -#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12) -#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4) -#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0) -#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2) -#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4) -#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6) -#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8) -#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0) -#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11) -#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15) -#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14) -#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, 
m12); b1 = _mm_set_epi64x(m4, m14) -#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13) -#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9) -#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2) -#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12) -#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1) -#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8) -#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6) -#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11) -#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3) -#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1) -#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4) -#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7) -#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6) -#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3) -#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12) -#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4) -#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5) -#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12) -#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13) -#define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9) -#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15) -#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11) -#define 
LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7) + #define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2 , m0 ); b1 = _mm_set_epi64x(m6, m4) + #define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3 , m1 ); b1 = _mm_set_epi64x(m7, m5) + #define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8 ); b1 = _mm_set_epi64x(m14, m12) + #define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9 ); b1 = _mm_set_epi64x(m15, m13) + #define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4 , m14); b1 = _mm_set_epi64x(m13, m9) + #define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8 , m10); b1 = _mm_set_epi64x(m6, m15) + #define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0 , m1 ); b1 = _mm_set_epi64x(m5, m11) + #define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2 , m12); b1 = _mm_set_epi64x(m3, m7) + #define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5) + #define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0 , m8 ); b1 = _mm_set_epi64x(m13, m2) + #define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3 , m10); b1 = _mm_set_epi64x(m9, m7) + #define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6 , m14); b1 = _mm_set_epi64x(m4, m1) + #define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3 , m7 ); b1 = _mm_set_epi64x(m11, m13) + #define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1 , m9 ); b1 = _mm_set_epi64x(m14, m12) + #define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5 , m2 ); b1 = _mm_set_epi64x(m15, m4) + #define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6 ); b1 = _mm_set_epi64x(m8, m0) + #define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5 , m9 ); b1 = _mm_set_epi64x(m10, m2) + #define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7 , m0 ); b1 = _mm_set_epi64x(m15, m4) + #define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6) + #define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1 ); b1 = _mm_set_epi64x(m13, m8) + #define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6 , m2 ); b1 = _mm_set_epi64x(m8, m0) + #define 
LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11) + #define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7 , m4 ); b1 = _mm_set_epi64x(m1, m15) + #define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5 , m13); b1 = _mm_set_epi64x(m9, m14) + #define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1 , m12); b1 = _mm_set_epi64x(m4, m14) + #define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5 ); b1 = _mm_set_epi64x(m10, m13) + #define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6 , m0 ); b1 = _mm_set_epi64x(m8, m9) + #define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3 , m7 ); b1 = _mm_set_epi64x(m11, m2) + #define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7 , m13); b1 = _mm_set_epi64x(m3, m12) + #define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1) + #define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5 ); b1 = _mm_set_epi64x(m2, m8) + #define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4 , m0 ); b1 = _mm_set_epi64x(m10, m6) + #define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6 ); b1 = _mm_set_epi64x(m0, m11) + #define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9 , m15); b1 = _mm_set_epi64x(m8, m3) + #define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1) + #define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7 , m2 ); b1 = _mm_set_epi64x(m5, m4) + #define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8 , m10); b1 = _mm_set_epi64x(m1, m7) + #define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4 , m2 ); b1 = _mm_set_epi64x(m5, m6) + #define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9 , m15); b1 = _mm_set_epi64x(m13, m3) + #define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12) + #define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2 , m0 ); b1 = _mm_set_epi64x(m6, m4) + #define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3 , m1 ); b1 = _mm_set_epi64x(m7, m5) + #define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8 ); b1 = _mm_set_epi64x(m14, m12) + #define 
LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9 ); b1 = _mm_set_epi64x(m15, m13) + #define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4 , m14); b1 = _mm_set_epi64x(m13, m9) + #define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8 , m10); b1 = _mm_set_epi64x(m6, m15) + #define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0 , m1 ); b1 = _mm_set_epi64x(m5, m11) + #define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2 , m12); b1 = _mm_set_epi64x(m3, m7) #endif #if defined(HAVE_SSSE_3) && !defined(HAVE_XOP) -#define _mm_roti_epi64(x, c) \ - (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \ - : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ - : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \ - : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \ +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \ + : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \ : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))) #elif !defined(HAVE_SSSE_3) && !defined(HAVE_XOP) -#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) )) + #define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64((r), -(c)), _mm_slli_epi64((r), 64 - (-(c)))) #endif #define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ - \ - row4l = _mm_xor_si128(row4l, row1l); \ - row4h = _mm_xor_si128(row4h, row1h); \ - \ - row4l = _mm_roti_epi64(row4l, -32); \ - row4h = _mm_roti_epi64(row4h, -32); \ - \ - row3l = _mm_add_epi64(row3l, row4l); \ - row3h = _mm_add_epi64(row3h, row4h); \ - \ - row2l = _mm_xor_si128(row2l, row3l); \ - row2h = _mm_xor_si128(row2h, row3h); \ - \ - row2l = _mm_roti_epi64(row2l, -24); \ - row2h = 
_mm_roti_epi64(row2h, -24); \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -32); \ + row4h = _mm_roti_epi64(row4h, -32); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -24); \ + row2h = _mm_roti_epi64(row2h, -24); \ #define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ - \ - row4l = _mm_xor_si128(row4l, row1l); \ - row4h = _mm_xor_si128(row4h, row1h); \ - \ - row4l = _mm_roti_epi64(row4l, -16); \ - row4h = _mm_roti_epi64(row4h, -16); \ - \ - row3l = _mm_add_epi64(row3l, row4l); \ - row3h = _mm_add_epi64(row3h, row4h); \ - \ - row2l = _mm_xor_si128(row2l, row3l); \ - row2h = _mm_xor_si128(row2h, row3h); \ - \ - row2l = _mm_roti_epi64(row2l, -63); \ - row2h = _mm_roti_epi64(row2h, -63); \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -16); \ + row4h = _mm_roti_epi64(row4h, -16); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -63); \ + row2h = _mm_roti_epi64(row2h, -63); \ #if defined(HAVE_SSSE_3) #define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ - t0 = _mm_alignr_epi8(row2h, row2l, 8); \ - t1 = _mm_alignr_epi8(row2l, row2h, 8); \ - row2l = t0; \ - row2h = t1; \ - \ - t0 = row3l; \ - row3l = row3h; \ - row3h = 
t0; \ - \ - t0 = _mm_alignr_epi8(row4h, row4l, 8); \ - t1 = _mm_alignr_epi8(row4l, row4h, 8); \ - row4l = t1; \ + t0 = _mm_alignr_epi8(row2h, row2l, 8); \ + t1 = _mm_alignr_epi8(row2l, row2h, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4h, row4l, 8); \ + t1 = _mm_alignr_epi8(row4l, row4h, 8); \ + row4l = t1; \ row4h = t0; #define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ - t0 = _mm_alignr_epi8(row2l, row2h, 8); \ - t1 = _mm_alignr_epi8(row2h, row2l, 8); \ - row2l = t0; \ - row2h = t1; \ - \ - t0 = row3l; \ - row3l = row3h; \ - row3h = t0; \ - \ - t0 = _mm_alignr_epi8(row4l, row4h, 8); \ - t1 = _mm_alignr_epi8(row4h, row4l, 8); \ - row4l = t1; \ + t0 = _mm_alignr_epi8(row2l, row2h, 8); \ + t1 = _mm_alignr_epi8(row2h, row2l, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4l, row4h, 8); \ + t1 = _mm_alignr_epi8(row4h, row4l, 8); \ + row4l = t1; \ row4h = t0; #else -#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ - t0 = row4l;\ - t1 = row2l;\ - row4l = row3l;\ - row3l = row3h;\ - row3h = row4l;\ - row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \ - row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \ +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = row4l; \ + t1 = row2l; \ + row4l = row3l; \ + row3l = row3h; \ + row3h = row4l; \ + row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \ + row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \ row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \ row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)) #define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ - t0 = row3l;\ - row3l = row3h;\ - row3h = t0;\ - t0 = row2l;\ - t1 = row4l;\ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + t0 = row2l; \ + t1 
= row4l; \ row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \ - row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \ + row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \ row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \ row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)) #endif -#define ROUND(r) \ - LOAD_MSG_ ##r ##_1(b0, b1); \ - G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ - LOAD_MSG_ ##r ##_2(b0, b1); \ - G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ +#define ROUND(r) \ + LOAD_MSG_ ##r ##_1(b0, b1); \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_2(b0, b1); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ - LOAD_MSG_ ##r ##_3(b0, b1); \ - G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ - LOAD_MSG_ ##r ##_4(b0, b1); \ - G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_3(b0, b1); \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_4(b0, b1); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); -template < bool bswap > -static void blake2_compress(blake2b_context * ctx, const uint8_t * in) { - __m128i row1l, row1h; - __m128i row2l, row2h; - __m128i row3l, row3h; - __m128i row4l, row4h; - __m128i b0, b1; - __m128i t0, t1; - - const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 ); - const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 ); - - const __m128i m0 = bswap ? mm_bswap64(LOADU(in + 00)) : LOADU( in + 00 ); - const __m128i m1 = bswap ? mm_bswap64(LOADU(in + 16)) : LOADU( in + 16 ); - const __m128i m2 = bswap ? mm_bswap64(LOADU(in + 32)) : LOADU( in + 32 ); - const __m128i m3 = bswap ? 
mm_bswap64(LOADU(in + 48)) : LOADU( in + 48 ); - const __m128i m4 = bswap ? mm_bswap64(LOADU(in + 64)) : LOADU( in + 64 ); - const __m128i m5 = bswap ? mm_bswap64(LOADU(in + 80)) : LOADU( in + 80 ); - const __m128i m6 = bswap ? mm_bswap64(LOADU(in + 96)) : LOADU( in + 96 ); - const __m128i m7 = bswap ? mm_bswap64(LOADU(in + 112)) : LOADU( in + 112 ); - - row1l = LOADU( &(ctx->h[0]) ); - row1h = LOADU( &(ctx->h[2]) ); - row2l = LOADU( &(ctx->h[4]) ); - row2h = LOADU( &(ctx->h[6]) ); - row3l = LOADU( &blake2b_IV[0] ); - row3h = LOADU( &blake2b_IV[2] ); - row4l = _mm_xor_si128( LOADU( &blake2b_IV[4] ), LOADU( &(ctx->t[0]) ) ); - row4h = _mm_xor_si128( LOADU( &blake2b_IV[6] ), LOADU( &(ctx->f[0]) ) ); - - ROUND( 0 ); - ROUND( 1 ); - ROUND( 2 ); - ROUND( 3 ); - ROUND( 4 ); - ROUND( 5 ); - ROUND( 6 ); - ROUND( 7 ); - ROUND( 8 ); - ROUND( 9 ); - ROUND( 10 ); - ROUND( 11 ); - - row1l = _mm_xor_si128( row3l, row1l ); - row1h = _mm_xor_si128( row3h, row1h ); - STOREU( &(ctx->h[0]), _mm_xor_si128( LOADU( &(ctx->h[0]) ), row1l ) ); - STOREU( &(ctx->h[2]), _mm_xor_si128( LOADU( &(ctx->h[2]) ), row1h ) ); - row2l = _mm_xor_si128( row4l, row2l ); - row2h = _mm_xor_si128( row4h, row2h ); - STOREU( &(ctx->h[4]), _mm_xor_si128( LOADU( &(ctx->h[4]) ), row2l ) ); - STOREU( &(ctx->h[6]), _mm_xor_si128( LOADU( &(ctx->h[6]) ), row2h ) ); +template +static void blake2_compress( blake2b_context * ctx, const uint8_t * in ) { + __m128i row1l, row1h; + __m128i row2l, row2h; + __m128i row3l, row3h; + __m128i row4l, row4h; + __m128i b0, b1; + __m128i t0, t1; + + const __m128i r16 = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9); + const __m128i r24 = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10); + + const __m128i m0 = bswap ? mm_bswap64(LOADU(in + 00)) : LOADU(in + 00); + const __m128i m1 = bswap ? mm_bswap64(LOADU(in + 16)) : LOADU(in + 16); + const __m128i m2 = bswap ? mm_bswap64(LOADU(in + 32)) : LOADU(in + 32); + const __m128i m3 = bswap ? 
mm_bswap64(LOADU(in + 48)) : LOADU(in + 48); + const __m128i m4 = bswap ? mm_bswap64(LOADU(in + 64)) : LOADU(in + 64); + const __m128i m5 = bswap ? mm_bswap64(LOADU(in + 80)) : LOADU(in + 80); + const __m128i m6 = bswap ? mm_bswap64(LOADU(in + 96)) : LOADU(in + 96); + const __m128i m7 = bswap ? mm_bswap64(LOADU(in + 112)) : LOADU(in + 112); + + row1l = LOADU(&(ctx->h [0])); + row1h = LOADU(&(ctx->h [2])); + row2l = LOADU(&(ctx->h [4])); + row2h = LOADU(&(ctx->h [6])); + row3l = LOADU(&blake2b_IV[0] ); + row3h = LOADU(&blake2b_IV[2] ); + row4l = _mm_xor_si128(LOADU(&blake2b_IV[4]), LOADU(&(ctx->t[0]))); + row4h = _mm_xor_si128(LOADU(&blake2b_IV[6]), LOADU(&(ctx->f[0]))); + + ROUND( 0); + ROUND( 1); + ROUND( 2); + ROUND( 3); + ROUND( 4); + ROUND( 5); + ROUND( 6); + ROUND( 7); + ROUND( 8); + ROUND( 9); + ROUND(10); + ROUND(11); + + row1l = _mm_xor_si128(row3l, row1l); + row1h = _mm_xor_si128(row3h, row1h); + STOREU(&(ctx->h[0]), _mm_xor_si128(LOADU(&(ctx->h[0])), row1l)); + STOREU(&(ctx->h[2]), _mm_xor_si128(LOADU(&(ctx->h[2])), row1h)); + row2l = _mm_xor_si128(row4l, row2l); + row2h = _mm_xor_si128(row4h, row2h); + STOREU(&(ctx->h[4]), _mm_xor_si128(LOADU(&(ctx->h[4])), row2l)); + STOREU(&(ctx->h[6]), _mm_xor_si128(LOADU(&(ctx->h[6])), row2h)); } #undef G1 @@ -348,12 +348,12 @@ static void blake2_compress(blake2b_context * ctx, const uint8_t * in) { #if defined(HAVE_XOP) -#define TOB(x) ((x)*4*0x01010101 + 0x03020100) /* ..or not TOB */ + #define TOB(x) ((x) * 4 * 0x01010101 + 0x03020100) /* ..or not TOB */ -#define LOAD_MSG_0_1(buf) buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) ); -#define LOAD_MSG_0_2(buf) buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) ); -#define LOAD_MSG_0_3(buf) buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(4),TOB(2),TOB(0),TOB(6)) ); -#define LOAD_MSG_0_4(buf) buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(5),TOB(3),TOB(1),TOB(7)) ); + #define LOAD_MSG_0_1(buf) buf = _mm_perm_epi8(m0, m1, 
_mm_set_epi32(TOB(6), TOB(4), TOB(2), TOB(0))); + #define LOAD_MSG_0_2(buf) buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(7), TOB(5), TOB(3), TOB(1))); + #define LOAD_MSG_0_3(buf) buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(4), TOB(2), TOB(0), TOB(6))); + #define LOAD_MSG_0_4(buf) buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(5), TOB(3), TOB(1), TOB(7))); #define LOAD_MSG_1_1(buf) t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(5),TOB(0),TOB(0)) ); \ buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) ); #define LOAD_MSG_1_2(buf) t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(2),TOB(0),TOB(4),TOB(6)) ); \ @@ -371,7 +371,7 @@ static void blake2_compress(blake2b_context * ctx, const uint8_t * in) { #define LOAD_MSG_2_4(buf) t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(1),TOB(6),TOB(0)) ); \ buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(2),TOB(1),TOB(6),TOB(3)) ); #define LOAD_MSG_3_1(buf) t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(3),TOB(7)) ); \ - t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); \ + t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); \ buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(1),TOB(0)) ); #define LOAD_MSG_3_2(buf) t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(1),TOB(5)) ); \ buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(1),TOB(0)) ); @@ -384,7 +384,7 @@ static void blake2_compress(blake2b_context * ctx, const uint8_t * in) { #define LOAD_MSG_4_2(buf) t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(7),TOB(0)) ); \ buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); #define LOAD_MSG_4_3(buf) t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(6),TOB(0),TOB(0)) ); \ - t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) ); \ + t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) ); \ buf = _mm_perm_epi8(t0, m3, 
_mm_set_epi32(TOB(2),TOB(1),TOB(6),TOB(3)) ); #define LOAD_MSG_4_4(buf) t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(4),TOB(0),TOB(1)) ); \ buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(2),TOB(4),TOB(0),TOB(5)) ); @@ -409,21 +409,21 @@ static void blake2_compress(blake2b_context * ctx, const uint8_t * in) { #define LOAD_MSG_7_2(buf) t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(5),TOB(1),TOB(0),TOB(7)) ); \ buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) ); #define LOAD_MSG_7_3(buf) t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(2),TOB(0),TOB(0),TOB(5)) ); \ - t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(0)) ); \ + t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(0)) ); \ buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(2),TOB(7),TOB(0),TOB(3)) ); #define LOAD_MSG_7_4(buf) t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(6),TOB(4),TOB(0)) ); \ buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(2),TOB(1),TOB(0),TOB(6)) ); #define LOAD_MSG_8_1(buf) t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \ - t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); \ + t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); \ buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) ); #define LOAD_MSG_8_2(buf) t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(4),TOB(3),TOB(5),TOB(0)) ); \ buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(7)) ); #define LOAD_MSG_8_3(buf) t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(6),TOB(1),TOB(0),TOB(0)) ); \ buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(2),TOB(5),TOB(4),TOB(3)) ); -#define LOAD_MSG_8_4(buf) buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(7),TOB(2),TOB(5)) ); + #define LOAD_MSG_8_4(buf) buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4), TOB(7), TOB(2), TOB(5))); #define LOAD_MSG_9_1(buf) t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(7),TOB(0),TOB(0)) ); \ buf = 
_mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(4),TOB(6)) ); -#define LOAD_MSG_9_2(buf) buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(6),TOB(4),TOB(2)) ); + #define LOAD_MSG_9_2(buf) buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5), TOB(6), TOB(4), TOB(2))); #define LOAD_MSG_9_3(buf) t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(3),TOB(5),TOB(0)) ); \ buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(2),TOB(1),TOB(7),TOB(5)) ); #define LOAD_MSG_9_4(buf) t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(7)) ); \ @@ -431,270 +431,270 @@ static void blake2_compress(blake2b_context * ctx, const uint8_t * in) { #elif defined(HAVE_SSE_4_1) -#define LOAD_MSG_0_1(buf) buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0))); -#define LOAD_MSG_0_2(buf) buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(3,1,3,1))); + #define LOAD_MSG_0_1(buf) buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2, 0, 2, 0))); + #define LOAD_MSG_0_2(buf) buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(3, 1, 3, 1))); #define LOAD_MSG_0_3(buf) t0 = _mm_shuffle_epi32(m2, _MM_SHUFFLE(3,2,0,1)); \ - t1 = _mm_shuffle_epi32(m3, _MM_SHUFFLE(0,1,3,2)); \ + t1 = _mm_shuffle_epi32(m3, _MM_SHUFFLE(0,1,3,2)); \ buf = _mm_blend_epi16(t0, t1, 0xC3); -#define LOAD_MSG_0_4(buf) t0 = _mm_blend_epi16(t0, t1, 0x3C); \ +#define LOAD_MSG_0_4(buf) t0 = _mm_blend_epi16(t0, t1, 0x3C); \ buf = _mm_shuffle_epi32(t0, _MM_SHUFFLE(2,3,0,1)); #define LOAD_MSG_1_1(buf) t0 = _mm_blend_epi16(m1, m2, 0x0C); \ - t1 = _mm_slli_si128(m3, 4); \ - t2 = _mm_blend_epi16(t0, t1, 0xF0); \ + t1 = _mm_slli_si128(m3, 4); \ + t2 = _mm_blend_epi16(t0, t1, 0xF0); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3)); #define LOAD_MSG_1_2(buf) t0 = _mm_shuffle_epi32(m2,_MM_SHUFFLE(0,0,2,0)); \ - t1 = _mm_blend_epi16(m1,m3,0xC0); \ - t2 = _mm_blend_epi16(t0, t1, 0xF0); \ + t1 = _mm_blend_epi16(m1,m3,0xC0); \ + t2 = _mm_blend_epi16(t0, t1, 0xF0); \ buf = _mm_shuffle_epi32(t2, 
_MM_SHUFFLE(2,3,0,1)); #define LOAD_MSG_1_3(buf) t0 = _mm_slli_si128(m1, 4); \ - t1 = _mm_blend_epi16(m2, t0, 0x30); \ - t2 = _mm_blend_epi16(m0, t1, 0xF0); \ + t1 = _mm_blend_epi16(m2, t0, 0x30); \ + t2 = _mm_blend_epi16(m0, t1, 0xF0); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,0,1,2)); #define LOAD_MSG_1_4(buf) t0 = _mm_unpackhi_epi32(m0,m1); \ - t1 = _mm_slli_si128(m3, 4); \ - t2 = _mm_blend_epi16(t0, t1, 0x0C); \ + t1 = _mm_slli_si128(m3, 4); \ + t2 = _mm_blend_epi16(t0, t1, 0x0C); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,0,1,2)); -#define LOAD_MSG_2_1(buf) t0 = _mm_unpackhi_epi32(m2,m3); \ - t1 = _mm_blend_epi16(m3,m1,0x0C); \ - t2 = _mm_blend_epi16(t0, t1, 0x0F); \ +#define LOAD_MSG_2_1(buf) t0 = _mm_unpackhi_epi32(m2,m3); \ + t1 = _mm_blend_epi16(m3,m1,0x0C); \ + t2 = _mm_blend_epi16(t0, t1, 0x0F); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); -#define LOAD_MSG_2_2(buf) t0 = _mm_unpacklo_epi32(m2,m0); \ - t1 = _mm_blend_epi16(t0, m0, 0xF0); \ - t2 = _mm_slli_si128(m3, 8); \ +#define LOAD_MSG_2_2(buf) t0 = _mm_unpacklo_epi32(m2,m0); \ + t1 = _mm_blend_epi16(t0, m0, 0xF0); \ + t2 = _mm_slli_si128(m3, 8); \ buf = _mm_blend_epi16(t1, t2, 0xC0); -#define LOAD_MSG_2_3(buf) t0 = _mm_blend_epi16(m0, m2, 0x3C); \ - t1 = _mm_srli_si128(m1, 12); \ - t2 = _mm_blend_epi16(t0,t1,0x03); \ +#define LOAD_MSG_2_3(buf) t0 = _mm_blend_epi16(m0, m2, 0x3C); \ + t1 = _mm_srli_si128(m1, 12); \ + t2 = _mm_blend_epi16(t0,t1,0x03); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(0,3,2,1)); -#define LOAD_MSG_2_4(buf) t0 = _mm_slli_si128(m3, 4); \ - t1 = _mm_blend_epi16(m0, m1, 0x33); \ - t2 = _mm_blend_epi16(t1, t0, 0xC0); \ +#define LOAD_MSG_2_4(buf) t0 = _mm_slli_si128(m3, 4); \ + t1 = _mm_blend_epi16(m0, m1, 0x33); \ + t2 = _mm_blend_epi16(t1, t0, 0xC0); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,3,0)); -#define LOAD_MSG_3_1(buf) t0 = _mm_unpackhi_epi32(m0,m1); \ - t1 = _mm_unpackhi_epi32(t0, m2); \ - t2 = _mm_blend_epi16(t1, m3, 0x0C); \ +#define LOAD_MSG_3_1(buf) t0 = 
_mm_unpackhi_epi32(m0,m1); \ + t1 = _mm_unpackhi_epi32(t0, m2); \ + t2 = _mm_blend_epi16(t1, m3, 0x0C); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); -#define LOAD_MSG_3_2(buf) t0 = _mm_slli_si128(m2, 8); \ - t1 = _mm_blend_epi16(m3,m0,0x0C); \ - t2 = _mm_blend_epi16(t1, t0, 0xC0); \ +#define LOAD_MSG_3_2(buf) t0 = _mm_slli_si128(m2, 8); \ + t1 = _mm_blend_epi16(m3,m0,0x0C); \ + t2 = _mm_blend_epi16(t1, t0, 0xC0); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3)); -#define LOAD_MSG_3_3(buf) t0 = _mm_blend_epi16(m0,m1,0x0F); \ - t1 = _mm_blend_epi16(t0, m3, 0xC0); \ +#define LOAD_MSG_3_3(buf) t0 = _mm_blend_epi16(m0,m1,0x0F); \ + t1 = _mm_blend_epi16(t0, m3, 0xC0); \ buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(0,1,2,3)); -#define LOAD_MSG_3_4(buf) t0 = _mm_alignr_epi8(m0, m1, 4); \ +#define LOAD_MSG_3_4(buf) t0 = _mm_alignr_epi8(m0, m1, 4); \ buf = _mm_blend_epi16(t0, m2, 0x33); #define LOAD_MSG_4_1(buf) t0 = _mm_unpacklo_epi64(m1,m2); \ - t1 = _mm_unpackhi_epi64(m0,m2); \ - t2 = _mm_blend_epi16(t0,t1,0x33); \ + t1 = _mm_unpackhi_epi64(m0,m2); \ + t2 = _mm_blend_epi16(t0,t1,0x33); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3)); -#define LOAD_MSG_4_2(buf) t0 = _mm_unpackhi_epi64(m1,m3); \ - t1 = _mm_unpacklo_epi64(m0,m1); \ +#define LOAD_MSG_4_2(buf) t0 = _mm_unpackhi_epi64(m1,m3); \ + t1 = _mm_unpacklo_epi64(m0,m1); \ buf = _mm_blend_epi16(t0,t1,0x33); #define LOAD_MSG_4_3(buf) t0 = _mm_unpackhi_epi64(m3,m1); \ - t1 = _mm_unpackhi_epi64(m2,m0); \ - t2 = _mm_blend_epi16(t1,t0,0x33); \ + t1 = _mm_unpackhi_epi64(m2,m0); \ + t2 = _mm_blend_epi16(t1,t0,0x33); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3)); -#define LOAD_MSG_4_4(buf) t0 = _mm_blend_epi16(m0,m2,0x03); \ - t1 = _mm_slli_si128(t0, 8); \ - t2 = _mm_blend_epi16(t1,m3,0x0F); \ +#define LOAD_MSG_4_4(buf) t0 = _mm_blend_epi16(m0,m2,0x03); \ + t1 = _mm_slli_si128(t0, 8); \ + t2 = _mm_blend_epi16(t1,m3,0x0F); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,3,1)); -#define LOAD_MSG_5_1(buf) t0 = 
_mm_unpackhi_epi32(m0,m1); \ - t1 = _mm_unpacklo_epi32(m0,m2); \ +#define LOAD_MSG_5_1(buf) t0 = _mm_unpackhi_epi32(m0,m1); \ + t1 = _mm_unpacklo_epi32(m0,m2); \ buf = _mm_unpacklo_epi64(t0,t1); -#define LOAD_MSG_5_2(buf) t0 = _mm_srli_si128(m2, 4); \ - t1 = _mm_blend_epi16(m0,m3,0x03); \ +#define LOAD_MSG_5_2(buf) t0 = _mm_srli_si128(m2, 4); \ + t1 = _mm_blend_epi16(m0,m3,0x03); \ buf = _mm_blend_epi16(t1,t0,0x3C); #define LOAD_MSG_5_3(buf) t0 = _mm_blend_epi16(m1,m0,0x0C); \ - t1 = _mm_srli_si128(m3, 4); \ - t2 = _mm_blend_epi16(t0,t1,0x30); \ + t1 = _mm_srli_si128(m3, 4); \ + t2 = _mm_blend_epi16(t0,t1,0x30); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1)); #define LOAD_MSG_5_4(buf) t0 = _mm_unpacklo_epi64(m2,m1); \ - t1 = _mm_shuffle_epi32(m3, _MM_SHUFFLE(2,0,1,0)); \ - t2 = _mm_srli_si128(t0, 4); \ + t1 = _mm_shuffle_epi32(m3, _MM_SHUFFLE(2,0,1,0)); \ + t2 = _mm_srli_si128(t0, 4); \ buf = _mm_blend_epi16(t1,t2,0x33); #define LOAD_MSG_6_1(buf) t0 = _mm_slli_si128(m1, 12); \ - t1 = _mm_blend_epi16(m0,m3,0x33); \ + t1 = _mm_blend_epi16(m0,m3,0x33); \ buf = _mm_blend_epi16(t1,t0,0xC0); #define LOAD_MSG_6_2(buf) t0 = _mm_blend_epi16(m3,m2,0x30); \ - t1 = _mm_srli_si128(m1, 4); \ - t2 = _mm_blend_epi16(t0,t1,0x03); \ + t1 = _mm_srli_si128(m1, 4); \ + t2 = _mm_blend_epi16(t0,t1,0x03); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,3,0)); -#define LOAD_MSG_6_3(buf) t0 = _mm_unpacklo_epi64(m0,m2); \ - t1 = _mm_srli_si128(m1, 4); \ - t2 = _mm_blend_epi16(t0,t1,0x0C); \ +#define LOAD_MSG_6_3(buf) t0 = _mm_unpacklo_epi64(m0,m2); \ + t1 = _mm_srli_si128(m1, 4); \ + t2 = _mm_blend_epi16(t0,t1,0x0C); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2)); #define LOAD_MSG_6_4(buf) t0 = _mm_unpackhi_epi32(m1,m2); \ - t1 = _mm_unpackhi_epi64(m0,t0); \ + t1 = _mm_unpackhi_epi64(m0,t0); \ buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(0,1,2,3)); -#define LOAD_MSG_7_1(buf) t0 = _mm_unpackhi_epi32(m0,m1); \ - t1 = _mm_blend_epi16(t0,m3,0x0F); \ +#define LOAD_MSG_7_1(buf) t0 = 
_mm_unpackhi_epi32(m0,m1); \ + t1 = _mm_blend_epi16(t0,m3,0x0F); \ buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(2,0,3,1)); #define LOAD_MSG_7_2(buf) t0 = _mm_blend_epi16(m2,m3,0x30); \ - t1 = _mm_srli_si128(m0,4); \ - t2 = _mm_blend_epi16(t0,t1,0x03); \ + t1 = _mm_srli_si128(m0,4); \ + t2 = _mm_blend_epi16(t0,t1,0x03); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,2,3)); #define LOAD_MSG_7_3(buf) t0 = _mm_unpackhi_epi64(m0,m3); \ - t1 = _mm_unpacklo_epi64(m1,m2); \ - t2 = _mm_blend_epi16(t0,t1,0x3C); \ + t1 = _mm_unpacklo_epi64(m1,m2); \ + t2 = _mm_blend_epi16(t0,t1,0x3C); \ buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(2,3,1,0)); -#define LOAD_MSG_7_4(buf) t0 = _mm_unpacklo_epi32(m0,m1); \ - t1 = _mm_unpackhi_epi32(m1,m2); \ - t2 = _mm_unpacklo_epi64(t0,t1); \ +#define LOAD_MSG_7_4(buf) t0 = _mm_unpacklo_epi32(m0,m1); \ + t1 = _mm_unpackhi_epi32(m1,m2); \ + t2 = _mm_unpacklo_epi64(t0,t1); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3)); -#define LOAD_MSG_8_1(buf) t0 = _mm_unpackhi_epi32(m1,m3); \ - t1 = _mm_unpacklo_epi64(t0,m0); \ - t2 = _mm_blend_epi16(t1,m2,0xC0); \ +#define LOAD_MSG_8_1(buf) t0 = _mm_unpackhi_epi32(m1,m3); \ + t1 = _mm_unpacklo_epi64(t0,m0); \ + t2 = _mm_blend_epi16(t1,m2,0xC0); \ buf = _mm_shufflehi_epi16(t2,_MM_SHUFFLE(1,0,3,2)); -#define LOAD_MSG_8_2(buf) t0 = _mm_unpackhi_epi32(m0,m3); \ - t1 = _mm_blend_epi16(m2,t0,0xF0); \ +#define LOAD_MSG_8_2(buf) t0 = _mm_unpackhi_epi32(m0,m3); \ + t1 = _mm_blend_epi16(m2,t0,0xF0); \ buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(0,2,1,3)); -#define LOAD_MSG_8_3(buf) t0 = _mm_unpacklo_epi64(m0,m3); \ - t1 = _mm_srli_si128(m2,8); \ - t2 = _mm_blend_epi16(t0,t1,0x03); \ +#define LOAD_MSG_8_3(buf) t0 = _mm_unpacklo_epi64(m0,m3); \ + t1 = _mm_srli_si128(m2,8); \ + t2 = _mm_blend_epi16(t0,t1,0x03); \ buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,3,2,0)); #define LOAD_MSG_8_4(buf) t0 = _mm_blend_epi16(m1,m0,0x30); \ buf = _mm_shuffle_epi32(t0,_MM_SHUFFLE(0,3,2,1)); -#define LOAD_MSG_9_1(buf) t0 = _mm_blend_epi16(m0,m2,0x03); \ 
- t1 = _mm_blend_epi16(m1,m2,0x30); \ - t2 = _mm_blend_epi16(t1,t0,0x0F); \ +#define LOAD_MSG_9_1(buf) t0 = _mm_blend_epi16(m0,m2,0x03); \ + t1 = _mm_blend_epi16(m1,m2,0x30); \ + t2 = _mm_blend_epi16(t1,t0,0x0F); \ buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,3,0,2)); -#define LOAD_MSG_9_2(buf) t0 = _mm_slli_si128(m0,4); \ - t1 = _mm_blend_epi16(m1,t0,0xC0); \ +#define LOAD_MSG_9_2(buf) t0 = _mm_slli_si128(m0,4); \ + t1 = _mm_blend_epi16(m1,t0,0xC0); \ buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(1,2,0,3)); -#define LOAD_MSG_9_3(buf) t0 = _mm_unpackhi_epi32(m0,m3); \ - t1 = _mm_unpacklo_epi32(m2,m3); \ - t2 = _mm_unpackhi_epi64(t0,t1); \ +#define LOAD_MSG_9_3(buf) t0 = _mm_unpackhi_epi32(m0,m3); \ + t1 = _mm_unpacklo_epi32(m2,m3); \ + t2 = _mm_unpackhi_epi64(t0,t1); \ buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,2,1,3)); -#define LOAD_MSG_9_4(buf) t0 = _mm_blend_epi16(m3,m2,0xC0); \ - t1 = _mm_unpacklo_epi32(m0,m3); \ - t2 = _mm_blend_epi16(t0,t1,0x0F); \ +#define LOAD_MSG_9_4(buf) t0 = _mm_blend_epi16(m3,m2,0xC0); \ + t1 = _mm_unpacklo_epi32(m0,m3); \ + t2 = _mm_blend_epi16(t0,t1,0x0F); \ buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,2,3,0)); #else -#define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0) -#define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1) -#define LOAD_MSG_0_3(buf) buf = _mm_set_epi32(m12,m10,m8,m14) -#define LOAD_MSG_0_4(buf) buf = _mm_set_epi32(m13,m11,m9,m15) -#define LOAD_MSG_1_1(buf) buf = _mm_set_epi32(m13,m9,m4,m14) -#define LOAD_MSG_1_2(buf) buf = _mm_set_epi32(m6,m15,m8,m10) -#define LOAD_MSG_1_3(buf) buf = _mm_set_epi32(m11,m0,m1,m5) -#define LOAD_MSG_1_4(buf) buf = _mm_set_epi32(m7,m2,m12,m3) -#define LOAD_MSG_2_1(buf) buf = _mm_set_epi32(m15,m5,m12,m11) -#define LOAD_MSG_2_2(buf) buf = _mm_set_epi32(m13,m2,m0,m8) -#define LOAD_MSG_2_3(buf) buf = _mm_set_epi32(m7,m3,m10,m9) -#define LOAD_MSG_2_4(buf) buf = _mm_set_epi32(m1,m6,m14,m4) -#define LOAD_MSG_3_1(buf) buf = _mm_set_epi32(m11,m13,m3,m7) -#define LOAD_MSG_3_2(buf) buf = 
_mm_set_epi32(m14,m12,m1,m9) -#define LOAD_MSG_3_3(buf) buf = _mm_set_epi32(m4,m5,m2,m15) -#define LOAD_MSG_3_4(buf) buf = _mm_set_epi32(m0,m10,m6,m8) -#define LOAD_MSG_4_1(buf) buf = _mm_set_epi32(m10,m2,m5,m9) -#define LOAD_MSG_4_2(buf) buf = _mm_set_epi32(m15,m4,m7,m0) -#define LOAD_MSG_4_3(buf) buf = _mm_set_epi32(m6,m11,m14,m3) -#define LOAD_MSG_4_4(buf) buf = _mm_set_epi32(m8,m12,m1,m13) -#define LOAD_MSG_5_1(buf) buf = _mm_set_epi32(m8,m0,m6,m2) -#define LOAD_MSG_5_2(buf) buf = _mm_set_epi32(m3,m11,m10,m12) -#define LOAD_MSG_5_3(buf) buf = _mm_set_epi32(m15,m7,m4,m1) -#define LOAD_MSG_5_4(buf) buf = _mm_set_epi32(m14,m5,m13,m9) -#define LOAD_MSG_6_1(buf) buf = _mm_set_epi32(m4,m14,m1,m12) -#define LOAD_MSG_6_2(buf) buf = _mm_set_epi32(m10,m13,m15,m5) -#define LOAD_MSG_6_3(buf) buf = _mm_set_epi32(m9,m6,m0,m8) -#define LOAD_MSG_6_4(buf) buf = _mm_set_epi32(m2,m3,m7,m11) -#define LOAD_MSG_7_1(buf) buf = _mm_set_epi32(m3,m12,m7,m13) -#define LOAD_MSG_7_2(buf) buf = _mm_set_epi32(m9,m1,m14,m11) -#define LOAD_MSG_7_3(buf) buf = _mm_set_epi32(m8,m15,m5,m2) -#define LOAD_MSG_7_4(buf) buf = _mm_set_epi32(m6,m4,m0,m10) -#define LOAD_MSG_8_1(buf) buf = _mm_set_epi32(m0,m11,m14,m6) -#define LOAD_MSG_8_2(buf) buf = _mm_set_epi32(m8,m3,m9,m15) -#define LOAD_MSG_8_3(buf) buf = _mm_set_epi32(m1,m13,m12,m10) -#define LOAD_MSG_8_4(buf) buf = _mm_set_epi32(m4,m7,m2,m5) -#define LOAD_MSG_9_1(buf) buf = _mm_set_epi32(m1,m7,m8,m10) -#define LOAD_MSG_9_2(buf) buf = _mm_set_epi32(m5,m6,m4,m2) -#define LOAD_MSG_9_3(buf) buf = _mm_set_epi32(m3,m9,m15,m13) -#define LOAD_MSG_9_4(buf) buf = _mm_set_epi32(m12,m14,m11,m0) + #define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6 , m4 , m2 , m0 ) + #define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7 , m5 , m3 , m1 ) + #define LOAD_MSG_0_3(buf) buf = _mm_set_epi32(m12, m10, m8 , m14) + #define LOAD_MSG_0_4(buf) buf = _mm_set_epi32(m13, m11, m9 , m15) + #define LOAD_MSG_1_1(buf) buf = _mm_set_epi32(m13, m9 , m4 , m14) + #define LOAD_MSG_1_2(buf) buf 
= _mm_set_epi32(m6 , m15, m8 , m10) + #define LOAD_MSG_1_3(buf) buf = _mm_set_epi32(m11, m0 , m1 , m5 ) + #define LOAD_MSG_1_4(buf) buf = _mm_set_epi32(m7 , m2 , m12, m3 ) + #define LOAD_MSG_2_1(buf) buf = _mm_set_epi32(m15, m5 , m12, m11) + #define LOAD_MSG_2_2(buf) buf = _mm_set_epi32(m13, m2 , m0 , m8 ) + #define LOAD_MSG_2_3(buf) buf = _mm_set_epi32(m7 , m3 , m10, m9 ) + #define LOAD_MSG_2_4(buf) buf = _mm_set_epi32(m1 , m6 , m14, m4 ) + #define LOAD_MSG_3_1(buf) buf = _mm_set_epi32(m11, m13, m3 , m7 ) + #define LOAD_MSG_3_2(buf) buf = _mm_set_epi32(m14, m12, m1 , m9 ) + #define LOAD_MSG_3_3(buf) buf = _mm_set_epi32(m4 , m5 , m2 , m15) + #define LOAD_MSG_3_4(buf) buf = _mm_set_epi32(m0 , m10, m6 , m8 ) + #define LOAD_MSG_4_1(buf) buf = _mm_set_epi32(m10, m2 , m5 , m9 ) + #define LOAD_MSG_4_2(buf) buf = _mm_set_epi32(m15, m4 , m7 , m0 ) + #define LOAD_MSG_4_3(buf) buf = _mm_set_epi32(m6 , m11, m14, m3 ) + #define LOAD_MSG_4_4(buf) buf = _mm_set_epi32(m8 , m12, m1 , m13) + #define LOAD_MSG_5_1(buf) buf = _mm_set_epi32(m8 , m0 , m6 , m2 ) + #define LOAD_MSG_5_2(buf) buf = _mm_set_epi32(m3 , m11, m10, m12) + #define LOAD_MSG_5_3(buf) buf = _mm_set_epi32(m15, m7 , m4 , m1 ) + #define LOAD_MSG_5_4(buf) buf = _mm_set_epi32(m14, m5 , m13, m9 ) + #define LOAD_MSG_6_1(buf) buf = _mm_set_epi32(m4 , m14, m1 , m12) + #define LOAD_MSG_6_2(buf) buf = _mm_set_epi32(m10, m13, m15, m5 ) + #define LOAD_MSG_6_3(buf) buf = _mm_set_epi32(m9 , m6 , m0 , m8 ) + #define LOAD_MSG_6_4(buf) buf = _mm_set_epi32(m2 , m3 , m7 , m11) + #define LOAD_MSG_7_1(buf) buf = _mm_set_epi32(m3 , m12, m7 , m13) + #define LOAD_MSG_7_2(buf) buf = _mm_set_epi32(m9 , m1 , m14, m11) + #define LOAD_MSG_7_3(buf) buf = _mm_set_epi32(m8 , m15, m5 , m2 ) + #define LOAD_MSG_7_4(buf) buf = _mm_set_epi32(m6 , m4 , m0 , m10) + #define LOAD_MSG_8_1(buf) buf = _mm_set_epi32(m0 , m11, m14, m6 ) + #define LOAD_MSG_8_2(buf) buf = _mm_set_epi32(m8 , m3 , m9 , m15) + #define LOAD_MSG_8_3(buf) buf = _mm_set_epi32(m1 , m13, 
m12, m10) + #define LOAD_MSG_8_4(buf) buf = _mm_set_epi32(m4 , m7 , m2 , m5 ) + #define LOAD_MSG_9_1(buf) buf = _mm_set_epi32(m1 , m7 , m8 , m10) + #define LOAD_MSG_9_2(buf) buf = _mm_set_epi32(m5 , m6 , m4 , m2 ) + #define LOAD_MSG_9_3(buf) buf = _mm_set_epi32(m3 , m9 , m15, m13) + #define LOAD_MSG_9_4(buf) buf = _mm_set_epi32(m12, m14, m11, m0 ) #endif #if defined(HAVE_SSSE_3) && !defined(HAVE_XOP) -#define _mm_roti_epi32(r, c) ( \ - (8==-(c)) ? _mm_shuffle_epi8(r,r8) \ +#define _mm_roti_epi32(r, c) ( \ + (8==-(c)) ? _mm_shuffle_epi8(r,r8) \ : (16==-(c)) ? _mm_shuffle_epi8(r,r16) \ : _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) ) #elif !defined(HAVE_SSSE_3) && !defined(HAVE_XOP) -#define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) + #define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_srli_epi32((r), -(c)), _mm_slli_epi32((r), 32 - (-(c)))) #endif -#define G1(row1,row2,row3,row4,buf) \ +#define G1(row1,row2,row3,row4,buf) \ row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ - row4 = _mm_xor_si128( row4, row1 ); \ - row4 = _mm_roti_epi32(row4, -16); \ - row3 = _mm_add_epi32( row3, row4 ); \ - row2 = _mm_xor_si128( row2, row3 ); \ + row4 = _mm_xor_si128( row4, row1 ); \ + row4 = _mm_roti_epi32(row4, -16); \ + row3 = _mm_add_epi32( row3, row4 ); \ + row2 = _mm_xor_si128( row2, row3 ); \ row2 = _mm_roti_epi32(row2, -12); -#define G2(row1,row2,row3,row4,buf) \ +#define G2(row1,row2,row3,row4,buf) \ row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \ - row4 = _mm_xor_si128( row4, row1 ); \ - row4 = _mm_roti_epi32(row4, -8); \ - row3 = _mm_add_epi32( row3, row4 ); \ - row2 = _mm_xor_si128( row2, row3 ); \ + row4 = _mm_xor_si128( row4, row1 ); \ + row4 = _mm_roti_epi32(row4, -8); \ + row3 = _mm_add_epi32( row3, row4 ); \ + row2 = _mm_xor_si128( row2, row3 ); \ row2 = _mm_roti_epi32(row2, -7); -#define DIAGONALIZE(row1,row2,row3,row4) \ +#define DIAGONALIZE(row1,row2,row3,row4) \ 
row1 = _mm_shuffle_epi32( row1, _MM_SHUFFLE(2,1,0,3) ); \ row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(1,0,3,2) ); \ row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(0,3,2,1) ); -#define UNDIAGONALIZE(row1,row2,row3,row4) \ +#define UNDIAGONALIZE(row1,row2,row3,row4) \ row1 = _mm_shuffle_epi32( row1, _MM_SHUFFLE(0,3,2,1) ); \ row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(1,0,3,2) ); \ row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(2,1,0,3) ); -#define ROUND(r) \ - LOAD_MSG_ ##r ##_1(buf1); \ - G1(row1,row2,row3,row4,buf1); \ - LOAD_MSG_ ##r ##_2(buf2); \ - G2(row1,row2,row3,row4,buf2); \ - DIAGONALIZE(row1,row2,row3,row4); \ - LOAD_MSG_ ##r ##_3(buf3); \ - G1(row1,row2,row3,row4,buf3); \ - LOAD_MSG_ ##r ##_4(buf4); \ - G2(row1,row2,row3,row4,buf4); \ +#define ROUND(r) \ + LOAD_MSG_ ##r ##_1(buf1); \ + G1(row1,row2,row3,row4,buf1); \ + LOAD_MSG_ ##r ##_2(buf2); \ + G2(row1,row2,row3,row4,buf2); \ + DIAGONALIZE(row1,row2,row3,row4); \ + LOAD_MSG_ ##r ##_3(buf3); \ + G1(row1,row2,row3,row4,buf3); \ + LOAD_MSG_ ##r ##_4(buf4); \ + G2(row1,row2,row3,row4,buf4); \ UNDIAGONALIZE(row1,row2,row3,row4); \ -template < bool bswap > -static void blake2_compress(blake2s_context * ctx, const uint8_t * in) { - __m128i row1, row2, row3, row4; - __m128i buf1, buf2, buf3, buf4; - __m128i t0, t1, t2; - __m128i ff0, ff1; - - const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 ); - const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 ); - - const __m128i m0 = bswap ? mm_bswap32(LOADU(in + 00)) : LOADU( in + 00 ); - const __m128i m1 = bswap ? mm_bswap32(LOADU(in + 16)) : LOADU( in + 16 ); - const __m128i m2 = bswap ? mm_bswap32(LOADU(in + 32)) : LOADU( in + 32 ); - const __m128i m3 = bswap ? 
mm_bswap32(LOADU(in + 48)) : LOADU( in + 48 ); - - row1 = ff0 = LOADU( &ctx->h[0] ); - row2 = ff1 = LOADU( &ctx->h[4] ); - row3 = _mm_loadu_si128( (__m128i const *)&blake2s_IV[0] ); - row4 = _mm_xor_si128( _mm_loadu_si128( (__m128i const *)&blake2s_IV[4] ), LOADU( &ctx->t[0] ) ); - - ROUND( 0 ); - ROUND( 1 ); - ROUND( 2 ); - ROUND( 3 ); - ROUND( 4 ); - ROUND( 5 ); - ROUND( 6 ); - ROUND( 7 ); - ROUND( 8 ); - ROUND( 9 ); - - STOREU( &ctx->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) ); - STOREU( &ctx->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) ); +template +static void blake2_compress( blake2s_context * ctx, const uint8_t * in ) { + __m128i row1, row2, row3, row4; + __m128i buf1, buf2, buf3, buf4; + __m128i t0, t1, t2; + __m128i ff0, ff1; + + const __m128i r8 = _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1); + const __m128i r16 = _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); + + const __m128i m0 = bswap ? mm_bswap32(LOADU(in + 00)) : LOADU(in + 00); + const __m128i m1 = bswap ? mm_bswap32(LOADU(in + 16)) : LOADU(in + 16); + const __m128i m2 = bswap ? mm_bswap32(LOADU(in + 32)) : LOADU(in + 32); + const __m128i m3 = bswap ? 
mm_bswap32(LOADU(in + 48)) : LOADU(in + 48); + + row1 = ff0 = LOADU(&ctx->h[0]); + row2 = ff1 = LOADU(&ctx->h[4]); + row3 = _mm_loadu_si128((__m128i const *)&blake2s_IV[0]); + row4 = _mm_xor_si128(_mm_loadu_si128((__m128i const *)&blake2s_IV[4]), LOADU(&ctx->t[0])); + + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + + STOREU(&ctx->h[0], _mm_xor_si128(ff0, _mm_xor_si128(row1, row3))); + STOREU(&ctx->h[4], _mm_xor_si128(ff1, _mm_xor_si128(row2, row4))); } #undef G1 diff --git a/hashes/blake3.cpp b/hashes/blake3.cpp index ac9b2658..29484843 100644 --- a/hashes/blake3.cpp +++ b/hashes/blake3.cpp @@ -30,29 +30,31 @@ #include "Platform.h" #include "Hashlib.h" -static const uint32_t IV[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, - 0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL, - 0x1F83D9ABUL, 0x5BE0CD19UL}; +static const uint32_t IV [8] = { + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, + 0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL, + 0x1F83D9ABUL, 0x5BE0CD19UL +}; static const uint8_t MSG_SCHEDULE[7][16] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8}, - {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1}, - {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6}, - {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4}, - {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7}, - {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13}, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8 }, + { 3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1 }, + { 10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6 }, + { 12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4 }, + { 9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7 }, + { 11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13 }, }; // internal flags enum blake3_flags { - CHUNK_START = 1 << 0, - CHUNK_END = 1 << 1, - 
PARENT = 1 << 2, - ROOT = 1 << 3, - KEYED_HASH = 1 << 4, - DERIVE_KEY_CONTEXT = 1 << 5, - DERIVE_KEY_MATERIAL = 1 << 6, + CHUNK_START = 1 << 0, + CHUNK_END = 1 << 1, + PARENT = 1 << 2, + ROOT = 1 << 3, + KEYED_HASH = 1 << 4, + DERIVE_KEY_CONTEXT = 1 << 5, + DERIVE_KEY_MATERIAL = 1 << 6, }; #define BLAKE3_KEY_LEN 32 @@ -61,235 +63,231 @@ enum blake3_flags { #define BLAKE3_CHUNK_LEN 1024 #define BLAKE3_MAX_DEPTH 54 -static FORCE_INLINE uint32_t counter_low(uint64_t counter) { return (uint32_t)counter; } +static FORCE_INLINE uint32_t counter_low( uint64_t counter ) { return (uint32_t)counter; } -static FORCE_INLINE uint32_t counter_high(uint64_t counter) { - return (uint32_t)(counter >> 32); +static FORCE_INLINE uint32_t counter_high( uint64_t counter ) { + return (uint32_t)(counter >> 32); } -static FORCE_INLINE uint64_t round_down_to_power_of_2(uint64_t x) { - return 1ULL << (63 ^ clz8(x | 1)); +static FORCE_INLINE uint64_t round_down_to_power_of_2( uint64_t x ) { + return 1ULL << (63 ^ clz8(x | 1)); } -static FORCE_INLINE size_t left_len(size_t content_len) { - // Subtract 1 to reserve at least one byte for the right side. content_len - // should always be greater than BLAKE3_CHUNK_LEN. - size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN; - return round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN; +static FORCE_INLINE size_t left_len( size_t content_len ) { + // Subtract 1 to reserve at least one byte for the right side. content_len + // should always be greater than BLAKE3_CHUNK_LEN. 
+ size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN; + + return round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN; } -static FORCE_INLINE void store32(void *dst, uint32_t w) { - uint8_t *p = (uint8_t *)dst; - p[0] = (uint8_t)(w >> 0); - p[1] = (uint8_t)(w >> 8); - p[2] = (uint8_t)(w >> 16); - p[3] = (uint8_t)(w >> 24); +static FORCE_INLINE void store32( void * dst, uint32_t w ) { + uint8_t * p = (uint8_t *)dst; + + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); } -static FORCE_INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) { - store32(&bytes_out[0 * 4], cv_words[0]); - store32(&bytes_out[1 * 4], cv_words[1]); - store32(&bytes_out[2 * 4], cv_words[2]); - store32(&bytes_out[3 * 4], cv_words[3]); - store32(&bytes_out[4 * 4], cv_words[4]); - store32(&bytes_out[5 * 4], cv_words[5]); - store32(&bytes_out[6 * 4], cv_words[6]); - store32(&bytes_out[7 * 4], cv_words[7]); +static FORCE_INLINE void store_cv_words( uint8_t bytes_out[32], uint32_t cv_words[8] ) { + store32(&bytes_out[0 * 4], cv_words[0]); + store32(&bytes_out[1 * 4], cv_words[1]); + store32(&bytes_out[2 * 4], cv_words[2]); + store32(&bytes_out[3 * 4], cv_words[3]); + store32(&bytes_out[4 * 4], cv_words[4]); + store32(&bytes_out[5 * 4], cv_words[5]); + store32(&bytes_out[6 * 4], cv_words[6]); + store32(&bytes_out[7 * 4], cv_words[7]); } typedef struct { - uint32_t cv[8]; - uint64_t chunk_counter; - uint8_t buf[BLAKE3_BLOCK_LEN]; - uint8_t buf_len; - uint8_t blocks_compressed; - uint8_t flags; + uint32_t cv[8]; + uint64_t chunk_counter; + uint8_t buf[BLAKE3_BLOCK_LEN]; + uint8_t buf_len; + uint8_t blocks_compressed; + uint8_t flags; } blake3_chunk_state; typedef struct { - uint32_t key[8]; - blake3_chunk_state chunk; - uint8_t cv_stack_len; - uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN]; + uint32_t key[8]; + blake3_chunk_state chunk; + uint8_t cv_stack_len; + uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 
1) * BLAKE3_OUT_LEN]; } blake3_hasher; typedef struct { - uint32_t input_cv[8]; - uint64_t counter; - uint8_t block[BLAKE3_BLOCK_LEN]; - uint8_t block_len; - uint8_t flags; + uint32_t input_cv[8]; + uint64_t counter; + uint8_t block[BLAKE3_BLOCK_LEN]; + uint8_t block_len; + uint8_t flags; } output_t; -static void blake3_compress_in_place(uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, uint8_t flags); -static void blake3_compress_xof(const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags, uint8_t out[64]); - -static FORCE_INLINE void chunk_state_init(blake3_chunk_state * self, const uint32_t key[8], - uint8_t flags) { - memcpy(self->cv, key, BLAKE3_KEY_LEN); - memset(self->buf, 0, BLAKE3_BLOCK_LEN); - self->chunk_counter = 0; - self->buf_len = 0; - self->blocks_compressed = 0; - self->flags = flags; +static void blake3_compress_in_place( uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags ); +static void blake3_compress_xof( const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64] ); + +static FORCE_INLINE void chunk_state_init( blake3_chunk_state * self, const uint32_t key[8], uint8_t flags ) { + memcpy(self->cv, key, BLAKE3_KEY_LEN); + memset(self->buf, 0, BLAKE3_BLOCK_LEN); + self->chunk_counter = 0; + self->buf_len = 0; + self->blocks_compressed = 0; + self->flags = flags; } -static FORCE_INLINE void chunk_state_reset(blake3_chunk_state *self, const uint32_t key[8], - uint64_t chunk_counter) { - memcpy(self->cv, key, BLAKE3_KEY_LEN); - self->chunk_counter = chunk_counter; - self->blocks_compressed = 0; - memset(self->buf, 0, BLAKE3_BLOCK_LEN); - self->buf_len = 0; +static FORCE_INLINE void chunk_state_reset( blake3_chunk_state * self, const uint32_t key[8], uint64_t chunk_counter ) { + memcpy(self->cv, key, BLAKE3_KEY_LEN); + 
self->chunk_counter = chunk_counter; + self->blocks_compressed = 0; + memset(self->buf, 0, BLAKE3_BLOCK_LEN); + self->buf_len = 0; } -static FORCE_INLINE output_t make_output(const uint32_t input_cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags) { - output_t ret; - memcpy(ret.input_cv, input_cv, 32); - memcpy(ret.block, block, BLAKE3_BLOCK_LEN); - ret.block_len = block_len; - ret.counter = counter; - ret.flags = flags; - return ret; +static FORCE_INLINE output_t make_output( const uint32_t input_cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags ) { + output_t ret; + + memcpy(ret.input_cv, input_cv, 32); + memcpy(ret.block , block , BLAKE3_BLOCK_LEN); + ret.block_len = block_len; + ret.counter = counter; + ret.flags = flags; + return ret; } -static FORCE_INLINE uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state *self) { - if (self->blocks_compressed == 0) { - return CHUNK_START; - } else { - return 0; - } +static FORCE_INLINE uint8_t chunk_state_maybe_start_flag( const blake3_chunk_state * self ) { + if (self->blocks_compressed == 0) { + return CHUNK_START; + } else { + return 0; + } } -static FORCE_INLINE size_t chunk_state_fill_buf(blake3_chunk_state *self, - const uint8_t *input, size_t input_len) { - size_t take = BLAKE3_BLOCK_LEN - ((size_t)self->buf_len); - if (take > input_len) { - take = input_len; - } - uint8_t *dest = self->buf + ((size_t)self->buf_len); - memcpy(dest, input, take); - self->buf_len += (uint8_t)take; - return take; +static FORCE_INLINE size_t chunk_state_fill_buf( blake3_chunk_state * self, const uint8_t * input, size_t input_len ) { + size_t take = BLAKE3_BLOCK_LEN - ((size_t)self->buf_len); + + if (take > input_len) { + take = input_len; + } + uint8_t * dest = self->buf + ((size_t)self->buf_len); + memcpy(dest, input, take); + self->buf_len += (uint8_t)take; + return take; } -static FORCE_INLINE output_t 
chunk_state_output(const blake3_chunk_state *self) { - uint8_t block_flags = - self->flags | chunk_state_maybe_start_flag(self) | CHUNK_END; - return make_output(self->cv, self->buf, self->buf_len, self->chunk_counter, - block_flags); +static FORCE_INLINE output_t chunk_state_output( const blake3_chunk_state * self ) { + uint8_t block_flags = + self->flags | chunk_state_maybe_start_flag(self) | CHUNK_END; + + return make_output(self->cv, self->buf, self->buf_len, self->chunk_counter, block_flags); } -static FORCE_INLINE output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN], - const uint32_t key[8], uint8_t flags) { - return make_output(key, block, BLAKE3_BLOCK_LEN, 0, flags | PARENT); +static FORCE_INLINE output_t parent_output( const uint8_t block[BLAKE3_BLOCK_LEN], + const uint32_t key[8], uint8_t flags ) { + return make_output(key, block, BLAKE3_BLOCK_LEN, 0, flags | PARENT); } -static FORCE_INLINE size_t chunk_state_len(const blake3_chunk_state *self) { - return (BLAKE3_BLOCK_LEN * (size_t)self->blocks_compressed) + - ((size_t)self->buf_len); +static FORCE_INLINE size_t chunk_state_len( const blake3_chunk_state * self ) { + return (BLAKE3_BLOCK_LEN * (size_t)self->blocks_compressed) + + ((size_t)self->buf_len); } -static FORCE_INLINE void output_root_bytes(const output_t * self, uint8_t * out, size_t out_len) { - uint64_t output_block_counter = 0; - size_t offset_within_block = 0; - uint8_t wide_buf[64]; - while (out_len > 0) { - blake3_compress_xof(self->input_cv, self->block, self->block_len, - output_block_counter, self->flags | ROOT, wide_buf); - size_t available_bytes = 64 - offset_within_block; - size_t memcpy_len; - if (out_len > available_bytes) { - memcpy_len = available_bytes; - } else { - memcpy_len = out_len; +static FORCE_INLINE void output_root_bytes( const output_t * self, uint8_t * out, size_t out_len ) { + uint64_t output_block_counter = 0; + size_t offset_within_block = 0; + uint8_t wide_buf[64]; + + while (out_len > 0) { + 
blake3_compress_xof(self->input_cv, self->block, self->block_len, + output_block_counter, self->flags | ROOT, wide_buf); + size_t available_bytes = 64 - offset_within_block; + size_t memcpy_len; + if (out_len > available_bytes) { + memcpy_len = available_bytes; + } else { + memcpy_len = out_len; + } + memcpy(out, wide_buf + offset_within_block, memcpy_len); + out += memcpy_len; + out_len -= memcpy_len; + output_block_counter += 1; + offset_within_block = 0; } - memcpy(out, wide_buf + offset_within_block, memcpy_len); - out += memcpy_len; - out_len -= memcpy_len; - output_block_counter += 1; - offset_within_block = 0; - } } -static FORCE_INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) { - uint32_t cv_words[8]; - memcpy(cv_words, self->input_cv, 32); - blake3_compress_in_place(cv_words, self->block, self->block_len, - self->counter, self->flags); - store_cv_words(cv, cv_words); +static FORCE_INLINE void output_chaining_value( const output_t * self, uint8_t cv[32] ) { + uint32_t cv_words[8]; + + memcpy(cv_words, self->input_cv, 32); + blake3_compress_in_place(cv_words, self->block, self->block_len, self->counter, self->flags); + store_cv_words(cv, cv_words); } -static FORCE_INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len) { - size_t post_merge_stack_len = (size_t)popcount8(total_len); - while (self->cv_stack_len > post_merge_stack_len) { - uint8_t *parent_node = - &self->cv_stack[(self->cv_stack_len - 2) * BLAKE3_OUT_LEN]; - output_t output = parent_output(parent_node, self->key, self->chunk.flags); - output_chaining_value(&output, parent_node); - self->cv_stack_len -= 1; - } +static FORCE_INLINE void hasher_merge_cv_stack( blake3_hasher * self, uint64_t total_len ) { + size_t post_merge_stack_len = (size_t)popcount8(total_len); + + while (self->cv_stack_len > post_merge_stack_len) { + uint8_t * parent_node = + &self->cv_stack[(self->cv_stack_len - 2) * BLAKE3_OUT_LEN]; + output_t output = parent_output(parent_node, 
self->key, self->chunk.flags); + output_chaining_value(&output, parent_node); + self->cv_stack_len -= 1; + } } -static FORCE_INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN], - uint64_t chunk_counter) { - hasher_merge_cv_stack(self, chunk_counter); - memcpy(&self->cv_stack[self->cv_stack_len * BLAKE3_OUT_LEN], new_cv, - BLAKE3_OUT_LEN); - self->cv_stack_len += 1; +static FORCE_INLINE void hasher_push_cv( blake3_hasher * self, + uint8_t new_cv[BLAKE3_OUT_LEN], uint64_t chunk_counter ) { + hasher_merge_cv_stack(self, chunk_counter); + memcpy(&self->cv_stack[self->cv_stack_len * BLAKE3_OUT_LEN], new_cv, BLAKE3_OUT_LEN); + self->cv_stack_len += 1; } -static FORCE_INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input, - size_t input_len) { - if (self->buf_len > 0) { +static FORCE_INLINE void chunk_state_update( blake3_chunk_state * self, const uint8_t * input, size_t input_len ) { + if (self->buf_len > 0) { + size_t take = chunk_state_fill_buf(self, input, input_len); + input += take; + input_len -= take; + if (input_len > 0) { + blake3_compress_in_place(self->cv, self->buf, BLAKE3_BLOCK_LEN, self->chunk_counter, + self->flags | chunk_state_maybe_start_flag(self)); + self->blocks_compressed += 1; + self->buf_len = 0; + memset(self->buf, 0, BLAKE3_BLOCK_LEN); + } + } + + while (input_len > BLAKE3_BLOCK_LEN) { + blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN, self->chunk_counter, + self->flags | chunk_state_maybe_start_flag(self)); + self->blocks_compressed += 1; + input += BLAKE3_BLOCK_LEN; + input_len -= BLAKE3_BLOCK_LEN; + } + size_t take = chunk_state_fill_buf(self, input, input_len); - input += take; + input += take; input_len -= take; - if (input_len > 0) { - blake3_compress_in_place( - self->cv, self->buf, BLAKE3_BLOCK_LEN, self->chunk_counter, - self->flags | chunk_state_maybe_start_flag(self)); - self->blocks_compressed += 1; - self->buf_len = 0; - memset(self->buf, 0, BLAKE3_BLOCK_LEN); - } - } - 
- while (input_len > BLAKE3_BLOCK_LEN) { - blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN, - self->chunk_counter, - self->flags | chunk_state_maybe_start_flag(self)); - self->blocks_compressed += 1; - input += BLAKE3_BLOCK_LEN; - input_len -= BLAKE3_BLOCK_LEN; - } - - size_t take = chunk_state_fill_buf(self, input, input_len); - input += take; - input_len -= take; } -static void blake3_hasher_init(blake3_hasher * self) { - memcpy(self->key, IV, BLAKE3_KEY_LEN); - chunk_state_init(&self->chunk, IV, 0); - self->cv_stack_len = 0; +static void blake3_hasher_init( blake3_hasher * self ) { + memcpy(self->key, IV, BLAKE3_KEY_LEN); + chunk_state_init(&self->chunk, IV, 0); + self->cv_stack_len = 0; } // Home-grown SMHasher3 seeding -static void blake3_seed(blake3_hasher * hasher, uint64_t seed) { - const uint32_t seedlo = seed & 0xFFFFFFFF; - const uint32_t seedhi = (seed >> 32) & 0xFFFFFFFF; - - hasher->key[0] ^= seedlo; - hasher->chunk.cv[0] ^= seedlo; - hasher->key[1] ^= seedhi; - hasher->chunk.cv[1] ^= seedhi; +static void blake3_seed( blake3_hasher * hasher, uint64_t seed ) { + const uint32_t seedlo = seed & 0xFFFFFFFF; + const uint32_t seedhi = (seed >> 32) & 0xFFFFFFFF; + + hasher->key[0] ^= seedlo; + hasher->chunk.cv[0] ^= seedlo; + hasher->key[1] ^= seedhi; + hasher->chunk.cv[1] ^= seedhi; } // @@ -307,8 +305,8 @@ static void blake3_seed(blake3_hasher * hasher, uint64_t seed) { // // FORCE_INLINE void hash_one(const uint8_t *input, size_t blocks, // const uint32_t key[8], uint64_t counter, -// uint8_t flags, uint8_t flags_start, -// uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]); +// uint8_t flags, uint8_t flags_start, +// uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]); // // void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, // size_t blocks, const uint32_t key[8], @@ -322,343 +320,325 @@ static void blake3_seed(blake3_hasher * hasher, uint64_t seed) { // #define SIMD_DEGREE // #if defined(HAVE_SSE_4_1) -#include "Intrinsics.h" 
-#include "blake3/compress-sse41.h" + #include "Intrinsics.h" + #include "blake3/compress-sse41.h" #elif defined(HAVE_SSE_2) -#include "Intrinsics.h" -#include "blake3/compress-sse2.h" + #include "Intrinsics.h" + #include "blake3/compress-sse2.h" #else -#include "blake3/compress-portable.h" + #include "blake3/compress-portable.h" #endif -static FORCE_INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values, - size_t num_chaining_values, - const uint32_t key[8], uint8_t flags, - uint8_t *out) { - const uint8_t *parents_array[SIMD_DEGREE_OR_2]; - size_t parents_array_len = 0; - while (num_chaining_values - (2 * parents_array_len) >= 2) { - parents_array[parents_array_len] = - &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN]; - parents_array_len += 1; - } - - blake3_hash_many(parents_array, parents_array_len, 1, key, - 0, // Parents always use counter 0. - false, flags | PARENT, - 0, // Parents have no start flags. - 0, // Parents have no end flags. - out); - - // If there's an odd child left over, it becomes an output. - if (num_chaining_values > 2 * parents_array_len) { - memcpy(&out[parents_array_len * BLAKE3_OUT_LEN], - &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN], - BLAKE3_OUT_LEN); - return parents_array_len + 1; - } else { - return parents_array_len; - } +static FORCE_INLINE size_t compress_parents_parallel( const uint8_t * child_chaining_values, size_t num_chaining_values, + const uint32_t key[8], uint8_t flags, uint8_t * out ) { + const uint8_t * parents_array[SIMD_DEGREE_OR_2]; + size_t parents_array_len = 0; + + while (num_chaining_values - (2 * parents_array_len) >= 2) { + parents_array[parents_array_len] = + &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN]; + parents_array_len += 1; + } + + blake3_hash_many(parents_array, parents_array_len, 1, key, 0, // Parents always use counter 0. + false, flags | PARENT, 0, // Parents have no start flags. + 0, // Parents have no end flags. 
+ out); + + // If there's an odd child left over, it becomes an output. + if (num_chaining_values > 2 * parents_array_len) { + memcpy(&out[parents_array_len * BLAKE3_OUT_LEN], &child_chaining_values[2 * parents_array_len * BLAKE3_OUT_LEN], + BLAKE3_OUT_LEN); + return parents_array_len + 1; + } else { + return parents_array_len; + } } -static FORCE_INLINE size_t compress_chunks_parallel(const uint8_t *input, size_t input_len, - const uint32_t key[8], - uint64_t chunk_counter, uint8_t flags, - uint8_t *out) { - const uint8_t *chunks_array[SIMD_DEGREE]; - size_t input_position = 0; - size_t chunks_array_len = 0; - while (input_len - input_position >= BLAKE3_CHUNK_LEN) { - chunks_array[chunks_array_len] = &input[input_position]; - input_position += BLAKE3_CHUNK_LEN; - chunks_array_len += 1; - } - - blake3_hash_many(chunks_array, chunks_array_len, - BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN, key, chunk_counter, - true, flags, CHUNK_START, CHUNK_END, out); - - // Hash the remaining partial chunk, if there is one. Note that the empty - // chunk (meaning the empty message) is a different codepath. 
- if (input_len > input_position) { - uint64_t counter = chunk_counter + (uint64_t)chunks_array_len; - blake3_chunk_state chunk_state; - chunk_state_init(&chunk_state, key, flags); - chunk_state.chunk_counter = counter; - chunk_state_update(&chunk_state, &input[input_position], - input_len - input_position); - output_t output = chunk_state_output(&chunk_state); - output_chaining_value(&output, &out[chunks_array_len * BLAKE3_OUT_LEN]); - return chunks_array_len + 1; - } else { - return chunks_array_len; - } +static FORCE_INLINE size_t compress_chunks_parallel( const uint8_t * input, size_t input_len, + const uint32_t key[8], uint64_t chunk_counter, uint8_t flags, uint8_t * out ) { + const uint8_t * chunks_array[SIMD_DEGREE]; + size_t input_position = 0; + size_t chunks_array_len = 0; + + while (input_len - input_position >= BLAKE3_CHUNK_LEN) { + chunks_array[chunks_array_len] = &input[input_position]; + input_position += BLAKE3_CHUNK_LEN; + chunks_array_len += 1; + } + + blake3_hash_many(chunks_array, chunks_array_len, BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN, + key, chunk_counter, true, flags, CHUNK_START, CHUNK_END, out); + + // Hash the remaining partial chunk, if there is one. Note that the empty + // chunk (meaning the empty message) is a different codepath. 
+ if (input_len > input_position) { + uint64_t counter = chunk_counter + (uint64_t)chunks_array_len; + blake3_chunk_state chunk_state; + chunk_state_init(&chunk_state, key, flags); + chunk_state.chunk_counter = counter; + chunk_state_update(&chunk_state, &input[input_position], input_len - input_position); + output_t output = chunk_state_output(&chunk_state); + output_chaining_value(&output, &out[chunks_array_len * BLAKE3_OUT_LEN]); + return chunks_array_len + 1; + } else { + return chunks_array_len; + } } -static size_t blake3_compress_subtree_wide(const uint8_t *input, - size_t input_len, - const uint32_t key[8], - uint64_t chunk_counter, - uint8_t flags, uint8_t *out) { - // Note that the single chunk case does *not* bump the SIMD degree up to 2 - // when it is 1. If this implementation adds multi-threading in the future, - // this gives us the option of multi-threading even the 2-chunk case, which - // can help performance on smaller platforms. - if (input_len <= SIMD_DEGREE * BLAKE3_CHUNK_LEN) { - return compress_chunks_parallel(input, input_len, key, chunk_counter, flags, - out); - } - - // With more than simd_degree chunks, we need to recurse. Start by dividing - // the input into left and right subtrees. (Note that this is only optimal - // as long as the SIMD degree is a power of 2. If we ever get a SIMD degree - // of 3 or something, we'll need a more complicated strategy.) - size_t left_input_len = left_len(input_len); - size_t right_input_len = input_len - left_input_len; - const uint8_t *right_input = &input[left_input_len]; - uint64_t right_chunk_counter = - chunk_counter + (uint64_t)(left_input_len / BLAKE3_CHUNK_LEN); - - uint8_t cv_array[2 * SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; - size_t degree = SIMD_DEGREE; - if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) { - // The special case: We always use a degree of at least two, to make - // sure there are two outputs. Except, as noted above, at the chunk - // level, where we allow degree=1. 
(Note that the 1-chunk-input case is - // a different codepath.) - degree = 2; - } - uint8_t *right_cvs = &cv_array[degree * BLAKE3_OUT_LEN]; - - // Recurse! If this implementation adds multi-threading support in the - // future, this is where it will go. - size_t left_n = blake3_compress_subtree_wide(input, left_input_len, key, - chunk_counter, flags, cv_array); - size_t right_n = blake3_compress_subtree_wide( - right_input, right_input_len, key, right_chunk_counter, flags, right_cvs); - - // The special case again. If simd_degree=1, then we'll have left_n=1 and - // right_n=1. Rather than compressing them into a single output, return - // them directly, to make sure we always have at least two outputs. - if (left_n == 1) { - memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); - return 2; - } +static size_t blake3_compress_subtree_wide( const uint8_t * input, size_t input_len, const uint32_t key[8], + uint64_t chunk_counter, uint8_t flags, uint8_t * out ) { + // Note that the single chunk case does *not* bump the SIMD degree up to 2 + // when it is 1. If this implementation adds multi-threading in the future, + // this gives us the option of multi-threading even the 2-chunk case, which + // can help performance on smaller platforms. + if (input_len <= SIMD_DEGREE * BLAKE3_CHUNK_LEN) { + return compress_chunks_parallel(input, input_len, key, chunk_counter, flags, out); + } + + // With more than simd_degree chunks, we need to recurse. Start by dividing + // the input into left and right subtrees. (Note that this is only optimal + // as long as the SIMD degree is a power of 2. If we ever get a SIMD degree + // of 3 or something, we'll need a more complicated strategy.) 
+ size_t left_input_len = left_len(input_len); + size_t right_input_len = input_len - left_input_len; + const uint8_t * right_input = &input[left_input_len]; + uint64_t right_chunk_counter = + chunk_counter + (uint64_t)(left_input_len / BLAKE3_CHUNK_LEN); + + uint8_t cv_array[2 * SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; + size_t degree = SIMD_DEGREE; + if ((left_input_len > BLAKE3_CHUNK_LEN) && (degree == 1)) { + // The special case: We always use a degree of at least two, to make + // sure there are two outputs. Except, as noted above, at the chunk + // level, where we allow degree=1. (Note that the 1-chunk-input case is + // a different codepath.) + degree = 2; + } + uint8_t * right_cvs = &cv_array[degree * BLAKE3_OUT_LEN]; + + // Recurse! If this implementation adds multi-threading support in the + // future, this is where it will go. + size_t left_n = blake3_compress_subtree_wide(input , left_input_len , key, chunk_counter, flags, cv_array); + size_t right_n = blake3_compress_subtree_wide(right_input, right_input_len, + key, right_chunk_counter, flags, right_cvs); + + // The special case again. If simd_degree=1, then we'll have left_n=1 and + // right_n=1. Rather than compressing them into a single output, return + // them directly, to make sure we always have at least two outputs. + if (left_n == 1) { + memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); + return 2; + } - // Otherwise, do one layer of parent node compression. - size_t num_chaining_values = left_n + right_n; - return compress_parents_parallel(cv_array, num_chaining_values, key, flags, - out); + // Otherwise, do one layer of parent node compression. 
+ size_t num_chaining_values = left_n + right_n; + return compress_parents_parallel(cv_array, num_chaining_values, key, flags, out); } -static FORCE_INLINE void compress_subtree_to_parent_node( - const uint8_t *input, size_t input_len, const uint32_t key[8], - uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN]) { - uint8_t cv_array[SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; - size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key, - chunk_counter, flags, cv_array); - // If MAX_SIMD_DEGREE is greater than 2 and there's enough input, - // compress_subtree_wide() returns more than 2 chaining values. Condense - // them into 2 by forming parent nodes repeatedly. - uint8_t out_array[SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2]; - // The second half of this loop condition is always true, and we just - // asserted it above. But GCC can't tell that it's always true, and if NDEBUG - // is set on platforms where SIMD_DEGREE_OR_2 == 2, GCC emits spurious - // warnings here. GCC 8.5 is particularly sensitive, so if you're changing - // this code, test it against that version. - while (num_cvs > 2 && num_cvs <= SIMD_DEGREE_OR_2) { - num_cvs = - compress_parents_parallel(cv_array, num_cvs, key, flags, out_array); - memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN); - } - memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); +static FORCE_INLINE void compress_subtree_to_parent_node( const uint8_t * input, size_t input_len, + const uint32_t key[8], uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN] ) { + uint8_t cv_array[SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; + size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key, chunk_counter, flags, cv_array); + // If MAX_SIMD_DEGREE is greater than 2 and there's enough input, + // compress_subtree_wide() returns more than 2 chaining values. Condense + // them into 2 by forming parent nodes repeatedly. 
+ uint8_t out_array[SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2]; + + // The second half of this loop condition is always true, and we just + // asserted it above. But GCC can't tell that it's always true, and if NDEBUG + // is set on platforms where SIMD_DEGREE_OR_2 == 2, GCC emits spurious + // warnings here. GCC 8.5 is particularly sensitive, so if you're changing + // this code, test it against that version. + while (num_cvs > 2 && num_cvs <= SIMD_DEGREE_OR_2) { + num_cvs = + compress_parents_parallel(cv_array, num_cvs, key, flags, out_array); + memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN); + } + memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); } -static void blake3_hasher_update(blake3_hasher *self, const void *input, - size_t input_len) { - // Explicitly checking for zero avoids causing UB by passing a null pointer - // to memcpy. This comes up in practice with things like: - // std::vector v; - // blake3_hasher_update(&hasher, v.data(), v.size()); - if (input_len == 0) { - return; - } - - const uint8_t *input_bytes = (const uint8_t *)input; - - // If we have some partial chunk bytes in the internal chunk_state, we need - // to finish that chunk first. - if (chunk_state_len(&self->chunk) > 0) { - size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&self->chunk); - if (take > input_len) { - take = input_len; +static void blake3_hasher_update( blake3_hasher * self, const void * input, size_t input_len ) { + // Explicitly checking for zero avoids causing UB by passing a null pointer + // to memcpy. This comes up in practice with things like: + // std::vector v; + // blake3_hasher_update(&hasher, v.data(), v.size()); + if (input_len == 0) { + return; } - chunk_state_update(&self->chunk, input_bytes, take); - input_bytes += take; - input_len -= take; - // If we've filled the current chunk and there's more coming, finalize this - // chunk and proceed. In this case we know it's not the root. 
- if (input_len > 0) { - output_t output = chunk_state_output(&self->chunk); - uint8_t chunk_cv[32]; - output_chaining_value(&output, chunk_cv); - hasher_push_cv(self, chunk_cv, self->chunk.chunk_counter); - chunk_state_reset(&self->chunk, self->key, self->chunk.chunk_counter + 1); - } else { - return; + + const uint8_t * input_bytes = (const uint8_t *)input; + + // If we have some partial chunk bytes in the internal chunk_state, we need + // to finish that chunk first. + if (chunk_state_len(&self->chunk) > 0) { + size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&self->chunk); + if (take > input_len) { + take = input_len; + } + chunk_state_update(&self->chunk, input_bytes, take); + input_bytes += take; + input_len -= take; + // If we've filled the current chunk and there's more coming, finalize this + // chunk and proceed. In this case we know it's not the root. + if (input_len > 0) { + output_t output = chunk_state_output(&self->chunk); + uint8_t chunk_cv[32]; + output_chaining_value(&output, chunk_cv); + hasher_push_cv(self, chunk_cv, self->chunk.chunk_counter); + chunk_state_reset(&self->chunk, self->key, self->chunk.chunk_counter + 1); + } else { + return; + } } - } - - // Now the chunk_state is clear, and we have more input. If there's more than - // a single chunk (so, definitely not the root chunk), hash the largest whole - // subtree we can, with the full benefits of SIMD (and maybe in the future, - // multi-threading) parallelism. Two restrictions: - // - The subtree has to be a power-of-2 number of chunks. Only subtrees along - // the right edge can be incomplete, and we don't know where the right edge - // is going to be until we get to finalize(). - // - The subtree must evenly divide the total number of chunks up until this - // point (if total is not 0). If the current incomplete subtree is only - // waiting for 1 more chunk, we can't hash a subtree of 4 chunks. We have - // to complete the current subtree first. 
- // Because we might need to break up the input to form powers of 2, or to - // evenly divide what we already have, this part runs in a loop. - while (input_len > BLAKE3_CHUNK_LEN) { - size_t subtree_len = round_down_to_power_of_2(input_len); - uint64_t count_so_far = self->chunk.chunk_counter * BLAKE3_CHUNK_LEN; - // Shrink the subtree_len until it evenly divides the count so far. We know - // that subtree_len itself is a power of 2, so we can use a bitmasking - // trick instead of an actual remainder operation. (Note that if the caller - // consistently passes power-of-2 inputs of the same size, as is hopefully - // typical, this loop condition will always fail, and subtree_len will - // always be the full length of the input.) - // - // An aside: We don't have to shrink subtree_len quite this much. For - // example, if count_so_far is 1, we could pass 2 chunks to - // compress_subtree_to_parent_node. Since we'll get 2 CVs back, we'll still - // get the right answer in the end, and we might get to use 2-way SIMD - // parallelism. The problem with this optimization, is that it gets us - // stuck always hashing 2 chunks. The total number of chunks will remain - // odd, and we'll never graduate to higher degrees of parallelism. See - // https://github.com/BLAKE3-team/BLAKE3/issues/69. - while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) { - subtree_len /= 2; + + // Now the chunk_state is clear, and we have more input. If there's more than + // a single chunk (so, definitely not the root chunk), hash the largest whole + // subtree we can, with the full benefits of SIMD (and maybe in the future, + // multi-threading) parallelism. Two restrictions: + // - The subtree has to be a power-of-2 number of chunks. Only subtrees along + // the right edge can be incomplete, and we don't know where the right edge + // is going to be until we get to finalize(). + // - The subtree must evenly divide the total number of chunks up until this + // point (if total is not 0). 
If the current incomplete subtree is only + // waiting for 1 more chunk, we can't hash a subtree of 4 chunks. We have + // to complete the current subtree first. + // Because we might need to break up the input to form powers of 2, or to + // evenly divide what we already have, this part runs in a loop. + while (input_len > BLAKE3_CHUNK_LEN) { + size_t subtree_len = round_down_to_power_of_2(input_len); + uint64_t count_so_far = self->chunk.chunk_counter * BLAKE3_CHUNK_LEN; + // Shrink the subtree_len until it evenly divides the count so far. We know + // that subtree_len itself is a power of 2, so we can use a bitmasking + // trick instead of an actual remainder operation. (Note that if the caller + // consistently passes power-of-2 inputs of the same size, as is hopefully + // typical, this loop condition will always fail, and subtree_len will + // always be the full length of the input.) + // + // An aside: We don't have to shrink subtree_len quite this much. For + // example, if count_so_far is 1, we could pass 2 chunks to + // compress_subtree_to_parent_node. Since we'll get 2 CVs back, we'll still + // get the right answer in the end, and we might get to use 2-way SIMD + // parallelism. The problem with this optimization, is that it gets us + // stuck always hashing 2 chunks. The total number of chunks will remain + // odd, and we'll never graduate to higher degrees of parallelism. See + // https://github.com/BLAKE3-team/BLAKE3/issues/69. + while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) { + subtree_len /= 2; + } + // The shrunken subtree_len might now be 1 chunk long. If so, hash that one + // chunk by itself. Otherwise, compress the subtree into a pair of CVs. 
+ uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN; + if (subtree_len <= BLAKE3_CHUNK_LEN) { + blake3_chunk_state chunk_state; + chunk_state_init(&chunk_state, self->key, self->chunk.flags); + chunk_state.chunk_counter = self->chunk.chunk_counter; + chunk_state_update(&chunk_state, input_bytes, subtree_len); + output_t output = chunk_state_output(&chunk_state); + uint8_t cv[BLAKE3_OUT_LEN]; + output_chaining_value(&output, cv); + hasher_push_cv(self, cv, chunk_state.chunk_counter); + } else { + // This is the high-performance happy path, though getting here depends + // on the caller giving us a long enough input. + uint8_t cv_pair[2 * BLAKE3_OUT_LEN]; + compress_subtree_to_parent_node(input_bytes, subtree_len, self->key, + self->chunk.chunk_counter, self->chunk.flags, cv_pair); + hasher_push_cv(self, cv_pair, self->chunk.chunk_counter); + hasher_push_cv(self, &cv_pair[BLAKE3_OUT_LEN], self->chunk.chunk_counter + (subtree_chunks / 2)); + } + self->chunk.chunk_counter += subtree_chunks; + input_bytes += subtree_len; + input_len -= subtree_len; } - // The shrunken subtree_len might now be 1 chunk long. If so, hash that one - // chunk by itself. Otherwise, compress the subtree into a pair of CVs. - uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN; - if (subtree_len <= BLAKE3_CHUNK_LEN) { - blake3_chunk_state chunk_state; - chunk_state_init(&chunk_state, self->key, self->chunk.flags); - chunk_state.chunk_counter = self->chunk.chunk_counter; - chunk_state_update(&chunk_state, input_bytes, subtree_len); - output_t output = chunk_state_output(&chunk_state); - uint8_t cv[BLAKE3_OUT_LEN]; - output_chaining_value(&output, cv); - hasher_push_cv(self, cv, chunk_state.chunk_counter); - } else { - // This is the high-performance happy path, though getting here depends - // on the caller giving us a long enough input. 
- uint8_t cv_pair[2 * BLAKE3_OUT_LEN]; - compress_subtree_to_parent_node(input_bytes, subtree_len, self->key, - self->chunk.chunk_counter, - self->chunk.flags, cv_pair); - hasher_push_cv(self, cv_pair, self->chunk.chunk_counter); - hasher_push_cv(self, &cv_pair[BLAKE3_OUT_LEN], - self->chunk.chunk_counter + (subtree_chunks / 2)); + + // If there's any remaining input less than a full chunk, add it to the chunk + // state. In that case, also do a final merge loop to make sure the subtree + // stack doesn't contain any unmerged pairs. The remaining input means we + // know these merges are non-root. This merge loop isn't strictly necessary + // here, because hasher_push_chunk_cv already does its own merge loop, but it + // simplifies blake3_hasher_finalize below. + if (input_len > 0) { + chunk_state_update(&self->chunk, input_bytes, input_len); + hasher_merge_cv_stack(self, self->chunk.chunk_counter); } - self->chunk.chunk_counter += subtree_chunks; - input_bytes += subtree_len; - input_len -= subtree_len; - } - - // If there's any remaining input less than a full chunk, add it to the chunk - // state. In that case, also do a final merge loop to make sure the subtree - // stack doesn't contain any unmerged pairs. The remaining input means we - // know these merges are non-root. This merge loop isn't strictly necessary - // here, because hasher_push_chunk_cv already does its own merge loop, but it - // simplifies blake3_hasher_finalize below. - if (input_len > 0) { - chunk_state_update(&self->chunk, input_bytes, input_len); - hasher_merge_cv_stack(self, self->chunk.chunk_counter); - } } -static void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out, size_t out_len) { - // Explicitly checking for zero avoids causing UB by passing a null pointer - // to memcpy. 
This comes up in practice with things like: - // std::vector v; - // blake3_hasher_finalize(&hasher, v.data(), v.size()); - if (out_len == 0) { - return; - } - - // If the subtree stack is empty, then the current chunk is the root. - if (self->cv_stack_len == 0) { - output_t output = chunk_state_output(&self->chunk); +static void blake3_hasher_finalize( const blake3_hasher * self, uint8_t * out, size_t out_len ) { + // Explicitly checking for zero avoids causing UB by passing a null pointer + // to memcpy. This comes up in practice with things like: + // std::vector v; + // blake3_hasher_finalize(&hasher, v.data(), v.size()); + if (out_len == 0) { + return; + } + + // If the subtree stack is empty, then the current chunk is the root. + if (self->cv_stack_len == 0) { + output_t output = chunk_state_output(&self->chunk); + output_root_bytes(&output, out, out_len); + return; + } + + // If there are any bytes in the chunk state, finalize that chunk + // and do a roll-up merge between that chunk hash and every subtree + // in the stack. In this case, the extra merge loop at the end of + // blake3_hasher_update guarantees that none of the subtrees in the + // stack need to be merged with each other first. Otherwise, if + // there are no bytes in the chunk state, then the top of the stack + // is a chunk hash, and we start the merge from that. + output_t output; + size_t cvs_remaining; + if (chunk_state_len(&self->chunk) > 0) { + cvs_remaining = self->cv_stack_len; + output = chunk_state_output(&self->chunk); + } else { + // There are always at least 2 CVs in the stack in this case. 
+ cvs_remaining = self->cv_stack_len - 2; + output = parent_output(&self->cv_stack[cvs_remaining * 32], self->key, self->chunk.flags); + } + while (cvs_remaining > 0) { + cvs_remaining -= 1; + uint8_t parent_block[BLAKE3_BLOCK_LEN]; + memcpy(parent_block, &self->cv_stack[cvs_remaining * 32], 32); + output_chaining_value(&output, &parent_block[32]); + output = parent_output(parent_block, self->key, self->chunk.flags); + } output_root_bytes(&output, out, out_len); - return; - } - - // If there are any bytes in the chunk state, finalize that chunk - // and do a roll-up merge between that chunk hash and every subtree - // in the stack. In this case, the extra merge loop at the end of - // blake3_hasher_update guarantees that none of the subtrees in the - // stack need to be merged with each other first. Otherwise, if - // there are no bytes in the chunk state, then the top of the stack - // is a chunk hash, and we start the merge from that. - output_t output; - size_t cvs_remaining; - if (chunk_state_len(&self->chunk) > 0) { - cvs_remaining = self->cv_stack_len; - output = chunk_state_output(&self->chunk); - } else { - // There are always at least 2 CVs in the stack in this case. 
- cvs_remaining = self->cv_stack_len - 2; - output = parent_output(&self->cv_stack[cvs_remaining * 32], self->key, - self->chunk.flags); - } - while (cvs_remaining > 0) { - cvs_remaining -= 1; - uint8_t parent_block[BLAKE3_BLOCK_LEN]; - memcpy(parent_block, &self->cv_stack[cvs_remaining * 32], 32); - output_chaining_value(&output, &parent_block[32]); - output = parent_output(parent_block, self->key, self->chunk.flags); - } - output_root_bytes(&output, out, out_len); } -template < uint32_t outbits, bool bswap > -static void BLAKE3(const void * in, const size_t len, const seed_t seed, void * out) { - blake3_hasher hasher; +template +static void BLAKE3( const void * in, const size_t len, const seed_t seed, void * out ) { + blake3_hasher hasher; - blake3_hasher_init(&hasher); - blake3_seed(&hasher, seed); - blake3_hasher_update(&hasher, in, len); - blake3_hasher_finalize(&hasher, (uint8_t *)out, (outbits >= 256) ? 32 : (outbits+7)/8); + blake3_hasher_init(&hasher); + blake3_seed(&hasher, seed); + blake3_hasher_update(&hasher, in, len); + blake3_hasher_finalize(&hasher, (uint8_t *)out, (outbits >= 256) ? 32 : (outbits + 7) / 8); } REGISTER_FAMILY(blake3, - $.src_url = "https://github.com/BLAKE3-team/BLAKE3", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/BLAKE3-team/BLAKE3", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); // The NO_SEED flag is not actually true, but need to replace // homegrown with real seeding. 
REGISTER_HASH(blake3, - $.desc = "BLAKE 3, 256-bit digest", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT , - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL , - $.bits = 256, - $.verification_LE = 0x50E4CD91, - $.verification_BE = 0x50E4CD91, - $.hashfn_native = BLAKE3<256,false>, - $.hashfn_bswap = BLAKE3<256,true> -); + $.desc = "BLAKE 3, 256-bit digest", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL, + $.bits = 256, + $.verification_LE = 0x50E4CD91, + $.verification_BE = 0x50E4CD91, + $.hashfn_native = BLAKE3<256, false>, + $.hashfn_bswap = BLAKE3<256, true> + ); diff --git a/hashes/blake3/compress-portable.h b/hashes/blake3/compress-portable.h index 351afe5e..2f620f22 100644 --- a/hashes/blake3/compress-portable.h +++ b/hashes/blake3/compress-portable.h @@ -1,162 +1,155 @@ #define SIMD_DEGREE_OR_2 2 #define SIMD_DEGREE 1 -static FORCE_INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d, - uint32_t x, uint32_t y) { - state[a] = state[a] + state[b] + x; - state[d] = ROTR32(state[d] ^ state[a], 16); - state[c] = state[c] + state[d]; - state[b] = ROTR32(state[b] ^ state[c], 12); - state[a] = state[a] + state[b] + y; - state[d] = ROTR32(state[d] ^ state[a], 8); - state[c] = state[c] + state[d]; - state[b] = ROTR32(state[b] ^ state[c], 7); +static FORCE_INLINE void g( uint32_t * state, size_t a, size_t b, size_t c, size_t d, uint32_t x, uint32_t y ) { + state[a] = state[a] + state[b] + x; + state[d] = ROTR32(state[d] ^ state[a], 16); + state[c] = state[c] + state[d]; + state[b] = ROTR32(state[b] ^ state[c], 12); + state[a] = state[a] + state[b] + y; + state[d] = ROTR32(state[d] ^ state[a], 8); + state[c] = state[c] + 
state[d]; + state[b] = ROTR32(state[b] ^ state[c], 7); } -static FORCE_INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) { - // Select the message schedule based on the round. - const uint8_t *schedule = MSG_SCHEDULE[round]; - - // Mix the columns. - g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]); - g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]); - g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]); - g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]); - - // Mix the rows. - g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]); - g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]); - g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]); - g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]); +static FORCE_INLINE void round_fn( uint32_t state[16], const uint32_t * msg, size_t round ) { + // Select the message schedule based on the round. + const uint8_t * schedule = MSG_SCHEDULE[round]; + + // Mix the columns. + g(state, 0, 4, 8, 12, msg[schedule[0]] , msg[schedule[1]] ); + g(state, 1, 5, 9, 13, msg[schedule[2]] , msg[schedule[3]] ); + g(state, 2, 6, 10, 14, msg[schedule[4]] , msg[schedule[5]] ); + g(state, 3, 7, 11, 15, msg[schedule[6]] , msg[schedule[7]] ); + + // Mix the rows. 
+ g(state, 0, 5, 10, 15, msg[schedule[8]] , msg[schedule[9]] ); + g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]); + g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]); + g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]); } -static FORCE_INLINE uint32_t load32(const void *src) { - const uint8_t *p = (const uint8_t *)src; - return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) | - ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24); +static FORCE_INLINE uint32_t load32( const void * src ) { + const uint8_t * p = (const uint8_t *)src; + + return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) | + ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24); } -static FORCE_INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, uint8_t flags) { - uint32_t block_words[16]; - block_words[0] = load32(block + 4 * 0); - block_words[1] = load32(block + 4 * 1); - block_words[2] = load32(block + 4 * 2); - block_words[3] = load32(block + 4 * 3); - block_words[4] = load32(block + 4 * 4); - block_words[5] = load32(block + 4 * 5); - block_words[6] = load32(block + 4 * 6); - block_words[7] = load32(block + 4 * 7); - block_words[8] = load32(block + 4 * 8); - block_words[9] = load32(block + 4 * 9); - block_words[10] = load32(block + 4 * 10); - block_words[11] = load32(block + 4 * 11); - block_words[12] = load32(block + 4 * 12); - block_words[13] = load32(block + 4 * 13); - block_words[14] = load32(block + 4 * 14); - block_words[15] = load32(block + 4 * 15); - - state[0] = cv[0]; - state[1] = cv[1]; - state[2] = cv[2]; - state[3] = cv[3]; - state[4] = cv[4]; - state[5] = cv[5]; - state[6] = cv[6]; - state[7] = cv[7]; - state[8] = IV[0]; - state[9] = IV[1]; - state[10] = IV[2]; - state[11] = IV[3]; - state[12] = counter_low(counter); - state[13] = counter_high(counter); - state[14] = (uint32_t)block_len; - state[15] = (uint32_t)flags; - - round_fn(state, 
&block_words[0], 0); - round_fn(state, &block_words[0], 1); - round_fn(state, &block_words[0], 2); - round_fn(state, &block_words[0], 3); - round_fn(state, &block_words[0], 4); - round_fn(state, &block_words[0], 5); - round_fn(state, &block_words[0], 6); +static FORCE_INLINE void compress_pre( uint32_t state[16], const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags ) { + uint32_t block_words[16]; + + block_words[ 0] = load32(block + 4 * 0); + block_words[ 1] = load32(block + 4 * 1); + block_words[ 2] = load32(block + 4 * 2); + block_words[ 3] = load32(block + 4 * 3); + block_words[ 4] = load32(block + 4 * 4); + block_words[ 5] = load32(block + 4 * 5); + block_words[ 6] = load32(block + 4 * 6); + block_words[ 7] = load32(block + 4 * 7); + block_words[ 8] = load32(block + 4 * 8); + block_words[ 9] = load32(block + 4 * 9); + block_words[10] = load32(block + 4 * 10); + block_words[11] = load32(block + 4 * 11); + block_words[12] = load32(block + 4 * 12); + block_words[13] = load32(block + 4 * 13); + block_words[14] = load32(block + 4 * 14); + block_words[15] = load32(block + 4 * 15); + + state[ 0] = cv[0 ]; + state[ 1] = cv[1 ]; + state[ 2] = cv[2 ]; + state[ 3] = cv[3 ]; + state[ 4] = cv[4 ]; + state[ 5] = cv[5 ]; + state[ 6] = cv[6 ]; + state[ 7] = cv[7 ]; + state[ 8] = IV[0 ]; + state[ 9] = IV[1 ]; + state[10] = IV[2 ]; + state[11] = IV[3 ]; + state[12] = counter_low(counter); + state[13] = counter_high(counter); + state[14] = (uint32_t)block_len; + state[15] = (uint32_t)flags; + + round_fn(state, &block_words[0], 0); + round_fn(state, &block_words[0], 1); + round_fn(state, &block_words[0], 2); + round_fn(state, &block_words[0], 3); + round_fn(state, &block_words[0], 4); + round_fn(state, &block_words[0], 5); + round_fn(state, &block_words[0], 6); } -static void blake3_compress_in_place(uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags) { - 
uint32_t state[16]; - compress_pre(state, cv, block, block_len, counter, flags); - cv[0] = state[0] ^ state[8]; - cv[1] = state[1] ^ state[9]; - cv[2] = state[2] ^ state[10]; - cv[3] = state[3] ^ state[11]; - cv[4] = state[4] ^ state[12]; - cv[5] = state[5] ^ state[13]; - cv[6] = state[6] ^ state[14]; - cv[7] = state[7] ^ state[15]; +static void blake3_compress_in_place( uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags ) { + uint32_t state[16]; + + compress_pre(state, cv, block, block_len, counter, flags); + cv[0] = state[0] ^ state[ 8]; + cv[1] = state[1] ^ state[ 9]; + cv[2] = state[2] ^ state[10]; + cv[3] = state[3] ^ state[11]; + cv[4] = state[4] ^ state[12]; + cv[5] = state[5] ^ state[13]; + cv[6] = state[6] ^ state[14]; + cv[7] = state[7] ^ state[15]; } -static void blake3_compress_xof(const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags, uint8_t out[64]) { - uint32_t state[16]; - compress_pre(state, cv, block, block_len, counter, flags); - - store32(&out[0 * 4], state[0] ^ state[8]); - store32(&out[1 * 4], state[1] ^ state[9]); - store32(&out[2 * 4], state[2] ^ state[10]); - store32(&out[3 * 4], state[3] ^ state[11]); - store32(&out[4 * 4], state[4] ^ state[12]); - store32(&out[5 * 4], state[5] ^ state[13]); - store32(&out[6 * 4], state[6] ^ state[14]); - store32(&out[7 * 4], state[7] ^ state[15]); - store32(&out[8 * 4], state[8] ^ cv[0]); - store32(&out[9 * 4], state[9] ^ cv[1]); - store32(&out[10 * 4], state[10] ^ cv[2]); - store32(&out[11 * 4], state[11] ^ cv[3]); - store32(&out[12 * 4], state[12] ^ cv[4]); - store32(&out[13 * 4], state[13] ^ cv[5]); - store32(&out[14 * 4], state[14] ^ cv[6]); - store32(&out[15 * 4], state[15] ^ cv[7]); +static void blake3_compress_xof( const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64] ) { + uint32_t state[16]; + + 
compress_pre(state, cv, block, block_len, counter, flags); + + store32(&out[0 * 4] , state[0] ^ state[8] ); + store32(&out[1 * 4] , state[1] ^ state[9] ); + store32(&out[2 * 4] , state[2] ^ state[10]); + store32(&out[3 * 4] , state[3] ^ state[11]); + store32(&out[4 * 4] , state[4] ^ state[12]); + store32(&out[5 * 4] , state[5] ^ state[13]); + store32(&out[6 * 4] , state[6] ^ state[14]); + store32(&out[7 * 4] , state[7] ^ state[15]); + store32(&out[8 * 4] , state[8] ^ cv[0] ); + store32(&out[9 * 4] , state[9] ^ cv[1] ); + store32(&out[10 * 4], state[10] ^ cv[2] ); + store32(&out[11 * 4], state[11] ^ cv[3] ); + store32(&out[12 * 4], state[12] ^ cv[4] ); + store32(&out[13 * 4], state[13] ^ cv[5] ); + store32(&out[14 * 4], state[14] ^ cv[6] ); + store32(&out[15 * 4], state[15] ^ cv[7] ); } -static FORCE_INLINE void hash_one(const uint8_t *input, size_t blocks, - const uint32_t key[8], uint64_t counter, - uint8_t flags, uint8_t flags_start, - uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) { - uint32_t cv[8]; - memcpy(cv, key, BLAKE3_KEY_LEN); - uint8_t block_flags = flags | flags_start; - while (blocks > 0) { - if (blocks == 1) { - block_flags |= flags_end; +static FORCE_INLINE void hash_one( const uint8_t * input, size_t blocks, const uint32_t key[8], uint64_t counter, + uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN] ) { + uint32_t cv[8]; + + memcpy(cv, key, BLAKE3_KEY_LEN); + uint8_t block_flags = flags | flags_start; + while (blocks > 0) { + if (blocks == 1) { + block_flags |= flags_end; + } + blake3_compress_in_place(cv, input, BLAKE3_BLOCK_LEN, counter, block_flags); + input = &input[BLAKE3_BLOCK_LEN]; + blocks -= 1; + block_flags = flags; } - blake3_compress_in_place(cv, input, BLAKE3_BLOCK_LEN, counter, - block_flags); - input = &input[BLAKE3_BLOCK_LEN]; - blocks -= 1; - block_flags = flags; - } - store_cv_words(out, cv); + store_cv_words(out, cv); } -static void blake3_hash_many(const uint8_t *const *inputs, size_t 
num_inputs, - size_t blocks, const uint32_t key[8], - uint64_t counter, bool increment_counter, - uint8_t flags, uint8_t flags_start, - uint8_t flags_end, uint8_t *out) { - while (num_inputs > 0) { - hash_one(inputs[0], blocks, key, counter, flags, flags_start, - flags_end, out); - if (increment_counter) { - counter += 1; +static void blake3_hash_many( const uint8_t * const * inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, bool increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t * out ) { + while (num_inputs > 0) { + hash_one(inputs[0], blocks, key, counter, flags, flags_start, flags_end, out); + if (increment_counter) { + counter += 1; + } + inputs += 1; + num_inputs -= 1; + out = &out[BLAKE3_OUT_LEN]; } - inputs += 1; - num_inputs -= 1; - out = &out[BLAKE3_OUT_LEN]; - } } diff --git a/hashes/blake3/compress-sse2.h b/hashes/blake3/compress-sse2.h index b24cf79d..f2525c49 100644 --- a/hashes/blake3/compress-sse2.h +++ b/hashes/blake3/compress-sse2.h @@ -3,562 +3,548 @@ #define DEGREE 4 -#define _mm_shuffle_ps2(a, b, c) \ - (_mm_castps_si128( \ +#define _mm_shuffle_ps2(a, b, c) \ + (_mm_castps_si128( \ _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c)))) -static FORCE_INLINE __m128i loadu(const uint8_t src[16]) { - return _mm_loadu_si128((const __m128i *)src); +static FORCE_INLINE __m128i loadu( const uint8_t src[16] ) { + return _mm_loadu_si128((const __m128i *)src); } -static FORCE_INLINE void storeu(__m128i src, uint8_t dest[16]) { - _mm_storeu_si128((__m128i *)dest, src); +static FORCE_INLINE void storeu( __m128i src, uint8_t dest[16] ) { + _mm_storeu_si128((__m128i *)dest, src); } -static FORCE_INLINE __m128i addv(__m128i a, __m128i b) { return _mm_add_epi32(a, b); } +static FORCE_INLINE __m128i addv( __m128i a, __m128i b ) { return _mm_add_epi32(a, b); } // Note that clang-format doesn't like the name "xor" for some reason. 
-static FORCE_INLINE __m128i xorv(__m128i a, __m128i b) { return _mm_xor_si128(a, b); } +static FORCE_INLINE __m128i xorv( __m128i a, __m128i b ) { return _mm_xor_si128(a, b); } -static FORCE_INLINE __m128i set1(uint32_t x) { return _mm_set1_epi32((int32_t)x); } +static FORCE_INLINE __m128i set1( uint32_t x ) { return _mm_set1_epi32((int32_t)x); } -static FORCE_INLINE __m128i set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { - return _mm_setr_epi32((int32_t)a, (int32_t)b, (int32_t)c, (int32_t)d); +static FORCE_INLINE __m128i set4( uint32_t a, uint32_t b, uint32_t c, uint32_t d ) { + return _mm_setr_epi32((int32_t)a, (int32_t)b, (int32_t)c, (int32_t)d); } -static FORCE_INLINE __m128i rot16(__m128i x) { - return _mm_shufflehi_epi16(_mm_shufflelo_epi16(x, 0xB1), 0xB1); +static FORCE_INLINE __m128i rot16( __m128i x ) { + return _mm_shufflehi_epi16(_mm_shufflelo_epi16(x, 0xB1), 0xB1); } -static FORCE_INLINE __m128i rot12(__m128i x) { - return xorv(_mm_srli_epi32(x, 12), _mm_slli_epi32(x, 32 - 12)); +static FORCE_INLINE __m128i rot12( __m128i x ) { + return xorv(_mm_srli_epi32(x, 12), _mm_slli_epi32(x, 32 - 12)); } -static FORCE_INLINE __m128i rot8(__m128i x) { - return xorv(_mm_srli_epi32(x, 8), _mm_slli_epi32(x, 32 - 8)); +static FORCE_INLINE __m128i rot8( __m128i x ) { + return xorv(_mm_srli_epi32(x, 8), _mm_slli_epi32(x, 32 - 8)); } -static FORCE_INLINE __m128i rot7(__m128i x) { - return xorv(_mm_srli_epi32(x, 7), _mm_slli_epi32(x, 32 - 7)); +static FORCE_INLINE __m128i rot7( __m128i x ) { + return xorv(_mm_srli_epi32(x, 7), _mm_slli_epi32(x, 32 - 7)); } -static FORCE_INLINE void g1(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, - __m128i m) { - *row0 = addv(addv(*row0, m), *row1); - *row3 = xorv(*row3, *row0); - *row3 = rot16(*row3); - *row2 = addv(*row2, *row3); - *row1 = xorv(*row1, *row2); - *row1 = rot12(*row1); +static FORCE_INLINE void g1( __m128i * row0, __m128i * row1, __m128i * row2, __m128i * row3, __m128i m ) { + *row0 = addv(addv(*row0, 
m), *row1); + *row3 = xorv(*row3, *row0); + *row3 = rot16(*row3); + *row2 = addv(*row2, *row3); + *row1 = xorv(*row1, *row2); + *row1 = rot12(*row1); } -static FORCE_INLINE void g2(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, - __m128i m) { - *row0 = addv(addv(*row0, m), *row1); - *row3 = xorv(*row3, *row0); - *row3 = rot8(*row3); - *row2 = addv(*row2, *row3); - *row1 = xorv(*row1, *row2); - *row1 = rot7(*row1); +static FORCE_INLINE void g2( __m128i * row0, __m128i * row1, __m128i * row2, __m128i * row3, __m128i m ) { + *row0 = addv(addv(*row0, m), *row1); + *row3 = xorv(*row3, *row0); + *row3 = rot8(*row3); + *row2 = addv(*row2, *row3); + *row1 = xorv(*row1, *row2); + *row1 = rot7(*row1); } // Note the optimization here of leaving row1 as the unrotated row, rather than // row0. All the message loads below are adjusted to compensate for this. See // discussion at https://github.com/sneves/blake2-avx2/pull/4 -static FORCE_INLINE void diagonalize(__m128i *row0, __m128i *row2, __m128i *row3) { - *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3)); - *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2)); - *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1)); +static FORCE_INLINE void diagonalize( __m128i * row0, __m128i * row2, __m128i * row3 ) { + *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3)); + *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2)); + *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1)); } -static FORCE_INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) { - *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1)); - *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2)); - *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3)); +static FORCE_INLINE void undiagonalize( __m128i * row0, __m128i * row2, __m128i * row3 ) { + *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1)); + *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2)); + *row2 = _mm_shuffle_epi32(*row2, 
_MM_SHUFFLE(2, 1, 0, 3)); } -static FORCE_INLINE __m128i blend_epi16(__m128i a, __m128i b, const int16_t imm8) { - const __m128i bits = _mm_set_epi16(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01); - __m128i mask = _mm_set1_epi16(imm8); - mask = _mm_and_si128(mask, bits); - mask = _mm_cmpeq_epi16(mask, bits); - return _mm_or_si128(_mm_and_si128(mask, b), _mm_andnot_si128(mask, a)); +static FORCE_INLINE __m128i blend_epi16( __m128i a, __m128i b, const int16_t imm8 ) { + const __m128i bits = _mm_set_epi16(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01); + __m128i mask = _mm_set1_epi16(imm8); + + mask = _mm_and_si128(mask, bits); + mask = _mm_cmpeq_epi16(mask, bits); + return _mm_or_si128(_mm_and_si128(mask, b), _mm_andnot_si128(mask, a)); } -static FORCE_INLINE void compress_pre(__m128i rows[4], const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, uint8_t flags) { - rows[0] = loadu((uint8_t *)&cv[0]); - rows[1] = loadu((uint8_t *)&cv[4]); - rows[2] = set4(IV[0], IV[1], IV[2], IV[3]); - rows[3] = set4(counter_low(counter), counter_high(counter), - (uint32_t)block_len, (uint32_t)flags); - - __m128i m0 = loadu(&block[sizeof(__m128i) * 0]); - __m128i m1 = loadu(&block[sizeof(__m128i) * 1]); - __m128i m2 = loadu(&block[sizeof(__m128i) * 2]); - __m128i m3 = loadu(&block[sizeof(__m128i) * 3]); - - __m128i t0, t1, t2, t3, tt; - - // Round 1. The first round permutes the message words from the original - // input order, into the groups that get mixed in parallel. 
- t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); // 6 4 2 0 - g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); - t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); // 7 5 3 1 - g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); - diagonalize(&rows[0], &rows[2], &rows[3]); - t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10 8 - t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2, 1, 0, 3)); // 12 10 8 14 - g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); - t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11 9 - t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE(2, 1, 0, 3)); // 13 11 9 15 - g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); - undiagonalize(&rows[0], &rows[2], &rows[3]); - m0 = t0; - m1 = t1; - m2 = t2; - m3 = t3; - - // Round 2. This round and all following rounds apply a fixed permutation - // to the message words from the round before. - t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); - t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); - t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); - tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); - t1 = blend_epi16(tt, t1, 0xCC); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); - diagonalize(&rows[0], &rows[2], &rows[3]); - t2 = _mm_unpacklo_epi64(m3, m1); - tt = blend_epi16(t2, m2, 0xC0); - t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); - t3 = _mm_unpackhi_epi32(m1, m3); - tt = _mm_unpacklo_epi32(m2, t3); - t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); - undiagonalize(&rows[0], &rows[2], &rows[3]); - m0 = t0; - m1 = t1; - m2 = t2; - m3 = t3; - - // Round 3 - t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); - t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); - t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); - tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 
0, 3, 3)); - t1 = blend_epi16(tt, t1, 0xCC); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); - diagonalize(&rows[0], &rows[2], &rows[3]); - t2 = _mm_unpacklo_epi64(m3, m1); - tt = blend_epi16(t2, m2, 0xC0); - t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); - t3 = _mm_unpackhi_epi32(m1, m3); - tt = _mm_unpacklo_epi32(m2, t3); - t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); - undiagonalize(&rows[0], &rows[2], &rows[3]); - m0 = t0; - m1 = t1; - m2 = t2; - m3 = t3; - - // Round 4 - t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); - t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); - t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); - tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); - t1 = blend_epi16(tt, t1, 0xCC); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); - diagonalize(&rows[0], &rows[2], &rows[3]); - t2 = _mm_unpacklo_epi64(m3, m1); - tt = blend_epi16(t2, m2, 0xC0); - t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); - t3 = _mm_unpackhi_epi32(m1, m3); - tt = _mm_unpacklo_epi32(m2, t3); - t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); - undiagonalize(&rows[0], &rows[2], &rows[3]); - m0 = t0; - m1 = t1; - m2 = t2; - m3 = t3; - - // Round 5 - t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); - t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); - t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); - tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); - t1 = blend_epi16(tt, t1, 0xCC); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); - diagonalize(&rows[0], &rows[2], &rows[3]); - t2 = _mm_unpacklo_epi64(m3, m1); - tt = blend_epi16(t2, m2, 0xC0); - t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); - g1(&rows[0], 
&rows[1], &rows[2], &rows[3], t2); - t3 = _mm_unpackhi_epi32(m1, m3); - tt = _mm_unpacklo_epi32(m2, t3); - t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); - undiagonalize(&rows[0], &rows[2], &rows[3]); - m0 = t0; - m1 = t1; - m2 = t2; - m3 = t3; - - // Round 6 - t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); - t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); - t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); - tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); - t1 = blend_epi16(tt, t1, 0xCC); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); - diagonalize(&rows[0], &rows[2], &rows[3]); - t2 = _mm_unpacklo_epi64(m3, m1); - tt = blend_epi16(t2, m2, 0xC0); - t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); - t3 = _mm_unpackhi_epi32(m1, m3); - tt = _mm_unpacklo_epi32(m2, t3); - t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); - undiagonalize(&rows[0], &rows[2], &rows[3]); - m0 = t0; - m1 = t1; - m2 = t2; - m3 = t3; - - // Round 7 - t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); - t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); - t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); - tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); - t1 = blend_epi16(tt, t1, 0xCC); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); - diagonalize(&rows[0], &rows[2], &rows[3]); - t2 = _mm_unpacklo_epi64(m3, m1); - tt = blend_epi16(t2, m2, 0xC0); - t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); - t3 = _mm_unpackhi_epi32(m1, m3); - tt = _mm_unpacklo_epi32(m2, t3); - t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); - undiagonalize(&rows[0], &rows[2], &rows[3]); +static FORCE_INLINE void 
compress_pre( __m128i rows[4], const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags ) { + rows[0] = loadu((uint8_t *)&cv[0]); + rows[1] = loadu((uint8_t *)&cv[4]); + rows[2] = set4(IV[0], IV[1], IV[2], IV[3]); + rows[3] = set4(counter_low(counter), counter_high(counter), (uint32_t)block_len, (uint32_t)flags); + + __m128i m0 = loadu(&block[sizeof(__m128i) * 0]); + __m128i m1 = loadu(&block[sizeof(__m128i) * 1]); + __m128i m2 = loadu(&block[sizeof(__m128i) * 2]); + __m128i m3 = loadu(&block[sizeof(__m128i) * 3]); + + __m128i t0, t1, t2, t3, tt; + + // Round 1. The first round permutes the message words from the original + // input order, into the groups that get mixed in parallel. + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); // 6 4 2 0 + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); // 7 5 3 1 + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10 8 + t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE( 2, 1, 0, 3)); // 12 10 8 14 + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11 9 + t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE( 2, 1, 0, 3)); // 13 11 9 15 + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 2. This round and all following rounds apply a fixed permutation + // to the message words from the round before. 
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE( 0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE( 0, 0, 3, 3)); + t1 = blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 3 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE( 0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE( 0, 0, 3, 3)); + t1 = blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 4 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE( 0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE( 0, 0, 3, 3)); 
+ t1 = blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 5 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE( 0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE( 0, 0, 3, 3)); + t1 = blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 6 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE( 0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE( 0, 0, 3, 3)); + t1 = blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 1, 3, 2, 0)); + g1(&rows[0], 
&rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 7 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE( 0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE( 0, 0, 3, 3)); + t1 = blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); } -static void blake3_compress_in_place(uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags) { - __m128i rows[4]; - compress_pre(rows, cv, block, block_len, counter, flags); - storeu(xorv(rows[0], rows[2]), (uint8_t *)&cv[0]); - storeu(xorv(rows[1], rows[3]), (uint8_t *)&cv[4]); +static void blake3_compress_in_place( uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags ) { + __m128i rows[4]; + + compress_pre(rows, cv, block, block_len, counter, flags); + storeu(xorv(rows[0], rows[2]), (uint8_t *)&cv[0]); + storeu(xorv(rows[1], rows[3]), (uint8_t *)&cv[4]); } -static void blake3_compress_xof(const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags, uint8_t out[64]) { - __m128i rows[4]; - 
compress_pre(rows, cv, block, block_len, counter, flags); - storeu(xorv(rows[0], rows[2]), &out[0]); - storeu(xorv(rows[1], rows[3]), &out[16]); - storeu(xorv(rows[2], loadu((uint8_t *)&cv[0])), &out[32]); - storeu(xorv(rows[3], loadu((uint8_t *)&cv[4])), &out[48]); +static void blake3_compress_xof( const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64] ) { + __m128i rows[4]; + + compress_pre(rows, cv, block, block_len, counter, flags); + storeu(xorv(rows[0], rows[2]), &out[0] ); + storeu(xorv(rows[1], rows[3]), &out[16]); + storeu(xorv(rows[2], loadu((uint8_t *)&cv[0])), &out[32]); + storeu(xorv(rows[3], loadu((uint8_t *)&cv[4])), &out[48]); } -static FORCE_INLINE void round_fn(__m128i v[16], __m128i m[16], size_t r) { - v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][0]]); - v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][2]]); - v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][4]]); - v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][6]]); - v[0] = addv(v[0], v[4]); - v[1] = addv(v[1], v[5]); - v[2] = addv(v[2], v[6]); - v[3] = addv(v[3], v[7]); - v[12] = xorv(v[12], v[0]); - v[13] = xorv(v[13], v[1]); - v[14] = xorv(v[14], v[2]); - v[15] = xorv(v[15], v[3]); - v[12] = rot16(v[12]); - v[13] = rot16(v[13]); - v[14] = rot16(v[14]); - v[15] = rot16(v[15]); - v[8] = addv(v[8], v[12]); - v[9] = addv(v[9], v[13]); - v[10] = addv(v[10], v[14]); - v[11] = addv(v[11], v[15]); - v[4] = xorv(v[4], v[8]); - v[5] = xorv(v[5], v[9]); - v[6] = xorv(v[6], v[10]); - v[7] = xorv(v[7], v[11]); - v[4] = rot12(v[4]); - v[5] = rot12(v[5]); - v[6] = rot12(v[6]); - v[7] = rot12(v[7]); - v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][1]]); - v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][3]]); - v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][5]]); - v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][7]]); - v[0] = addv(v[0], v[4]); - v[1] = addv(v[1], v[5]); - v[2] = addv(v[2], v[6]); - v[3] = addv(v[3], v[7]); - v[12] = xorv(v[12], v[0]); - 
v[13] = xorv(v[13], v[1]); - v[14] = xorv(v[14], v[2]); - v[15] = xorv(v[15], v[3]); - v[12] = rot8(v[12]); - v[13] = rot8(v[13]); - v[14] = rot8(v[14]); - v[15] = rot8(v[15]); - v[8] = addv(v[8], v[12]); - v[9] = addv(v[9], v[13]); - v[10] = addv(v[10], v[14]); - v[11] = addv(v[11], v[15]); - v[4] = xorv(v[4], v[8]); - v[5] = xorv(v[5], v[9]); - v[6] = xorv(v[6], v[10]); - v[7] = xorv(v[7], v[11]); - v[4] = rot7(v[4]); - v[5] = rot7(v[5]); - v[6] = rot7(v[6]); - v[7] = rot7(v[7]); - - v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][8]]); - v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][10]]); - v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][12]]); - v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][14]]); - v[0] = addv(v[0], v[5]); - v[1] = addv(v[1], v[6]); - v[2] = addv(v[2], v[7]); - v[3] = addv(v[3], v[4]); - v[15] = xorv(v[15], v[0]); - v[12] = xorv(v[12], v[1]); - v[13] = xorv(v[13], v[2]); - v[14] = xorv(v[14], v[3]); - v[15] = rot16(v[15]); - v[12] = rot16(v[12]); - v[13] = rot16(v[13]); - v[14] = rot16(v[14]); - v[10] = addv(v[10], v[15]); - v[11] = addv(v[11], v[12]); - v[8] = addv(v[8], v[13]); - v[9] = addv(v[9], v[14]); - v[5] = xorv(v[5], v[10]); - v[6] = xorv(v[6], v[11]); - v[7] = xorv(v[7], v[8]); - v[4] = xorv(v[4], v[9]); - v[5] = rot12(v[5]); - v[6] = rot12(v[6]); - v[7] = rot12(v[7]); - v[4] = rot12(v[4]); - v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][9]]); - v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][11]]); - v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][13]]); - v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][15]]); - v[0] = addv(v[0], v[5]); - v[1] = addv(v[1], v[6]); - v[2] = addv(v[2], v[7]); - v[3] = addv(v[3], v[4]); - v[15] = xorv(v[15], v[0]); - v[12] = xorv(v[12], v[1]); - v[13] = xorv(v[13], v[2]); - v[14] = xorv(v[14], v[3]); - v[15] = rot8(v[15]); - v[12] = rot8(v[12]); - v[13] = rot8(v[13]); - v[14] = rot8(v[14]); - v[10] = addv(v[10], v[15]); - v[11] = addv(v[11], v[12]); - v[8] = addv(v[8], v[13]); - v[9] = addv(v[9], v[14]); - v[5] = xorv(v[5], 
v[10]); - v[6] = xorv(v[6], v[11]); - v[7] = xorv(v[7], v[8]); - v[4] = xorv(v[4], v[9]); - v[5] = rot7(v[5]); - v[6] = rot7(v[6]); - v[7] = rot7(v[7]); - v[4] = rot7(v[4]); +static FORCE_INLINE void round_fn( __m128i v[16], __m128i m[16], size_t r ) { + v[ 0] = addv(v[ 0], m[(size_t)MSG_SCHEDULE[r][0]]); + v[ 1] = addv(v[ 1], m[(size_t)MSG_SCHEDULE[r][2]]); + v[ 2] = addv(v[ 2], m[(size_t)MSG_SCHEDULE[r][4]]); + v[ 3] = addv(v[ 3], m[(size_t)MSG_SCHEDULE[r][6]]); + v[ 0] = addv(v[ 0], v[4]); + v[ 1] = addv(v[ 1], v[5]); + v[ 2] = addv(v[ 2], v[6]); + v[ 3] = addv(v[ 3], v[7]); + v[12] = xorv(v[12], v[0]); + v[13] = xorv(v[13], v[1]); + v[14] = xorv(v[14], v[2]); + v[15] = xorv(v[15], v[3]); + v[12] = rot16(v[12]); + v[13] = rot16(v[13]); + v[14] = rot16(v[14]); + v[15] = rot16(v[15]); + v[ 8] = addv(v [ 8], v[12]); + v[ 9] = addv(v [ 9], v[13]); + v[10] = addv(v [10], v[14]); + v[11] = addv(v [11], v[15]); + v[ 4] = xorv(v [ 4], v[ 8]); + v[ 5] = xorv(v [ 5], v[ 9]); + v[ 6] = xorv(v [ 6], v[10]); + v[ 7] = xorv(v [ 7], v[11]); + v[ 4] = rot12(v[ 4]); + v[ 5] = rot12(v[ 5]); + v[ 6] = rot12(v[ 6]); + v[ 7] = rot12(v[ 7]); + v[ 0] = addv(v[ 0], m[(size_t)MSG_SCHEDULE[r][1]]); + v[ 1] = addv(v[ 1], m[(size_t)MSG_SCHEDULE[r][3]]); + v[ 2] = addv(v[ 2], m[(size_t)MSG_SCHEDULE[r][5]]); + v[ 3] = addv(v[ 3], m[(size_t)MSG_SCHEDULE[r][7]]); + v[ 0] = addv(v[ 0], v[4]); + v[ 1] = addv(v[ 1], v[5]); + v[ 2] = addv(v[ 2], v[6]); + v[ 3] = addv(v[ 3], v[7]); + v[12] = xorv(v[12], v[0]); + v[13] = xorv(v[13], v[1]); + v[14] = xorv(v[14], v[2]); + v[15] = xorv(v[15], v[3]); + v[12] = rot8(v[12]); + v[13] = rot8(v[13]); + v[14] = rot8(v[14]); + v[15] = rot8(v[15]); + v[ 8] = addv(v[ 8], v[12]); + v[ 9] = addv(v[ 9], v[13]); + v[10] = addv(v[10], v[14]); + v[11] = addv(v[11], v[15]); + v[ 4] = xorv(v[ 4], v[ 8]); + v[ 5] = xorv(v[ 5], v[ 9]); + v[ 6] = xorv(v[ 6], v[10]); + v[ 7] = xorv(v[ 7], v[11]); + v[ 4] = rot7(v[ 4]); + v[ 5] = rot7(v[ 5]); + v[ 6] = rot7(v[ 6]); + v[ 7] = 
rot7(v[ 7]); + + v[ 0] = addv(v[ 0], m[(size_t)MSG_SCHEDULE[r][ 8]]); + v[ 1] = addv(v[ 1], m[(size_t)MSG_SCHEDULE[r][10]]); + v[ 2] = addv(v[ 2], m[(size_t)MSG_SCHEDULE[r][12]]); + v[ 3] = addv(v[ 3], m[(size_t)MSG_SCHEDULE[r][14]]); + v[ 0] = addv(v[ 0], v[5]); + v[ 1] = addv(v[ 1], v[6]); + v[ 2] = addv(v[ 2], v[7]); + v[ 3] = addv(v[ 3], v[4]); + v[15] = xorv(v[15], v[0]); + v[12] = xorv(v[12], v[1]); + v[13] = xorv(v[13], v[2]); + v[14] = xorv(v[14], v[3]); + v[15] = rot16(v[15]); + v[12] = rot16(v[12]); + v[13] = rot16(v[13]); + v[14] = rot16(v[14]); + v[10] = addv(v [10], v[15]); + v[11] = addv(v [11], v[12]); + v[ 8] = addv(v [ 8], v[13]); + v[ 9] = addv(v [ 9], v[14]); + v[ 5] = xorv(v [ 5], v[10]); + v[ 6] = xorv(v [ 6], v[11]); + v[ 7] = xorv(v [ 7], v[ 8]); + v[ 4] = xorv(v [ 4], v[ 9]); + v[ 5] = rot12(v[ 5]); + v[ 6] = rot12(v[ 6]); + v[ 7] = rot12(v[ 7]); + v[ 4] = rot12(v[ 4]); + v[ 0] = addv(v[ 0], m[(size_t)MSG_SCHEDULE[r][ 9]]); + v[ 1] = addv(v[ 1], m[(size_t)MSG_SCHEDULE[r][11]]); + v[ 2] = addv(v[ 2], m[(size_t)MSG_SCHEDULE[r][13]]); + v[ 3] = addv(v[ 3], m[(size_t)MSG_SCHEDULE[r][15]]); + v[ 0] = addv(v[ 0], v[5]); + v[ 1] = addv(v[ 1], v[6]); + v[ 2] = addv(v[ 2], v[7]); + v[ 3] = addv(v[ 3], v[4]); + v[15] = xorv(v[15], v[0]); + v[12] = xorv(v[12], v[1]); + v[13] = xorv(v[13], v[2]); + v[14] = xorv(v[14], v[3]); + v[15] = rot8(v[15]); + v[12] = rot8(v[12]); + v[13] = rot8(v[13]); + v[14] = rot8(v[14]); + v[10] = addv(v[10], v[15]); + v[11] = addv(v[11], v[12]); + v[ 8] = addv(v[ 8], v[13]); + v[ 9] = addv(v[ 9], v[14]); + v[ 5] = xorv(v[ 5], v[10]); + v[ 6] = xorv(v[ 6], v[11]); + v[ 7] = xorv(v[ 7], v[ 8]); + v[ 4] = xorv(v[ 4], v[ 9]); + v[ 5] = rot7(v[ 5]); + v[ 6] = rot7(v[ 6]); + v[ 7] = rot7(v[ 7]); + v[ 4] = rot7(v[ 4]); } -static FORCE_INLINE void transpose_vecs(__m128i vecs[DEGREE]) { - // Interleave 32-bit lates. The low unpack is lanes 00/11 and the high is - // 22/33. 
Note that this doesn't split the vector into two lanes, as the - AVX2 counterparts do. - __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]); - __m128i ab_23 = _mm_unpackhi_epi32(vecs[0], vecs[1]); - __m128i cd_01 = _mm_unpacklo_epi32(vecs[2], vecs[3]); - __m128i cd_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]); - - // Interleave 64-bit lanes. - __m128i abcd_0 = _mm_unpacklo_epi64(ab_01, cd_01); - __m128i abcd_1 = _mm_unpackhi_epi64(ab_01, cd_01); - __m128i abcd_2 = _mm_unpacklo_epi64(ab_23, cd_23); - __m128i abcd_3 = _mm_unpackhi_epi64(ab_23, cd_23); - - vecs[0] = abcd_0; - vecs[1] = abcd_1; - vecs[2] = abcd_2; - vecs[3] = abcd_3; +static FORCE_INLINE void transpose_vecs( __m128i vecs[DEGREE] ) { + // Interleave 32-bit lanes. The low unpack is lanes 00/11 and the high is + // 22/33. Note that this doesn't split the vector into two lanes, as the + // AVX2 counterparts do. + __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]); + __m128i ab_23 = _mm_unpackhi_epi32(vecs[0], vecs[1]); + __m128i cd_01 = _mm_unpacklo_epi32(vecs[2], vecs[3]); + __m128i cd_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]); + + // Interleave 64-bit lanes. 
+ __m128i abcd_0 = _mm_unpacklo_epi64(ab_01, cd_01); + __m128i abcd_1 = _mm_unpackhi_epi64(ab_01, cd_01); + __m128i abcd_2 = _mm_unpacklo_epi64(ab_23, cd_23); + __m128i abcd_3 = _mm_unpackhi_epi64(ab_23, cd_23); + + vecs[0] = abcd_0; + vecs[1] = abcd_1; + vecs[2] = abcd_2; + vecs[3] = abcd_3; } -static FORCE_INLINE void transpose_msg_vecs(const uint8_t *const *inputs, - size_t block_offset, __m128i out[16]) { - out[0] = loadu(&inputs[0][block_offset + 0 * sizeof(__m128i)]); - out[1] = loadu(&inputs[1][block_offset + 0 * sizeof(__m128i)]); - out[2] = loadu(&inputs[2][block_offset + 0 * sizeof(__m128i)]); - out[3] = loadu(&inputs[3][block_offset + 0 * sizeof(__m128i)]); - out[4] = loadu(&inputs[0][block_offset + 1 * sizeof(__m128i)]); - out[5] = loadu(&inputs[1][block_offset + 1 * sizeof(__m128i)]); - out[6] = loadu(&inputs[2][block_offset + 1 * sizeof(__m128i)]); - out[7] = loadu(&inputs[3][block_offset + 1 * sizeof(__m128i)]); - out[8] = loadu(&inputs[0][block_offset + 2 * sizeof(__m128i)]); - out[9] = loadu(&inputs[1][block_offset + 2 * sizeof(__m128i)]); - out[10] = loadu(&inputs[2][block_offset + 2 * sizeof(__m128i)]); - out[11] = loadu(&inputs[3][block_offset + 2 * sizeof(__m128i)]); - out[12] = loadu(&inputs[0][block_offset + 3 * sizeof(__m128i)]); - out[13] = loadu(&inputs[1][block_offset + 3 * sizeof(__m128i)]); - out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]); - out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]); - for (size_t i = 0; i < 4; ++i) { - _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0); - } - transpose_vecs(&out[0]); - transpose_vecs(&out[4]); - transpose_vecs(&out[8]); - transpose_vecs(&out[12]); +static FORCE_INLINE void transpose_msg_vecs( const uint8_t * const * inputs, size_t block_offset, __m128i out[16] ) { + out[ 0] = loadu(&inputs[0][block_offset + 0 * sizeof(__m128i)]); + out[ 1] = loadu(&inputs[1][block_offset + 0 * sizeof(__m128i)]); + out[ 2] = loadu(&inputs[2][block_offset + 0 * 
sizeof(__m128i)]); + out[ 3] = loadu(&inputs[3][block_offset + 0 * sizeof(__m128i)]); + out[ 4] = loadu(&inputs[0][block_offset + 1 * sizeof(__m128i)]); + out[ 5] = loadu(&inputs[1][block_offset + 1 * sizeof(__m128i)]); + out[ 6] = loadu(&inputs[2][block_offset + 1 * sizeof(__m128i)]); + out[ 7] = loadu(&inputs[3][block_offset + 1 * sizeof(__m128i)]); + out[ 8] = loadu(&inputs[0][block_offset + 2 * sizeof(__m128i)]); + out[ 9] = loadu(&inputs[1][block_offset + 2 * sizeof(__m128i)]); + out[10] = loadu(&inputs[2][block_offset + 2 * sizeof(__m128i)]); + out[11] = loadu(&inputs[3][block_offset + 2 * sizeof(__m128i)]); + out[12] = loadu(&inputs[0][block_offset + 3 * sizeof(__m128i)]); + out[13] = loadu(&inputs[1][block_offset + 3 * sizeof(__m128i)]); + out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]); + out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]); + for (size_t i = 0; i < 4; ++i) { + _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0); + } + transpose_vecs(&out[0] ); + transpose_vecs(&out[4] ); + transpose_vecs(&out[8] ); + transpose_vecs(&out[12]); } -static FORCE_INLINE void load_counters(uint64_t counter, bool increment_counter, - __m128i *out_lo, __m128i *out_hi) { - const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter); - const __m128i add0 = _mm_set_epi32(3, 2, 1, 0); - const __m128i add1 = _mm_and_si128(mask, add0); - __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1); - __m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)), - _mm_xor_si128( l, _mm_set1_epi32(0x80000000))); - __m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry); - *out_lo = l; - *out_hi = h; +static FORCE_INLINE void load_counters( uint64_t counter, bool increment_counter, __m128i * out_lo, __m128i * out_hi ) { + const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter); + const __m128i add0 = _mm_set_epi32(3, 2, 1, 0); + const __m128i add1 = _mm_and_si128(mask, 
add0); + __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1); + __m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32( + 0x80000000)), _mm_xor_si128(l, _mm_set1_epi32(0x80000000))); + __m128i h = _mm_sub_epi32(_mm_set1_epi32( (int32_t)(counter >> 32)), carry); + + *out_lo = l; + *out_hi = h; } -static void blake3_hash4(const uint8_t *const *inputs, size_t blocks, - const uint32_t key[8], uint64_t counter, - bool increment_counter, uint8_t flags, - uint8_t flags_start, uint8_t flags_end, uint8_t *out) { - __m128i h_vecs[8] = { - set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]), - set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]), - }; - __m128i counter_low_vec, counter_high_vec; - load_counters(counter, increment_counter, &counter_low_vec, - &counter_high_vec); - uint8_t block_flags = flags | flags_start; - - for (size_t block = 0; block < blocks; block++) { - if (block + 1 == blocks) { - block_flags |= flags_end; - } - __m128i block_len_vec = set1(BLAKE3_BLOCK_LEN); - __m128i block_flags_vec = set1(block_flags); - __m128i msg_vecs[16]; - transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs); - - __m128i v[16] = { - h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3], - h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7], - set1(IV[0]), set1(IV[1]), set1(IV[2]), set1(IV[3]), - counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec, +static void blake3_hash4( const uint8_t * const * inputs, size_t blocks, const uint32_t key[8], uint64_t counter, + bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t * out ) { + __m128i h_vecs[8] = { + set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]), + set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]), }; - round_fn(v, msg_vecs, 0); - round_fn(v, msg_vecs, 1); - round_fn(v, msg_vecs, 2); - round_fn(v, msg_vecs, 3); - round_fn(v, msg_vecs, 4); - round_fn(v, msg_vecs, 5); - round_fn(v, msg_vecs, 6); - h_vecs[0] = xorv(v[0], v[8]); - h_vecs[1] = 
xorv(v[1], v[9]); - h_vecs[2] = xorv(v[2], v[10]); - h_vecs[3] = xorv(v[3], v[11]); - h_vecs[4] = xorv(v[4], v[12]); - h_vecs[5] = xorv(v[5], v[13]); - h_vecs[6] = xorv(v[6], v[14]); - h_vecs[7] = xorv(v[7], v[15]); - - block_flags = flags; - } - - transpose_vecs(&h_vecs[0]); - transpose_vecs(&h_vecs[4]); - // The first four vecs now contain the first half of each output, and the - // second four vecs contain the second half of each output. - storeu(h_vecs[0], &out[0 * sizeof(__m128i)]); - storeu(h_vecs[4], &out[1 * sizeof(__m128i)]); - storeu(h_vecs[1], &out[2 * sizeof(__m128i)]); - storeu(h_vecs[5], &out[3 * sizeof(__m128i)]); - storeu(h_vecs[2], &out[4 * sizeof(__m128i)]); - storeu(h_vecs[6], &out[5 * sizeof(__m128i)]); - storeu(h_vecs[3], &out[6 * sizeof(__m128i)]); - storeu(h_vecs[7], &out[7 * sizeof(__m128i)]); + __m128i counter_low_vec, counter_high_vec; + + load_counters(counter, increment_counter, &counter_low_vec, &counter_high_vec); + uint8_t block_flags = flags | flags_start; + + for (size_t block = 0; block < blocks; block++) { + if (block + 1 == blocks) { + block_flags |= flags_end; + } + __m128i block_len_vec = set1(BLAKE3_BLOCK_LEN); + __m128i block_flags_vec = set1(block_flags ); + __m128i msg_vecs[16]; + transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs); + + __m128i v[16] = { + h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3], + h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7], + set1(IV[0]), set1(IV[1]), set1(IV[2]), set1(IV[3]), + counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec, + }; + round_fn(v, msg_vecs, 0); + round_fn(v, msg_vecs, 1); + round_fn(v, msg_vecs, 2); + round_fn(v, msg_vecs, 3); + round_fn(v, msg_vecs, 4); + round_fn(v, msg_vecs, 5); + round_fn(v, msg_vecs, 6); + h_vecs[0] = xorv(v[0], v[ 8]); + h_vecs[1] = xorv(v[1], v[ 9]); + h_vecs[2] = xorv(v[2], v[10]); + h_vecs[3] = xorv(v[3], v[11]); + h_vecs[4] = xorv(v[4], v[12]); + h_vecs[5] = xorv(v[5], v[13]); + h_vecs[6] = xorv(v[6], v[14]); + h_vecs[7] = xorv(v[7], 
v[15]); + + block_flags = flags; + } + + transpose_vecs(&h_vecs[0]); + transpose_vecs(&h_vecs[4]); + // The first four vecs now contain the first half of each output, and the + // second four vecs contain the second half of each output. + storeu(h_vecs[0], &out[0 * sizeof(__m128i)]); + storeu(h_vecs[4], &out[1 * sizeof(__m128i)]); + storeu(h_vecs[1], &out[2 * sizeof(__m128i)]); + storeu(h_vecs[5], &out[3 * sizeof(__m128i)]); + storeu(h_vecs[2], &out[4 * sizeof(__m128i)]); + storeu(h_vecs[6], &out[5 * sizeof(__m128i)]); + storeu(h_vecs[3], &out[6 * sizeof(__m128i)]); + storeu(h_vecs[7], &out[7 * sizeof(__m128i)]); } -static FORCE_INLINE void hash_one(const uint8_t *input, size_t blocks, - const uint32_t key[8], uint64_t counter, - uint8_t flags, uint8_t flags_start, - uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) { - uint32_t cv[8]; - memcpy(cv, key, BLAKE3_KEY_LEN); - uint8_t block_flags = flags | flags_start; - while (blocks > 0) { - if (blocks == 1) { - block_flags |= flags_end; +static FORCE_INLINE void hash_one( const uint8_t * input, size_t blocks, const uint32_t key[8], uint64_t counter, + uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN] ) { + uint32_t cv[8]; + + memcpy(cv, key, BLAKE3_KEY_LEN); + uint8_t block_flags = flags | flags_start; + while (blocks > 0) { + if (blocks == 1) { + block_flags |= flags_end; + } + blake3_compress_in_place(cv, input, BLAKE3_BLOCK_LEN, counter, block_flags); + input = &input[BLAKE3_BLOCK_LEN]; + blocks -= 1; + block_flags = flags; } - blake3_compress_in_place(cv, input, BLAKE3_BLOCK_LEN, counter, - block_flags); - input = &input[BLAKE3_BLOCK_LEN]; - blocks -= 1; - block_flags = flags; - } - memcpy(out, cv, BLAKE3_OUT_LEN); + memcpy(out, cv, BLAKE3_OUT_LEN); } -static void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, - size_t blocks, const uint32_t key[8], - uint64_t counter, bool increment_counter, - uint8_t flags, uint8_t flags_start, - uint8_t flags_end, uint8_t *out) 
{ - while (num_inputs >= DEGREE) { - blake3_hash4(inputs, blocks, key, counter, increment_counter, flags, - flags_start, flags_end, out); - if (increment_counter) { - counter += DEGREE; +static void blake3_hash_many( const uint8_t * const * inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, bool increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t * out ) { + while (num_inputs >= DEGREE) { + blake3_hash4(inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out); + if (increment_counter) { + counter += DEGREE; + } + inputs += DEGREE; + num_inputs -= DEGREE; + out = &out[DEGREE * BLAKE3_OUT_LEN]; } - inputs += DEGREE; - num_inputs -= DEGREE; - out = &out[DEGREE * BLAKE3_OUT_LEN]; - } - while (num_inputs > 0) { - hash_one(inputs[0], blocks, key, counter, flags, flags_start, - flags_end, out); - if (increment_counter) { - counter += 1; + while (num_inputs > 0) { + hash_one(inputs[0], blocks, key, counter, flags, flags_start, flags_end, out); + if (increment_counter) { + counter += 1; + } + inputs += 1; + num_inputs -= 1; + out = &out[BLAKE3_OUT_LEN]; } - inputs += 1; - num_inputs -= 1; - out = &out[BLAKE3_OUT_LEN]; - } } diff --git a/hashes/blake3/compress-sse41.h b/hashes/blake3/compress-sse41.h index ae0ec61f..a379e5ae 100644 --- a/hashes/blake3/compress-sse41.h +++ b/hashes/blake3/compress-sse41.h @@ -3,556 +3,539 @@ #define DEGREE 4 -#define _mm_shuffle_ps2(a, b, c) \ - (_mm_castps_si128( \ +#define _mm_shuffle_ps2(a, b, c) \ + (_mm_castps_si128( \ _mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c)))) -static FORCE_INLINE __m128i loadu(const uint8_t src[16]) { - return _mm_loadu_si128((const __m128i *)src); +static FORCE_INLINE __m128i loadu( const uint8_t src[16] ) { + return _mm_loadu_si128((const __m128i *)src); } -static FORCE_INLINE void storeu(__m128i src, uint8_t dest[16]) { - _mm_storeu_si128((__m128i *)dest, src); +static FORCE_INLINE void storeu( __m128i 
src, uint8_t dest[16] ) { + _mm_storeu_si128((__m128i *)dest, src); } -static FORCE_INLINE __m128i addv(__m128i a, __m128i b) { return _mm_add_epi32(a, b); } +static FORCE_INLINE __m128i addv( __m128i a, __m128i b ) { return _mm_add_epi32(a, b); } // Note that clang-format doesn't like the name "xor" for some reason. -static FORCE_INLINE __m128i xorv(__m128i a, __m128i b) { return _mm_xor_si128(a, b); } +static FORCE_INLINE __m128i xorv( __m128i a, __m128i b ) { return _mm_xor_si128(a, b); } -static FORCE_INLINE __m128i set1(uint32_t x) { return _mm_set1_epi32((int32_t)x); } +static FORCE_INLINE __m128i set1( uint32_t x ) { return _mm_set1_epi32((int32_t)x); } -static FORCE_INLINE __m128i set4(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { - return _mm_setr_epi32((int32_t)a, (int32_t)b, (int32_t)c, (int32_t)d); +static FORCE_INLINE __m128i set4( uint32_t a, uint32_t b, uint32_t c, uint32_t d ) { + return _mm_setr_epi32((int32_t)a, (int32_t)b, (int32_t)c, (int32_t)d); } -static FORCE_INLINE __m128i rot16(__m128i x) { - return _mm_shuffle_epi8( - x, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)); +static FORCE_INLINE __m128i rot16( __m128i x ) { + return _mm_shuffle_epi8(x, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)); } -static FORCE_INLINE __m128i rot12(__m128i x) { - return xorv(_mm_srli_epi32(x, 12), _mm_slli_epi32(x, 32 - 12)); +static FORCE_INLINE __m128i rot12( __m128i x ) { + return xorv(_mm_srli_epi32(x, 12), _mm_slli_epi32(x, 32 - 12)); } -static FORCE_INLINE __m128i rot8(__m128i x) { - return _mm_shuffle_epi8( - x, _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1)); +static FORCE_INLINE __m128i rot8( __m128i x ) { + return _mm_shuffle_epi8(x, _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1)); } -static FORCE_INLINE __m128i rot7(__m128i x) { - return xorv(_mm_srli_epi32(x, 7), _mm_slli_epi32(x, 32 - 7)); +static FORCE_INLINE __m128i rot7( __m128i x ) { + return 
xorv(_mm_srli_epi32(x, 7), _mm_slli_epi32(x, 32 - 7)); } -static FORCE_INLINE void g1(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, - __m128i m) { - *row0 = addv(addv(*row0, m), *row1); - *row3 = xorv(*row3, *row0); - *row3 = rot16(*row3); - *row2 = addv(*row2, *row3); - *row1 = xorv(*row1, *row2); - *row1 = rot12(*row1); +static FORCE_INLINE void g1( __m128i * row0, __m128i * row1, __m128i * row2, __m128i * row3, __m128i m ) { + *row0 = addv(addv(*row0, m), *row1); + *row3 = xorv(*row3, *row0); + *row3 = rot16(*row3); + *row2 = addv(*row2, *row3); + *row1 = xorv(*row1, *row2); + *row1 = rot12(*row1); } -static FORCE_INLINE void g2(__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, - __m128i m) { - *row0 = addv(addv(*row0, m), *row1); - *row3 = xorv(*row3, *row0); - *row3 = rot8(*row3); - *row2 = addv(*row2, *row3); - *row1 = xorv(*row1, *row2); - *row1 = rot7(*row1); +static FORCE_INLINE void g2( __m128i * row0, __m128i * row1, __m128i * row2, __m128i * row3, __m128i m ) { + *row0 = addv(addv(*row0, m), *row1); + *row3 = xorv(*row3, *row0); + *row3 = rot8(*row3); + *row2 = addv(*row2, *row3); + *row1 = xorv(*row1, *row2); + *row1 = rot7(*row1); } // Note the optimization here of leaving row1 as the unrotated row, rather than // row0. All the message loads below are adjusted to compensate for this. 
See // discussion at https://github.com/sneves/blake2-avx2/pull/4 -static FORCE_INLINE void diagonalize(__m128i *row0, __m128i *row2, __m128i *row3) { - *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3)); - *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2)); - *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1)); +static FORCE_INLINE void diagonalize( __m128i * row0, __m128i * row2, __m128i * row3 ) { + *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(2, 1, 0, 3)); + *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2)); + *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(0, 3, 2, 1)); } -static FORCE_INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) { - *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1)); - *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2)); - *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3)); +static FORCE_INLINE void undiagonalize( __m128i * row0, __m128i * row2, __m128i * row3 ) { + *row0 = _mm_shuffle_epi32(*row0, _MM_SHUFFLE(0, 3, 2, 1)); + *row3 = _mm_shuffle_epi32(*row3, _MM_SHUFFLE(1, 0, 3, 2)); + *row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3)); } -static FORCE_INLINE void compress_pre(__m128i rows[4], const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, uint8_t flags) { - rows[0] = loadu((uint8_t *)&cv[0]); - rows[1] = loadu((uint8_t *)&cv[4]); - rows[2] = set4(IV[0], IV[1], IV[2], IV[3]); - rows[3] = set4(counter_low(counter), counter_high(counter), - (uint32_t)block_len, (uint32_t)flags); - - __m128i m0 = loadu(&block[sizeof(__m128i) * 0]); - __m128i m1 = loadu(&block[sizeof(__m128i) * 1]); - __m128i m2 = loadu(&block[sizeof(__m128i) * 2]); - __m128i m3 = loadu(&block[sizeof(__m128i) * 3]); - - __m128i t0, t1, t2, t3, tt; - - // Round 1. The first round permutes the message words from the original - // input order, into the groups that get mixed in parallel. 
- t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); // 6 4 2 0 - g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); - t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); // 7 5 3 1 - g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); - diagonalize(&rows[0], &rows[2], &rows[3]); - t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10 8 - t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2, 1, 0, 3)); // 12 10 8 14 - g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); - t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11 9 - t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE(2, 1, 0, 3)); // 13 11 9 15 - g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); - undiagonalize(&rows[0], &rows[2], &rows[3]); - m0 = t0; - m1 = t1; - m2 = t2; - m3 = t3; - - // Round 2. This round and all following rounds apply a fixed permutation - // to the message words from the round before. - t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); - t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); - t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); - tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); - t1 = _mm_blend_epi16(tt, t1, 0xCC); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); - diagonalize(&rows[0], &rows[2], &rows[3]); - t2 = _mm_unpacklo_epi64(m3, m1); - tt = _mm_blend_epi16(t2, m2, 0xC0); - t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); - t3 = _mm_unpackhi_epi32(m1, m3); - tt = _mm_unpacklo_epi32(m2, t3); - t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); - undiagonalize(&rows[0], &rows[2], &rows[3]); - m0 = t0; - m1 = t1; - m2 = t2; - m3 = t3; - - // Round 3 - t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); - t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); - t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); - tt = _mm_shuffle_epi32(m0, 
_MM_SHUFFLE(0, 0, 3, 3)); - t1 = _mm_blend_epi16(tt, t1, 0xCC); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); - diagonalize(&rows[0], &rows[2], &rows[3]); - t2 = _mm_unpacklo_epi64(m3, m1); - tt = _mm_blend_epi16(t2, m2, 0xC0); - t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); - t3 = _mm_unpackhi_epi32(m1, m3); - tt = _mm_unpacklo_epi32(m2, t3); - t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); - undiagonalize(&rows[0], &rows[2], &rows[3]); - m0 = t0; - m1 = t1; - m2 = t2; - m3 = t3; - - // Round 4 - t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); - t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); - t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); - tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); - t1 = _mm_blend_epi16(tt, t1, 0xCC); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); - diagonalize(&rows[0], &rows[2], &rows[3]); - t2 = _mm_unpacklo_epi64(m3, m1); - tt = _mm_blend_epi16(t2, m2, 0xC0); - t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); - t3 = _mm_unpackhi_epi32(m1, m3); - tt = _mm_unpacklo_epi32(m2, t3); - t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); - undiagonalize(&rows[0], &rows[2], &rows[3]); - m0 = t0; - m1 = t1; - m2 = t2; - m3 = t3; - - // Round 5 - t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); - t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); - t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); - tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); - t1 = _mm_blend_epi16(tt, t1, 0xCC); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); - diagonalize(&rows[0], &rows[2], &rows[3]); - t2 = _mm_unpacklo_epi64(m3, m1); - tt = _mm_blend_epi16(t2, m2, 0xC0); - t2 = _mm_shuffle_epi32(tt, 
_MM_SHUFFLE(1, 3, 2, 0)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); - t3 = _mm_unpackhi_epi32(m1, m3); - tt = _mm_unpacklo_epi32(m2, t3); - t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); - undiagonalize(&rows[0], &rows[2], &rows[3]); - m0 = t0; - m1 = t1; - m2 = t2; - m3 = t3; - - // Round 6 - t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); - t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); - t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); - tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); - t1 = _mm_blend_epi16(tt, t1, 0xCC); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); - diagonalize(&rows[0], &rows[2], &rows[3]); - t2 = _mm_unpacklo_epi64(m3, m1); - tt = _mm_blend_epi16(t2, m2, 0xC0); - t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); - t3 = _mm_unpackhi_epi32(m1, m3); - tt = _mm_unpacklo_epi32(m2, t3); - t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); - undiagonalize(&rows[0], &rows[2], &rows[3]); - m0 = t0; - m1 = t1; - m2 = t2; - m3 = t3; - - // Round 7 - t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); - t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE(0, 3, 2, 1)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); - t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); - tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE(0, 0, 3, 3)); - t1 = _mm_blend_epi16(tt, t1, 0xCC); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); - diagonalize(&rows[0], &rows[2], &rows[3]); - t2 = _mm_unpacklo_epi64(m3, m1); - tt = _mm_blend_epi16(t2, m2, 0xC0); - t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(1, 3, 2, 0)); - g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); - t3 = _mm_unpackhi_epi32(m1, m3); - tt = _mm_unpacklo_epi32(m2, t3); - t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE(0, 1, 3, 2)); - g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); - 
undiagonalize(&rows[0], &rows[2], &rows[3]); +static FORCE_INLINE void compress_pre( __m128i rows[4], const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags ) { + rows[0] = loadu((uint8_t *)&cv[0]); + rows[1] = loadu((uint8_t *)&cv[4]); + rows[2] = set4(IV[0], IV[1], IV[2], IV[3]); + rows[3] = set4(counter_low(counter), counter_high(counter), (uint32_t)block_len, (uint32_t)flags); + + __m128i m0 = loadu(&block[sizeof(__m128i) * 0]); + __m128i m1 = loadu(&block[sizeof(__m128i) * 1]); + __m128i m2 = loadu(&block[sizeof(__m128i) * 2]); + __m128i m3 = loadu(&block[sizeof(__m128i) * 3]); + + __m128i t0, t1, t2, t3, tt; + + // Round 1. The first round permutes the message words from the original + // input order, into the groups that get mixed in parallel. + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(2, 0, 2, 0)); // 6 4 2 0 + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 3, 1)); // 7 5 3 1 + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(2, 0, 2, 0)); // 14 12 10 8 + t2 = _mm_shuffle_epi32(t2, _MM_SHUFFLE( 2, 1, 0, 3)); // 12 10 8 14 + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 1, 3, 1)); // 15 13 11 9 + t3 = _mm_shuffle_epi32(t3, _MM_SHUFFLE( 2, 1, 0, 3)); // 13 11 9 15 + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 2. This round and all following rounds apply a fixed permutation + // to the message words from the round before. 
+ t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE( 0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE( 0, 0, 3, 3)); + t1 = _mm_blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = _mm_blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 3 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE( 0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE( 0, 0, 3, 3)); + t1 = _mm_blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = _mm_blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 4 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE( 0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, 
_MM_SHUFFLE( 0, 0, 3, 3)); + t1 = _mm_blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = _mm_blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 5 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE( 0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE( 0, 0, 3, 3)); + t1 = _mm_blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = _mm_blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 6 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE( 0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE( 0, 0, 3, 3)); + t1 = _mm_blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = _mm_blend_epi16(t2, m2, 0xC0); + t2 = 
_mm_shuffle_epi32(tt, _MM_SHUFFLE( 1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); + m0 = t0; + m1 = t1; + m2 = t2; + m3 = t3; + + // Round 7 + t0 = _mm_shuffle_ps2(m0, m1, _MM_SHUFFLE(3, 1, 1, 2)); + t0 = _mm_shuffle_epi32(t0, _MM_SHUFFLE( 0, 3, 2, 1)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t0); + t1 = _mm_shuffle_ps2(m2, m3, _MM_SHUFFLE(3, 3, 2, 2)); + tt = _mm_shuffle_epi32(m0, _MM_SHUFFLE( 0, 0, 3, 3)); + t1 = _mm_blend_epi16(tt, t1, 0xCC); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t1); + diagonalize(&rows[0], &rows[2], &rows[3]); + t2 = _mm_unpacklo_epi64(m3, m1); + tt = _mm_blend_epi16(t2, m2, 0xC0); + t2 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 1, 3, 2, 0)); + g1(&rows[0], &rows[1], &rows[2], &rows[3], t2); + t3 = _mm_unpackhi_epi32(m1, m3); + tt = _mm_unpacklo_epi32(m2, t3); + t3 = _mm_shuffle_epi32(tt, _MM_SHUFFLE( 0, 1, 3, 2)); + g2(&rows[0], &rows[1], &rows[2], &rows[3], t3); + undiagonalize(&rows[0], &rows[2], &rows[3]); } -static void blake3_compress_in_place(uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t counter, - uint8_t flags) { - __m128i rows[4]; - compress_pre(rows, cv, block, block_len, counter, flags); - storeu(xorv(rows[0], rows[2]), (uint8_t *)&cv[0]); - storeu(xorv(rows[1], rows[3]), (uint8_t *)&cv[4]); +static void blake3_compress_in_place( uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags ) { + __m128i rows[4]; + + compress_pre(rows, cv, block, block_len, counter, flags); + storeu(xorv(rows[0], rows[2]), (uint8_t *)&cv[0]); + storeu(xorv(rows[1], rows[3]), (uint8_t *)&cv[4]); } -static void blake3_compress_xof(const uint32_t cv[8], - const uint8_t block[BLAKE3_BLOCK_LEN], - uint8_t block_len, uint64_t 
counter, - uint8_t flags, uint8_t out[64]) { - __m128i rows[4]; - compress_pre(rows, cv, block, block_len, counter, flags); - storeu(xorv(rows[0], rows[2]), &out[0]); - storeu(xorv(rows[1], rows[3]), &out[16]); - storeu(xorv(rows[2], loadu((uint8_t *)&cv[0])), &out[32]); - storeu(xorv(rows[3], loadu((uint8_t *)&cv[4])), &out[48]); +static void blake3_compress_xof( const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64] ) { + __m128i rows[4]; + + compress_pre(rows, cv, block, block_len, counter, flags); + storeu(xorv(rows[0], rows[2]), &out[0] ); + storeu(xorv(rows[1], rows[3]), &out[16]); + storeu(xorv(rows[2], loadu((uint8_t *)&cv[0])), &out[32]); + storeu(xorv(rows[3], loadu((uint8_t *)&cv[4])), &out[48]); } -static FORCE_INLINE void round_fn(__m128i v[16], __m128i m[16], size_t r) { - v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][0]]); - v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][2]]); - v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][4]]); - v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][6]]); - v[0] = addv(v[0], v[4]); - v[1] = addv(v[1], v[5]); - v[2] = addv(v[2], v[6]); - v[3] = addv(v[3], v[7]); - v[12] = xorv(v[12], v[0]); - v[13] = xorv(v[13], v[1]); - v[14] = xorv(v[14], v[2]); - v[15] = xorv(v[15], v[3]); - v[12] = rot16(v[12]); - v[13] = rot16(v[13]); - v[14] = rot16(v[14]); - v[15] = rot16(v[15]); - v[8] = addv(v[8], v[12]); - v[9] = addv(v[9], v[13]); - v[10] = addv(v[10], v[14]); - v[11] = addv(v[11], v[15]); - v[4] = xorv(v[4], v[8]); - v[5] = xorv(v[5], v[9]); - v[6] = xorv(v[6], v[10]); - v[7] = xorv(v[7], v[11]); - v[4] = rot12(v[4]); - v[5] = rot12(v[5]); - v[6] = rot12(v[6]); - v[7] = rot12(v[7]); - v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][1]]); - v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][3]]); - v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][5]]); - v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][7]]); - v[0] = addv(v[0], v[4]); - v[1] = addv(v[1], v[5]); - v[2] = addv(v[2], 
v[6]); - v[3] = addv(v[3], v[7]); - v[12] = xorv(v[12], v[0]); - v[13] = xorv(v[13], v[1]); - v[14] = xorv(v[14], v[2]); - v[15] = xorv(v[15], v[3]); - v[12] = rot8(v[12]); - v[13] = rot8(v[13]); - v[14] = rot8(v[14]); - v[15] = rot8(v[15]); - v[8] = addv(v[8], v[12]); - v[9] = addv(v[9], v[13]); - v[10] = addv(v[10], v[14]); - v[11] = addv(v[11], v[15]); - v[4] = xorv(v[4], v[8]); - v[5] = xorv(v[5], v[9]); - v[6] = xorv(v[6], v[10]); - v[7] = xorv(v[7], v[11]); - v[4] = rot7(v[4]); - v[5] = rot7(v[5]); - v[6] = rot7(v[6]); - v[7] = rot7(v[7]); - - v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][8]]); - v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][10]]); - v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][12]]); - v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][14]]); - v[0] = addv(v[0], v[5]); - v[1] = addv(v[1], v[6]); - v[2] = addv(v[2], v[7]); - v[3] = addv(v[3], v[4]); - v[15] = xorv(v[15], v[0]); - v[12] = xorv(v[12], v[1]); - v[13] = xorv(v[13], v[2]); - v[14] = xorv(v[14], v[3]); - v[15] = rot16(v[15]); - v[12] = rot16(v[12]); - v[13] = rot16(v[13]); - v[14] = rot16(v[14]); - v[10] = addv(v[10], v[15]); - v[11] = addv(v[11], v[12]); - v[8] = addv(v[8], v[13]); - v[9] = addv(v[9], v[14]); - v[5] = xorv(v[5], v[10]); - v[6] = xorv(v[6], v[11]); - v[7] = xorv(v[7], v[8]); - v[4] = xorv(v[4], v[9]); - v[5] = rot12(v[5]); - v[6] = rot12(v[6]); - v[7] = rot12(v[7]); - v[4] = rot12(v[4]); - v[0] = addv(v[0], m[(size_t)MSG_SCHEDULE[r][9]]); - v[1] = addv(v[1], m[(size_t)MSG_SCHEDULE[r][11]]); - v[2] = addv(v[2], m[(size_t)MSG_SCHEDULE[r][13]]); - v[3] = addv(v[3], m[(size_t)MSG_SCHEDULE[r][15]]); - v[0] = addv(v[0], v[5]); - v[1] = addv(v[1], v[6]); - v[2] = addv(v[2], v[7]); - v[3] = addv(v[3], v[4]); - v[15] = xorv(v[15], v[0]); - v[12] = xorv(v[12], v[1]); - v[13] = xorv(v[13], v[2]); - v[14] = xorv(v[14], v[3]); - v[15] = rot8(v[15]); - v[12] = rot8(v[12]); - v[13] = rot8(v[13]); - v[14] = rot8(v[14]); - v[10] = addv(v[10], v[15]); - v[11] = addv(v[11], v[12]); - v[8] = 
addv(v[8], v[13]); - v[9] = addv(v[9], v[14]); - v[5] = xorv(v[5], v[10]); - v[6] = xorv(v[6], v[11]); - v[7] = xorv(v[7], v[8]); - v[4] = xorv(v[4], v[9]); - v[5] = rot7(v[5]); - v[6] = rot7(v[6]); - v[7] = rot7(v[7]); - v[4] = rot7(v[4]); +static FORCE_INLINE void round_fn( __m128i v[16], __m128i m[16], size_t r ) { + v[ 0] = addv(v[ 0], m[(size_t)MSG_SCHEDULE[r][0]]); + v[ 1] = addv(v[ 1], m[(size_t)MSG_SCHEDULE[r][2]]); + v[ 2] = addv(v[ 2], m[(size_t)MSG_SCHEDULE[r][4]]); + v[ 3] = addv(v[ 3], m[(size_t)MSG_SCHEDULE[r][6]]); + v[ 0] = addv(v[ 0], v[4]); + v[ 1] = addv(v[ 1], v[5]); + v[ 2] = addv(v[ 2], v[6]); + v[ 3] = addv(v[ 3], v[7]); + v[12] = xorv(v[12], v[0]); + v[13] = xorv(v[13], v[1]); + v[14] = xorv(v[14], v[2]); + v[15] = xorv(v[15], v[3]); + v[12] = rot16(v[12]); + v[13] = rot16(v[13]); + v[14] = rot16(v[14]); + v[15] = rot16(v[15]); + v[ 8] = addv(v [ 8], v[12]); + v[ 9] = addv(v [ 9], v[13]); + v[10] = addv(v [10], v[14]); + v[11] = addv(v [11], v[15]); + v[ 4] = xorv(v [ 4], v[ 8]); + v[ 5] = xorv(v [ 5], v[ 9]); + v[ 6] = xorv(v [ 6], v[10]); + v[ 7] = xorv(v [ 7], v[11]); + v[ 4] = rot12(v[ 4]); + v[ 5] = rot12(v[ 5]); + v[ 6] = rot12(v[ 6]); + v[ 7] = rot12(v[ 7]); + v[ 0] = addv(v[ 0], m[(size_t)MSG_SCHEDULE[r][1]]); + v[ 1] = addv(v[ 1], m[(size_t)MSG_SCHEDULE[r][3]]); + v[ 2] = addv(v[ 2], m[(size_t)MSG_SCHEDULE[r][5]]); + v[ 3] = addv(v[ 3], m[(size_t)MSG_SCHEDULE[r][7]]); + v[ 0] = addv(v[ 0], v[4]); + v[ 1] = addv(v[ 1], v[5]); + v[ 2] = addv(v[ 2], v[6]); + v[ 3] = addv(v[ 3], v[7]); + v[12] = xorv(v[12], v[0]); + v[13] = xorv(v[13], v[1]); + v[14] = xorv(v[14], v[2]); + v[15] = xorv(v[15], v[3]); + v[12] = rot8(v[12]); + v[13] = rot8(v[13]); + v[14] = rot8(v[14]); + v[15] = rot8(v[15]); + v[ 8] = addv(v[ 8], v[12]); + v[ 9] = addv(v[ 9], v[13]); + v[10] = addv(v[10], v[14]); + v[11] = addv(v[11], v[15]); + v[ 4] = xorv(v[ 4], v[ 8]); + v[ 5] = xorv(v[ 5], v[ 9]); + v[ 6] = xorv(v[ 6], v[10]); + v[ 7] = xorv(v[ 7], v[11]); + v[ 4] = 
rot7(v[ 4]); + v[ 5] = rot7(v[ 5]); + v[ 6] = rot7(v[ 6]); + v[ 7] = rot7(v[ 7]); + + v[ 0] = addv(v[ 0], m[(size_t)MSG_SCHEDULE[r][ 8]]); + v[ 1] = addv(v[ 1], m[(size_t)MSG_SCHEDULE[r][10]]); + v[ 2] = addv(v[ 2], m[(size_t)MSG_SCHEDULE[r][12]]); + v[ 3] = addv(v[ 3], m[(size_t)MSG_SCHEDULE[r][14]]); + v[ 0] = addv(v[ 0], v[5]); + v[ 1] = addv(v[ 1], v[6]); + v[ 2] = addv(v[ 2], v[7]); + v[ 3] = addv(v[ 3], v[4]); + v[15] = xorv(v[15], v[0]); + v[12] = xorv(v[12], v[1]); + v[13] = xorv(v[13], v[2]); + v[14] = xorv(v[14], v[3]); + v[15] = rot16(v[15]); + v[12] = rot16(v[12]); + v[13] = rot16(v[13]); + v[14] = rot16(v[14]); + v[10] = addv(v [10], v[15]); + v[11] = addv(v [11], v[12]); + v[ 8] = addv(v [ 8], v[13]); + v[ 9] = addv(v [ 9], v[14]); + v[ 5] = xorv(v [ 5], v[10]); + v[ 6] = xorv(v [ 6], v[11]); + v[ 7] = xorv(v [ 7], v[ 8]); + v[ 4] = xorv(v [ 4], v[ 9]); + v[ 5] = rot12(v[ 5]); + v[ 6] = rot12(v[ 6]); + v[ 7] = rot12(v[ 7]); + v[ 4] = rot12(v[ 4]); + v[ 0] = addv(v[ 0], m[(size_t)MSG_SCHEDULE[r][ 9]]); + v[ 1] = addv(v[ 1], m[(size_t)MSG_SCHEDULE[r][11]]); + v[ 2] = addv(v[ 2], m[(size_t)MSG_SCHEDULE[r][13]]); + v[ 3] = addv(v[ 3], m[(size_t)MSG_SCHEDULE[r][15]]); + v[ 0] = addv(v[ 0], v[5]); + v[ 1] = addv(v[ 1], v[6]); + v[ 2] = addv(v[ 2], v[7]); + v[ 3] = addv(v[ 3], v[4]); + v[15] = xorv(v[15], v[0]); + v[12] = xorv(v[12], v[1]); + v[13] = xorv(v[13], v[2]); + v[14] = xorv(v[14], v[3]); + v[15] = rot8(v[15]); + v[12] = rot8(v[12]); + v[13] = rot8(v[13]); + v[14] = rot8(v[14]); + v[10] = addv(v[10], v[15]); + v[11] = addv(v[11], v[12]); + v[ 8] = addv(v[ 8], v[13]); + v[ 9] = addv(v[ 9], v[14]); + v[ 5] = xorv(v[ 5], v[10]); + v[ 6] = xorv(v[ 6], v[11]); + v[ 7] = xorv(v[ 7], v[ 8]); + v[ 4] = xorv(v[ 4], v[ 9]); + v[ 5] = rot7(v[ 5]); + v[ 6] = rot7(v[ 6]); + v[ 7] = rot7(v[ 7]); + v[ 4] = rot7(v[ 4]); } -static FORCE_INLINE void transpose_vecs(__m128i vecs[DEGREE]) { - // Interleave 32-bit lates. 
The low unpack is lanes 00/11 and the high is - // 22/33. Note that this doesn't split the vector into two lanes, as the - // AVX2 counterparts do. - __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]); - __m128i ab_23 = _mm_unpackhi_epi32(vecs[0], vecs[1]); - __m128i cd_01 = _mm_unpacklo_epi32(vecs[2], vecs[3]); - __m128i cd_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]); - - // Interleave 64-bit lanes. - __m128i abcd_0 = _mm_unpacklo_epi64(ab_01, cd_01); - __m128i abcd_1 = _mm_unpackhi_epi64(ab_01, cd_01); - __m128i abcd_2 = _mm_unpacklo_epi64(ab_23, cd_23); - __m128i abcd_3 = _mm_unpackhi_epi64(ab_23, cd_23); - - vecs[0] = abcd_0; - vecs[1] = abcd_1; - vecs[2] = abcd_2; - vecs[3] = abcd_3; +static FORCE_INLINE void transpose_vecs( __m128i vecs[DEGREE] ) { + // Interleave 32-bit lates. The low unpack is lanes 00/11 and the high is + // 22/33. Note that this doesn't split the vector into two lanes, as the + // AVX2 counterparts do. + __m128i ab_01 = _mm_unpacklo_epi32(vecs[0], vecs[1]); + __m128i ab_23 = _mm_unpackhi_epi32(vecs[0], vecs[1]); + __m128i cd_01 = _mm_unpacklo_epi32(vecs[2], vecs[3]); + __m128i cd_23 = _mm_unpackhi_epi32(vecs[2], vecs[3]); + + // Interleave 64-bit lanes. 
+ __m128i abcd_0 = _mm_unpacklo_epi64(ab_01, cd_01); + __m128i abcd_1 = _mm_unpackhi_epi64(ab_01, cd_01); + __m128i abcd_2 = _mm_unpacklo_epi64(ab_23, cd_23); + __m128i abcd_3 = _mm_unpackhi_epi64(ab_23, cd_23); + + vecs[0] = abcd_0; + vecs[1] = abcd_1; + vecs[2] = abcd_2; + vecs[3] = abcd_3; } -static FORCE_INLINE void transpose_msg_vecs(const uint8_t *const *inputs, - size_t block_offset, __m128i out[16]) { - out[0] = loadu(&inputs[0][block_offset + 0 * sizeof(__m128i)]); - out[1] = loadu(&inputs[1][block_offset + 0 * sizeof(__m128i)]); - out[2] = loadu(&inputs[2][block_offset + 0 * sizeof(__m128i)]); - out[3] = loadu(&inputs[3][block_offset + 0 * sizeof(__m128i)]); - out[4] = loadu(&inputs[0][block_offset + 1 * sizeof(__m128i)]); - out[5] = loadu(&inputs[1][block_offset + 1 * sizeof(__m128i)]); - out[6] = loadu(&inputs[2][block_offset + 1 * sizeof(__m128i)]); - out[7] = loadu(&inputs[3][block_offset + 1 * sizeof(__m128i)]); - out[8] = loadu(&inputs[0][block_offset + 2 * sizeof(__m128i)]); - out[9] = loadu(&inputs[1][block_offset + 2 * sizeof(__m128i)]); - out[10] = loadu(&inputs[2][block_offset + 2 * sizeof(__m128i)]); - out[11] = loadu(&inputs[3][block_offset + 2 * sizeof(__m128i)]); - out[12] = loadu(&inputs[0][block_offset + 3 * sizeof(__m128i)]); - out[13] = loadu(&inputs[1][block_offset + 3 * sizeof(__m128i)]); - out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]); - out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]); - for (size_t i = 0; i < 4; ++i) { - _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0); - } - transpose_vecs(&out[0]); - transpose_vecs(&out[4]); - transpose_vecs(&out[8]); - transpose_vecs(&out[12]); +static FORCE_INLINE void transpose_msg_vecs( const uint8_t * const * inputs, size_t block_offset, __m128i out[16] ) { + out[ 0] = loadu(&inputs[0][block_offset + 0 * sizeof(__m128i)]); + out[ 1] = loadu(&inputs[1][block_offset + 0 * sizeof(__m128i)]); + out[ 2] = loadu(&inputs[2][block_offset + 0 * 
sizeof(__m128i)]); + out[ 3] = loadu(&inputs[3][block_offset + 0 * sizeof(__m128i)]); + out[ 4] = loadu(&inputs[0][block_offset + 1 * sizeof(__m128i)]); + out[ 5] = loadu(&inputs[1][block_offset + 1 * sizeof(__m128i)]); + out[ 6] = loadu(&inputs[2][block_offset + 1 * sizeof(__m128i)]); + out[ 7] = loadu(&inputs[3][block_offset + 1 * sizeof(__m128i)]); + out[ 8] = loadu(&inputs[0][block_offset + 2 * sizeof(__m128i)]); + out[ 9] = loadu(&inputs[1][block_offset + 2 * sizeof(__m128i)]); + out[10] = loadu(&inputs[2][block_offset + 2 * sizeof(__m128i)]); + out[11] = loadu(&inputs[3][block_offset + 2 * sizeof(__m128i)]); + out[12] = loadu(&inputs[0][block_offset + 3 * sizeof(__m128i)]); + out[13] = loadu(&inputs[1][block_offset + 3 * sizeof(__m128i)]); + out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]); + out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]); + for (size_t i = 0; i < 4; ++i) { + _mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0); + } + transpose_vecs(&out[0] ); + transpose_vecs(&out[4] ); + transpose_vecs(&out[8] ); + transpose_vecs(&out[12]); } -static FORCE_INLINE void load_counters(uint64_t counter, bool increment_counter, - __m128i *out_lo, __m128i *out_hi) { - const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter); - const __m128i add0 = _mm_set_epi32(3, 2, 1, 0); - const __m128i add1 = _mm_and_si128(mask, add0); - __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1); - __m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)), - _mm_xor_si128( l, _mm_set1_epi32(0x80000000))); - __m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry); - *out_lo = l; - *out_hi = h; +static FORCE_INLINE void load_counters( uint64_t counter, bool increment_counter, __m128i * out_lo, __m128i * out_hi ) { + const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter); + const __m128i add0 = _mm_set_epi32(3, 2, 1, 0); + const __m128i add1 = _mm_and_si128(mask, 
add0); + __m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1); + __m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32( + 0x80000000)), _mm_xor_si128(l, _mm_set1_epi32(0x80000000))); + __m128i h = _mm_sub_epi32(_mm_set1_epi32( (int32_t)(counter >> 32)), carry); + + *out_lo = l; + *out_hi = h; } -static void blake3_hash4(const uint8_t *const *inputs, size_t blocks, - const uint32_t key[8], uint64_t counter, - bool increment_counter, uint8_t flags, - uint8_t flags_start, uint8_t flags_end, uint8_t *out) { - __m128i h_vecs[8] = { - set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]), - set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]), - }; - __m128i counter_low_vec, counter_high_vec; - load_counters(counter, increment_counter, &counter_low_vec, - &counter_high_vec); - uint8_t block_flags = flags | flags_start; - - for (size_t block = 0; block < blocks; block++) { - if (block + 1 == blocks) { - block_flags |= flags_end; - } - __m128i block_len_vec = set1(BLAKE3_BLOCK_LEN); - __m128i block_flags_vec = set1(block_flags); - __m128i msg_vecs[16]; - transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs); - - __m128i v[16] = { - h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3], - h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7], - set1(IV[0]), set1(IV[1]), set1(IV[2]), set1(IV[3]), - counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec, +static void blake3_hash4( const uint8_t * const * inputs, size_t blocks, const uint32_t key[8], uint64_t counter, + bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t * out ) { + __m128i h_vecs[8] = { + set1(key[0]), set1(key[1]), set1(key[2]), set1(key[3]), + set1(key[4]), set1(key[5]), set1(key[6]), set1(key[7]), }; - round_fn(v, msg_vecs, 0); - round_fn(v, msg_vecs, 1); - round_fn(v, msg_vecs, 2); - round_fn(v, msg_vecs, 3); - round_fn(v, msg_vecs, 4); - round_fn(v, msg_vecs, 5); - round_fn(v, msg_vecs, 6); - h_vecs[0] = xorv(v[0], v[8]); - h_vecs[1] = 
xorv(v[1], v[9]); - h_vecs[2] = xorv(v[2], v[10]); - h_vecs[3] = xorv(v[3], v[11]); - h_vecs[4] = xorv(v[4], v[12]); - h_vecs[5] = xorv(v[5], v[13]); - h_vecs[6] = xorv(v[6], v[14]); - h_vecs[7] = xorv(v[7], v[15]); - - block_flags = flags; - } - - transpose_vecs(&h_vecs[0]); - transpose_vecs(&h_vecs[4]); - // The first four vecs now contain the first half of each output, and the - // second four vecs contain the second half of each output. - storeu(h_vecs[0], &out[0 * sizeof(__m128i)]); - storeu(h_vecs[4], &out[1 * sizeof(__m128i)]); - storeu(h_vecs[1], &out[2 * sizeof(__m128i)]); - storeu(h_vecs[5], &out[3 * sizeof(__m128i)]); - storeu(h_vecs[2], &out[4 * sizeof(__m128i)]); - storeu(h_vecs[6], &out[5 * sizeof(__m128i)]); - storeu(h_vecs[3], &out[6 * sizeof(__m128i)]); - storeu(h_vecs[7], &out[7 * sizeof(__m128i)]); + __m128i counter_low_vec, counter_high_vec; + + load_counters(counter, increment_counter, &counter_low_vec, &counter_high_vec); + uint8_t block_flags = flags | flags_start; + + for (size_t block = 0; block < blocks; block++) { + if (block + 1 == blocks) { + block_flags |= flags_end; + } + __m128i block_len_vec = set1(BLAKE3_BLOCK_LEN); + __m128i block_flags_vec = set1(block_flags ); + __m128i msg_vecs[16]; + transpose_msg_vecs(inputs, block * BLAKE3_BLOCK_LEN, msg_vecs); + + __m128i v[16] = { + h_vecs[0], h_vecs[1], h_vecs[2], h_vecs[3], + h_vecs[4], h_vecs[5], h_vecs[6], h_vecs[7], + set1(IV[0]), set1(IV[1]), set1(IV[2]), set1(IV[3]), + counter_low_vec, counter_high_vec, block_len_vec, block_flags_vec, + }; + round_fn(v, msg_vecs, 0); + round_fn(v, msg_vecs, 1); + round_fn(v, msg_vecs, 2); + round_fn(v, msg_vecs, 3); + round_fn(v, msg_vecs, 4); + round_fn(v, msg_vecs, 5); + round_fn(v, msg_vecs, 6); + h_vecs[0] = xorv(v[0], v[ 8]); + h_vecs[1] = xorv(v[1], v[ 9]); + h_vecs[2] = xorv(v[2], v[10]); + h_vecs[3] = xorv(v[3], v[11]); + h_vecs[4] = xorv(v[4], v[12]); + h_vecs[5] = xorv(v[5], v[13]); + h_vecs[6] = xorv(v[6], v[14]); + h_vecs[7] = xorv(v[7], 
v[15]); + + block_flags = flags; + } + + transpose_vecs(&h_vecs[0]); + transpose_vecs(&h_vecs[4]); + // The first four vecs now contain the first half of each output, and the + // second four vecs contain the second half of each output. + storeu(h_vecs[0], &out[0 * sizeof(__m128i)]); + storeu(h_vecs[4], &out[1 * sizeof(__m128i)]); + storeu(h_vecs[1], &out[2 * sizeof(__m128i)]); + storeu(h_vecs[5], &out[3 * sizeof(__m128i)]); + storeu(h_vecs[2], &out[4 * sizeof(__m128i)]); + storeu(h_vecs[6], &out[5 * sizeof(__m128i)]); + storeu(h_vecs[3], &out[6 * sizeof(__m128i)]); + storeu(h_vecs[7], &out[7 * sizeof(__m128i)]); } -static FORCE_INLINE void hash_one(const uint8_t *input, size_t blocks, - const uint32_t key[8], uint64_t counter, - uint8_t flags, uint8_t flags_start, - uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) { - uint32_t cv[8]; - memcpy(cv, key, BLAKE3_KEY_LEN); - uint8_t block_flags = flags | flags_start; - while (blocks > 0) { - if (blocks == 1) { - block_flags |= flags_end; +static FORCE_INLINE void hash_one( const uint8_t * input, size_t blocks, const uint32_t key[8], uint64_t counter, + uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN] ) { + uint32_t cv[8]; + + memcpy(cv, key, BLAKE3_KEY_LEN); + uint8_t block_flags = flags | flags_start; + while (blocks > 0) { + if (blocks == 1) { + block_flags |= flags_end; + } + blake3_compress_in_place(cv, input, BLAKE3_BLOCK_LEN, counter, block_flags); + input = &input[BLAKE3_BLOCK_LEN]; + blocks -= 1; + block_flags = flags; } - blake3_compress_in_place(cv, input, BLAKE3_BLOCK_LEN, counter, - block_flags); - input = &input[BLAKE3_BLOCK_LEN]; - blocks -= 1; - block_flags = flags; - } - memcpy(out, cv, BLAKE3_OUT_LEN); + memcpy(out, cv, BLAKE3_OUT_LEN); } -static void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, - size_t blocks, const uint32_t key[8], - uint64_t counter, bool increment_counter, - uint8_t flags, uint8_t flags_start, - uint8_t flags_end, uint8_t *out) 
{ - while (num_inputs >= DEGREE) { - blake3_hash4(inputs, blocks, key, counter, increment_counter, flags, - flags_start, flags_end, out); - if (increment_counter) { - counter += DEGREE; +static void blake3_hash_many( const uint8_t * const * inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, bool increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t * out ) { + while (num_inputs >= DEGREE) { + blake3_hash4(inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out); + if (increment_counter) { + counter += DEGREE; + } + inputs += DEGREE; + num_inputs -= DEGREE; + out = &out[DEGREE * BLAKE3_OUT_LEN]; } - inputs += DEGREE; - num_inputs -= DEGREE; - out = &out[DEGREE * BLAKE3_OUT_LEN]; - } - while (num_inputs > 0) { - hash_one(inputs[0], blocks, key, counter, flags, flags_start, - flags_end, out); - if (increment_counter) { - counter += 1; + while (num_inputs > 0) { + hash_one(inputs[0], blocks, key, counter, flags, flags_start, flags_end, out); + if (increment_counter) { + counter += 1; + } + inputs += 1; + num_inputs -= 1; + out = &out[BLAKE3_OUT_LEN]; } - inputs += 1; - num_inputs -= 1; - out = &out[BLAKE3_OUT_LEN]; - } } diff --git a/hashes/blockpearson.cpp b/hashes/blockpearson.cpp index c10a520b..41eb3b95 100644 --- a/hashes/blockpearson.cpp +++ b/hashes/blockpearson.cpp @@ -33,11 +33,11 @@ // David Stafford's Mix13 from http://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html // the author clarified via eMail that this of his work is released to the public domain -#define permute64(in) \ - in ^= (in >> 30); \ - in *= UINT64_C(0xbf58476d1ce4e5b9); \ - in ^= (in >> 27); \ - in *= UINT64_C(0x94d049bb133111eb); \ +#define permute64(in) \ + in ^= (in >> 30); \ + in *= UINT64_C(0xbf58476d1ce4e5b9); \ + in ^= (in >> 27); \ + in *= UINT64_C(0x94d049bb133111eb); \ in ^= (in >> 31) #define dec1(in) \ @@ -60,189 +60,189 @@ dec##part(hash##part); \ permute64(hash##part) 
-template < bool bswap > -static void blockpearson_hash_256(const void * in, const size_t org_len, const seed_t seed, void * out) { - const uint8_t * current = (const uint8_t *)in; - - uint64_t len = (uint64_t)org_len; - uint64_t hash1 = (uint64_t)seed; - - permute64(hash1); - - uint64_t hash2 = hash1; - uint64_t hash3 = hash1; - uint64_t hash4 = hash1; - - while (len > 7) { - hash_round(hash, GET_U64(current, 0), 1); - hash_round(hash, GET_U64(current, 0), 2); - hash_round(hash, GET_U64(current, 0), 3); - hash_round(hash, GET_U64(current, 0), 4); - - current += 8; - len -= 8; - } - - // handle the rest - hash1 = ~hash1; - hash2 = ~hash2; - hash3 = ~hash3; - hash4 = ~hash4; - - while(len) { - // byte-wise, no endianess - hash_round(hash, *current, 1); - hash_round(hash, *current, 2); - hash_round(hash, *current, 3); - hash_round(hash, *current, 4); - - current++; - len--; - } - - // digest length - hash1 = ~hash1; - hash2 = ~hash2; - hash3 = ~hash3; - hash4 = ~hash4; - - hash_round(hash, (uint64_t)org_len, 1); - hash_round(hash, (uint64_t)org_len, 2); - hash_round(hash, (uint64_t)org_len, 3); - hash_round(hash, (uint64_t)org_len, 4); - - PUT_U64(hash4, (uint8_t *)out, 0); - PUT_U64(hash3, (uint8_t *)out, 8); - PUT_U64(hash2, (uint8_t *)out, 16); - PUT_U64(hash1, (uint8_t *)out, 24); +template +static void blockpearson_hash_256( const void * in, const size_t org_len, const seed_t seed, void * out ) { + const uint8_t * current = (const uint8_t *)in; + + uint64_t len = (uint64_t )org_len; + uint64_t hash1 = (uint64_t )seed; + + permute64(hash1); + + uint64_t hash2 = hash1; + uint64_t hash3 = hash1; + uint64_t hash4 = hash1; + + while (len > 7) { + hash_round(hash, GET_U64(current, 0), 1); + hash_round(hash, GET_U64(current, 0), 2); + hash_round(hash, GET_U64(current, 0), 3); + hash_round(hash, GET_U64(current, 0), 4); + + current += 8; + len -= 8; + } + + // handle the rest + hash1 = ~hash1; + hash2 = ~hash2; + hash3 = ~hash3; + hash4 = ~hash4; + + while (len) { + // 
byte-wise, no endianess + hash_round(hash, *current, 1); + hash_round(hash, *current, 2); + hash_round(hash, *current, 3); + hash_round(hash, *current, 4); + + current++; + len--; + } + + // digest length + hash1 = ~hash1; + hash2 = ~hash2; + hash3 = ~hash3; + hash4 = ~hash4; + + hash_round(hash, (uint64_t)org_len, 1); + hash_round(hash, (uint64_t)org_len, 2); + hash_round(hash, (uint64_t)org_len, 3); + hash_round(hash, (uint64_t)org_len, 4); + + PUT_U64(hash4, (uint8_t *)out, 0); + PUT_U64(hash3, (uint8_t *)out, 8); + PUT_U64(hash2, (uint8_t *)out, 16); + PUT_U64(hash1, (uint8_t *)out, 24); } -template < bool bswap > -static void blockpearson_hash_128(const void * in, const size_t org_len, const seed_t seed, void * out) { - const uint8_t * current = (const uint8_t *)in; +template +static void blockpearson_hash_128( const void * in, const size_t org_len, const seed_t seed, void * out ) { + const uint8_t * current = (const uint8_t *)in; - uint64_t len = (uint64_t)org_len; - uint64_t hash1 = (uint64_t)seed; + uint64_t len = (uint64_t )org_len; + uint64_t hash1 = (uint64_t )seed; - permute64(hash1); + permute64(hash1); - uint64_t hash2 = hash1; + uint64_t hash2 = hash1; - while (len > 7) { - hash_round(hash, GET_U64(current, 0), 1); - hash_round(hash, GET_U64(current, 0), 2); + while (len > 7) { + hash_round(hash, GET_U64(current, 0), 1); + hash_round(hash, GET_U64(current, 0), 2); - current += 8; - len -= 8; - } + current += 8; + len -= 8; + } - // handle the rest - hash1 = ~hash1; - hash2 = ~hash2; + // handle the rest + hash1 = ~hash1; + hash2 = ~hash2; - while(len) { - // byte-wise, no endianess - hash_round(hash, *current, 1); - hash_round(hash, *current, 2); + while (len) { + // byte-wise, no endianess + hash_round(hash, *current, 1); + hash_round(hash, *current, 2); - current++; - len--; - } + current++; + len--; + } - // digest length - hash1 = ~hash1; - hash2 = ~hash2; + // digest length + hash1 = ~hash1; + hash2 = ~hash2; - hash_round(hash, 
(uint64_t)org_len, 1); - hash_round(hash, (uint64_t)org_len, 2); + hash_round(hash, (uint64_t)org_len, 1); + hash_round(hash, (uint64_t)org_len, 2); - PUT_U64(hash2, (uint8_t *)out, 0); - PUT_U64(hash1, (uint8_t *)out, 8); + PUT_U64(hash2, (uint8_t *)out, 0); + PUT_U64(hash1, (uint8_t *)out, 8); } -template < bool bswap > -static void blockpearson_hash_64(const void * in, const size_t org_len, const seed_t seed, void * out) { - const uint8_t * current = (const uint8_t *)in; +template +static void blockpearson_hash_64( const void * in, const size_t org_len, const seed_t seed, void * out ) { + const uint8_t * current = (const uint8_t *)in; - uint64_t len = (uint64_t)org_len; - uint64_t hash1 = (uint64_t)seed; + uint64_t len = (uint64_t )org_len; + uint64_t hash1 = (uint64_t )seed; - permute64(hash1); + permute64(hash1); - while (len > 7) { - hash_round(hash, GET_U64(current, 0), 1); + while (len > 7) { + hash_round(hash, GET_U64(current, 0), 1); - current += 8; - len -= 8; - } + current += 8; + len -= 8; + } - // handle the rest - hash1 = ~hash1; + // handle the rest + hash1 = ~hash1; - while(len) { - // byte-wise, no endianess - hash_round(hash, *current, 1); + while (len) { + // byte-wise, no endianess + hash_round(hash, *current, 1); - current++; - len--; - } + current++; + len--; + } - // digest length - hash1 = ~hash1; + // digest length + hash1 = ~hash1; - hash_round(hash, (uint64_t)org_len, 1); + hash_round(hash, (uint64_t)org_len, 1); - // Previous SMHasher implementation didn't byteswap this properly - PUT_U64(hash1, (uint8_t *)out, 0); + // Previous SMHasher implementation didn't byteswap this properly + PUT_U64(hash1, (uint8_t *)out, 0); } REGISTER_FAMILY(pearsonblock, - $.src_url = "https://github.com/Logan007/pearsonB", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://github.com/Logan007/pearsonB", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); REGISTER_HASH(PearsonBlock_64, - $.desc = "Pearson-inspired block hash, 64-bit 
state", - $.hash_flags = - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 64, - $.verification_LE = 0x14C3D184, - $.verification_BE = 0x162C2D8A, - $.hashfn_native = blockpearson_hash_64, - $.hashfn_bswap = blockpearson_hash_64 -); + $.desc = "Pearson-inspired block hash, 64-bit state", + $.hash_flags = + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 64, + $.verification_LE = 0x14C3D184, + $.verification_BE = 0x162C2D8A, + $.hashfn_native = blockpearson_hash_64, + $.hashfn_bswap = blockpearson_hash_64 + ); REGISTER_HASH(PearsonBlock_128, - $.desc = "Pearson-inspired block hash, 128-bit state", - $.hash_flags = - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 128, - $.verification_LE = 0x6BEFE6EA, - $.verification_BE = 0x00D61079, - $.hashfn_native = blockpearson_hash_128, - $.hashfn_bswap = blockpearson_hash_128 -); + $.desc = "Pearson-inspired block hash, 128-bit state", + $.hash_flags = + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 128, + $.verification_LE = 0x6BEFE6EA, + $.verification_BE = 0x00D61079, + $.hashfn_native = blockpearson_hash_128, + $.hashfn_bswap = blockpearson_hash_128 + ); REGISTER_HASH(PearsonBlock_256, - $.desc = "Pearson-inspired block hash, 256-bit state", - $.hash_flags = - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_SLOW | - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 256, - $.verification_LE = 0x999B3C19, - $.verification_BE = 0x92D43B4F, - $.hashfn_native = blockpearson_hash_256, - $.hashfn_bswap = blockpearson_hash_256 -); + $.desc = "Pearson-inspired block hash, 
256-bit state", + $.hash_flags = + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_SLOW | + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 256, + $.verification_LE = 0x999B3C19, + $.verification_BE = 0x92D43B4F, + $.hashfn_native = blockpearson_hash_256, + $.hashfn_bswap = blockpearson_hash_256 + ); diff --git a/hashes/chaskey.cpp b/hashes/chaskey.cpp index 4c922e27..2e05e739 100644 --- a/hashes/chaskey.cpp +++ b/hashes/chaskey.cpp @@ -15,78 +15,78 @@ #include "Hashlib.h" //------------------------------------------------------------ -#define ROUND(v) \ - do { \ - v[0] += v[1]; v[1]=ROTL32(v[1], 5); \ - v[1] ^= v[0]; v[0]=ROTL32(v[0],16); \ - v[2] += v[3]; v[3]=ROTL32(v[3], 8); v[3] ^= v[2]; \ - v[0] += v[3]; v[3]=ROTL32(v[3],13); v[3] ^= v[0]; \ - v[2] += v[1]; v[1]=ROTL32(v[1], 7); \ - v[1] ^= v[2]; v[2]=ROTL32(v[2],16); \ +#define ROUND(v) \ + do { \ + v[0] += v[1]; v[1]=ROTL32(v[1], 5); \ + v[1] ^= v[0]; v[0]=ROTL32(v[0],16); \ + v[2] += v[3]; v[3]=ROTL32(v[3], 8); v[3] ^= v[2]; \ + v[0] += v[3]; v[3]=ROTL32(v[3],13); v[3] ^= v[0]; \ + v[2] += v[1]; v[1]=ROTL32(v[1], 7); \ + v[1] ^= v[2]; v[2]=ROTL32(v[2],16); \ } while(0) -template < uint32_t rounds, uint32_t tagwords, bool bswap > -static void chaskey_impl(uint8_t * tag, const uint8_t * m, const size_t mlen, - const uint32_t k[4], const uint32_t k1[4], const uint32_t k2[4]) { - const uint8_t * end = m + (((mlen - 1) >> 4) << 4); /* pointer to last message block */ - - uint32_t v[4]; - - v[0] = k[0]; - v[1] = k[1]; - v[2] = k[2]; - v[3] = k[3]; - - if (mlen != 0) { - for (; m != end; m += 16) { - v[0] ^= GET_U32(m, 0); - v[1] ^= GET_U32(m, 4); - v[2] ^= GET_U32(m, 8); - v[3] ^= GET_U32(m, 12); - for (uint32_t i = 0; i < rounds; i++) { - ROUND(v); - } - } - } - - const size_t remain = mlen & 0xF; - const uint8_t * lastblock; - const uint32_t * lastkey; - uint8_t lb[16]; - - if ((mlen != 0) && (remain == 0)) { - lastkey = k1; - lastblock = m; - } 
else { - lastkey = k2; - memset(lb, 0, sizeof(lb)); - memcpy(lb, m, remain); - lb[remain] = 0x01; /* padding bit */ - lastblock = lb; - } - - v[0] ^= GET_U32(lastblock, 0); - v[1] ^= GET_U32(lastblock, 4); - v[2] ^= GET_U32(lastblock, 8); - v[3] ^= GET_U32(lastblock, 12); - - v[0] ^= lastkey[0]; - v[1] ^= lastkey[1]; - v[2] ^= lastkey[2]; - v[3] ^= lastkey[3]; - - for (uint32_t i = 0; i < rounds; i++) { - ROUND(v); - } - - v[0] ^= lastkey[0]; - v[1] ^= lastkey[1]; - v[2] ^= lastkey[2]; - v[3] ^= lastkey[3]; - - for (int i = 0; i < tagwords; i++) { - PUT_U32(v[i], tag, 4*i); - } +template +static void chaskey_impl( uint8_t * tag, const uint8_t * m, const size_t mlen, const uint32_t k[4], + const uint32_t k1[4], const uint32_t k2[4] ) { + const uint8_t * end = m + (((mlen - 1) >> 4) << 4); /* pointer to last message block */ + + uint32_t v[4]; + + v[0] = k[0]; + v[1] = k[1]; + v[2] = k[2]; + v[3] = k[3]; + + if (mlen != 0) { + for (; m != end; m += 16) { + v[0] ^= GET_U32(m, 0); + v[1] ^= GET_U32(m, 4); + v[2] ^= GET_U32(m, 8); + v[3] ^= GET_U32(m, 12); + for (uint32_t i = 0; i < rounds; i++) { + ROUND(v); + } + } + } + + const size_t remain = mlen & 0xF; + const uint8_t * lastblock; + const uint32_t * lastkey; + uint8_t lb[16]; + + if ((mlen != 0) && (remain == 0)) { + lastkey = k1; + lastblock = m; + } else { + lastkey = k2; + memset(lb, 0, sizeof(lb)); + memcpy(lb, m, remain); + lb[remain] = 0x01; /* padding bit */ + lastblock = lb; + } + + v[0] ^= GET_U32(lastblock, 0); + v[1] ^= GET_U32(lastblock, 4); + v[2] ^= GET_U32(lastblock, 8); + v[3] ^= GET_U32(lastblock, 12); + + v[0] ^= lastkey[0]; + v[1] ^= lastkey[1]; + v[2] ^= lastkey[2]; + v[3] ^= lastkey[3]; + + for (uint32_t i = 0; i < rounds; i++) { + ROUND(v); + } + + v[0] ^= lastkey[0]; + v[1] ^= lastkey[1]; + v[2] ^= lastkey[2]; + v[3] ^= lastkey[3]; + + for (int i = 0; i < tagwords; i++) { + PUT_U32(v[i], tag, 4 * i); + } } //------------------------------------------------------------ @@ -100,17 +100,16 @@ 
static const volatile uint32_t C[2] = { 0x00, 0x87 }; out[3] = (in[3] << 1) | (in[2] >> 31); \ } while(0) - -static void make_subkeys(uint32_t k1[4], uint32_t k2[4], const uint32_t k[4]) { - TIMESTWO(k1, k); - TIMESTWO(k2, k1); +static void make_subkeys( uint32_t k1[4], uint32_t k2[4], const uint32_t k[4] ) { + TIMESTWO(k1, k ); + TIMESTWO(k2, k1); } //------------------------------------------------------------ typedef struct { - uint32_t k[4]; - uint32_t k1[4]; - uint32_t k2[4]; + uint32_t k[4]; + uint32_t k1[4]; + uint32_t k2[4]; } keys_t; static thread_local keys_t chaskeys; @@ -128,8 +127,8 @@ static thread_local keys_t chaskeys; // the state space. ROUND() also has full diffusion after 3 rounds, so // this is two full diffusions. Finally, a 6-round permutation is the // smallest number where chaskey passes this SMHasher3 test suite. -static uintptr_t seed_subkeys(uint64_t seed) { - uint32_t seedlo = (uint32_t)(seed); +static uintptr_t seed_subkeys( uint64_t seed ) { + uint32_t seedlo = (uint32_t)(seed ); uint32_t seedhi = (uint32_t)(seed >> 32); chaskeys.k[0] = seedlo ^ 0xe5d2aff1; @@ -148,83 +147,83 @@ static uintptr_t seed_subkeys(uint64_t seed) { return (uintptr_t)(&chaskeys); } -template < uint32_t rounds, uint32_t tagwords, bool bswap > -static void chaskey(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void chaskey( const void * in, const size_t len, const seed_t seed, void * out ) { const keys_t * keys = (const keys_t *)(uintptr_t)seed; - chaskey_impl((uint8_t *)out, (const uint8_t *)in, - len, keys->k, keys->k1, keys->k2); + + chaskey_impl((uint8_t *)out, (const uint8_t *)in, len, keys->k, keys->k1, keys->k2); } //------------------------------------------------------------ // Test vectors from chaskey-12 reference implementation static const uint8_t vectors[64][8] = { - { 0xdd, 0x3e, 0x18, 0x49, 0xd6, 0x82, 0x45, 0x55 }, - { 0xed, 0x1d, 0xa8, 0x9e, 0xc9, 0x31, 0x79, 0xca }, - { 0x98, 0xfe, 0x20, 0xa3, 0x43, 
0xcd, 0x66, 0x6f }, - { 0xf6, 0xf4, 0x18, 0xac, 0xdd, 0x7d, 0x9f, 0xa1 }, - { 0x4c, 0xf0, 0x49, 0x60, 0x09, 0x99, 0x49, 0xf3 }, - { 0x75, 0xc8, 0x32, 0x52, 0x65, 0x3d, 0x3b, 0x57 }, - { 0x96, 0x4b, 0x04, 0x61, 0xfb, 0xe9, 0x22, 0x73 }, - { 0x14, 0x1f, 0xa0, 0x8b, 0xbf, 0x39, 0x96, 0x36 }, - { 0x41, 0x2d, 0x98, 0xed, 0x93, 0x6d, 0x4a, 0xb2 }, - { 0xfb, 0x0d, 0x98, 0xbc, 0x70, 0xe3, 0x05, 0xf9 }, - { 0x36, 0xf8, 0x8e, 0x1f, 0xda, 0x86, 0xc8, 0xab }, - { 0x4d, 0x1a, 0x18, 0x15, 0x86, 0x8a, 0x5a, 0xa8 }, - { 0x7a, 0x79, 0x12, 0xc1, 0x99, 0x9e, 0xae, 0x81 }, - { 0x9c, 0xa1, 0x11, 0x37, 0xb4, 0xa3, 0x46, 0x01 }, - { 0x79, 0x05, 0x14, 0x2f, 0x3b, 0xe7, 0x7e, 0x67 }, - { 0x6a, 0x3e, 0xe3, 0xd3, 0x5c, 0x04, 0x33, 0x97 }, - { 0xd1, 0x39, 0x70, 0xd7, 0xbe, 0x9b, 0x23, 0x50 }, - { 0x32, 0xac, 0xd9, 0x14, 0xbf, 0xda, 0x3b, 0xc8 }, - { 0x8a, 0x58, 0xd8, 0x16, 0xcb, 0x7a, 0x14, 0x83 }, - { 0x03, 0xf4, 0xd6, 0x66, 0x38, 0xef, 0xad, 0x8d }, - { 0xf9, 0x93, 0x22, 0x37, 0xff, 0x05, 0xe8, 0x31 }, - { 0xf5, 0xfe, 0xdb, 0x13, 0x48, 0x62, 0xb4, 0x71 }, - { 0x8b, 0xb5, 0x54, 0x86, 0xf3, 0x8d, 0x57, 0xea }, - { 0x8a, 0x3a, 0xcb, 0x94, 0xb5, 0xad, 0x59, 0x1c }, - { 0x7c, 0xe3, 0x70, 0x87, 0x23, 0xf7, 0x49, 0x5f }, - { 0xf4, 0x2f, 0x3d, 0x2f, 0x40, 0x57, 0x10, 0xc2 }, - { 0xb3, 0x93, 0x3a, 0x16, 0x7e, 0x56, 0x36, 0xac }, - { 0x89, 0x9a, 0x79, 0x45, 0x42, 0x3a, 0x5e, 0x1b }, - { 0x65, 0xe1, 0x2d, 0xf5, 0xa6, 0x95, 0xfa, 0xc8 }, - { 0xb8, 0x24, 0x49, 0xd8, 0xc8, 0xa0, 0x6a, 0xe9 }, - { 0xa8, 0x50, 0xdf, 0xba, 0xde, 0xfa, 0x42, 0x29 }, - { 0xfd, 0x42, 0xc3, 0x9d, 0x08, 0xab, 0x71, 0xa0 }, - { 0xb4, 0x65, 0xc2, 0x41, 0x26, 0x10, 0xbf, 0x84 }, - { 0x89, 0xc4, 0xa9, 0xdd, 0xb5, 0x3e, 0x69, 0x91 }, - { 0x5a, 0x9a, 0xf9, 0x1e, 0xb0, 0x95, 0xd3, 0x31 }, - { 0x8e, 0x54, 0x91, 0x4c, 0x15, 0x1e, 0x46, 0xb0 }, - { 0xfa, 0xb8, 0xab, 0x0b, 0x5b, 0xea, 0xae, 0xc6 }, - { 0x60, 0xad, 0x90, 0x6a, 0xcd, 0x06, 0xc8, 0x23 }, - { 0x6b, 0x1e, 0x6b, 0xc2, 0x42, 0x6d, 0xad, 0x17 }, - { 0x90, 0x32, 0x8f, 0xd2, 0x59, 
0x88, 0x9a, 0x8f }, - { 0xf0, 0xf7, 0x81, 0x5e, 0xe6, 0xf3, 0xd5, 0x16 }, - { 0x97, 0xe7, 0xe2, 0xce, 0xbe, 0xa8, 0x26, 0xb8 }, - { 0xb0, 0xfa, 0x18, 0x45, 0xf7, 0x2a, 0x76, 0xd6 }, - { 0xa4, 0x68, 0xbd, 0xfc, 0xdf, 0x0a, 0xa9, 0xc7 }, - { 0xda, 0x84, 0xe1, 0x13, 0x38, 0x38, 0x7d, 0xa7 }, - { 0xb3, 0x0d, 0x5e, 0xad, 0x8e, 0x39, 0xf2, 0xbc }, - { 0x17, 0x8a, 0x43, 0xd2, 0xa0, 0x08, 0x50, 0x3e }, - { 0x6d, 0xfa, 0xa7, 0x05, 0xa8, 0xa0, 0x6c, 0x70 }, - { 0xaa, 0x04, 0x7f, 0x07, 0xc5, 0xae, 0x8d, 0xb4 }, - { 0x30, 0x5b, 0xbb, 0x42, 0x0c, 0x5d, 0x5e, 0xcc }, - { 0x08, 0x32, 0x80, 0x31, 0x59, 0x75, 0x0f, 0x49 }, - { 0x90, 0x80, 0x25, 0x4f, 0xb7, 0x9b, 0xab, 0x1a }, - { 0x61, 0xc2, 0x85, 0xca, 0x24, 0x57, 0x74, 0xa4 }, - { 0x2a, 0xae, 0x03, 0x5c, 0xfb, 0x61, 0xf9, 0x7a }, - { 0xf5, 0x28, 0x90, 0x75, 0xc9, 0xab, 0x39, 0xe5 }, - { 0xe6, 0x5c, 0x42, 0x37, 0x32, 0xda, 0xe7, 0x95 }, - { 0x4b, 0x22, 0xcf, 0x0d, 0x9d, 0xa8, 0xde, 0x3d }, - { 0x26, 0x26, 0xea, 0x2f, 0xa1, 0xf9, 0xab, 0xcf }, - { 0xd1, 0xe1, 0x7e, 0x6e, 0xc4, 0xa8, 0x8d, 0xa6 }, - { 0x16, 0x57, 0x44, 0x28, 0x27, 0xff, 0x64, 0x0a }, - { 0xfd, 0x15, 0x5a, 0x40, 0xdf, 0x15, 0xf6, 0x30 }, - { 0xff, 0xeb, 0x59, 0x6f, 0x29, 0x9f, 0x58, 0xb2 }, - { 0xbe, 0x4e, 0xe4, 0xed, 0x39, 0x75, 0xdf, 0x87 }, - { 0xfc, 0x7f, 0x9d, 0xf7, 0x99, 0x1b, 0x87, 0xbc } + { 0xdd, 0x3e, 0x18, 0x49, 0xd6, 0x82, 0x45, 0x55 }, + { 0xed, 0x1d, 0xa8, 0x9e, 0xc9, 0x31, 0x79, 0xca }, + { 0x98, 0xfe, 0x20, 0xa3, 0x43, 0xcd, 0x66, 0x6f }, + { 0xf6, 0xf4, 0x18, 0xac, 0xdd, 0x7d, 0x9f, 0xa1 }, + { 0x4c, 0xf0, 0x49, 0x60, 0x09, 0x99, 0x49, 0xf3 }, + { 0x75, 0xc8, 0x32, 0x52, 0x65, 0x3d, 0x3b, 0x57 }, + { 0x96, 0x4b, 0x04, 0x61, 0xfb, 0xe9, 0x22, 0x73 }, + { 0x14, 0x1f, 0xa0, 0x8b, 0xbf, 0x39, 0x96, 0x36 }, + { 0x41, 0x2d, 0x98, 0xed, 0x93, 0x6d, 0x4a, 0xb2 }, + { 0xfb, 0x0d, 0x98, 0xbc, 0x70, 0xe3, 0x05, 0xf9 }, + { 0x36, 0xf8, 0x8e, 0x1f, 0xda, 0x86, 0xc8, 0xab }, + { 0x4d, 0x1a, 0x18, 0x15, 0x86, 0x8a, 0x5a, 0xa8 }, + { 0x7a, 0x79, 0x12, 0xc1, 0x99, 
0x9e, 0xae, 0x81 }, + { 0x9c, 0xa1, 0x11, 0x37, 0xb4, 0xa3, 0x46, 0x01 }, + { 0x79, 0x05, 0x14, 0x2f, 0x3b, 0xe7, 0x7e, 0x67 }, + { 0x6a, 0x3e, 0xe3, 0xd3, 0x5c, 0x04, 0x33, 0x97 }, + { 0xd1, 0x39, 0x70, 0xd7, 0xbe, 0x9b, 0x23, 0x50 }, + { 0x32, 0xac, 0xd9, 0x14, 0xbf, 0xda, 0x3b, 0xc8 }, + { 0x8a, 0x58, 0xd8, 0x16, 0xcb, 0x7a, 0x14, 0x83 }, + { 0x03, 0xf4, 0xd6, 0x66, 0x38, 0xef, 0xad, 0x8d }, + { 0xf9, 0x93, 0x22, 0x37, 0xff, 0x05, 0xe8, 0x31 }, + { 0xf5, 0xfe, 0xdb, 0x13, 0x48, 0x62, 0xb4, 0x71 }, + { 0x8b, 0xb5, 0x54, 0x86, 0xf3, 0x8d, 0x57, 0xea }, + { 0x8a, 0x3a, 0xcb, 0x94, 0xb5, 0xad, 0x59, 0x1c }, + { 0x7c, 0xe3, 0x70, 0x87, 0x23, 0xf7, 0x49, 0x5f }, + { 0xf4, 0x2f, 0x3d, 0x2f, 0x40, 0x57, 0x10, 0xc2 }, + { 0xb3, 0x93, 0x3a, 0x16, 0x7e, 0x56, 0x36, 0xac }, + { 0x89, 0x9a, 0x79, 0x45, 0x42, 0x3a, 0x5e, 0x1b }, + { 0x65, 0xe1, 0x2d, 0xf5, 0xa6, 0x95, 0xfa, 0xc8 }, + { 0xb8, 0x24, 0x49, 0xd8, 0xc8, 0xa0, 0x6a, 0xe9 }, + { 0xa8, 0x50, 0xdf, 0xba, 0xde, 0xfa, 0x42, 0x29 }, + { 0xfd, 0x42, 0xc3, 0x9d, 0x08, 0xab, 0x71, 0xa0 }, + { 0xb4, 0x65, 0xc2, 0x41, 0x26, 0x10, 0xbf, 0x84 }, + { 0x89, 0xc4, 0xa9, 0xdd, 0xb5, 0x3e, 0x69, 0x91 }, + { 0x5a, 0x9a, 0xf9, 0x1e, 0xb0, 0x95, 0xd3, 0x31 }, + { 0x8e, 0x54, 0x91, 0x4c, 0x15, 0x1e, 0x46, 0xb0 }, + { 0xfa, 0xb8, 0xab, 0x0b, 0x5b, 0xea, 0xae, 0xc6 }, + { 0x60, 0xad, 0x90, 0x6a, 0xcd, 0x06, 0xc8, 0x23 }, + { 0x6b, 0x1e, 0x6b, 0xc2, 0x42, 0x6d, 0xad, 0x17 }, + { 0x90, 0x32, 0x8f, 0xd2, 0x59, 0x88, 0x9a, 0x8f }, + { 0xf0, 0xf7, 0x81, 0x5e, 0xe6, 0xf3, 0xd5, 0x16 }, + { 0x97, 0xe7, 0xe2, 0xce, 0xbe, 0xa8, 0x26, 0xb8 }, + { 0xb0, 0xfa, 0x18, 0x45, 0xf7, 0x2a, 0x76, 0xd6 }, + { 0xa4, 0x68, 0xbd, 0xfc, 0xdf, 0x0a, 0xa9, 0xc7 }, + { 0xda, 0x84, 0xe1, 0x13, 0x38, 0x38, 0x7d, 0xa7 }, + { 0xb3, 0x0d, 0x5e, 0xad, 0x8e, 0x39, 0xf2, 0xbc }, + { 0x17, 0x8a, 0x43, 0xd2, 0xa0, 0x08, 0x50, 0x3e }, + { 0x6d, 0xfa, 0xa7, 0x05, 0xa8, 0xa0, 0x6c, 0x70 }, + { 0xaa, 0x04, 0x7f, 0x07, 0xc5, 0xae, 0x8d, 0xb4 }, + { 0x30, 0x5b, 0xbb, 0x42, 0x0c, 
0x5d, 0x5e, 0xcc }, + { 0x08, 0x32, 0x80, 0x31, 0x59, 0x75, 0x0f, 0x49 }, + { 0x90, 0x80, 0x25, 0x4f, 0xb7, 0x9b, 0xab, 0x1a }, + { 0x61, 0xc2, 0x85, 0xca, 0x24, 0x57, 0x74, 0xa4 }, + { 0x2a, 0xae, 0x03, 0x5c, 0xfb, 0x61, 0xf9, 0x7a }, + { 0xf5, 0x28, 0x90, 0x75, 0xc9, 0xab, 0x39, 0xe5 }, + { 0xe6, 0x5c, 0x42, 0x37, 0x32, 0xda, 0xe7, 0x95 }, + { 0x4b, 0x22, 0xcf, 0x0d, 0x9d, 0xa8, 0xde, 0x3d }, + { 0x26, 0x26, 0xea, 0x2f, 0xa1, 0xf9, 0xab, 0xcf }, + { 0xd1, 0xe1, 0x7e, 0x6e, 0xc4, 0xa8, 0x8d, 0xa6 }, + { 0x16, 0x57, 0x44, 0x28, 0x27, 0xff, 0x64, 0x0a }, + { 0xfd, 0x15, 0x5a, 0x40, 0xdf, 0x15, 0xf6, 0x30 }, + { 0xff, 0xeb, 0x59, 0x6f, 0x29, 0x9f, 0x58, 0xb2 }, + { 0xbe, 0x4e, 0xe4, 0xed, 0x39, 0x75, 0xdf, 0x87 }, + { 0xfc, 0x7f, 0x9d, 0xf7, 0x99, 0x1b, 0x87, 0xbc } }; -static bool chaskey_selftest(void) { +static bool chaskey_selftest( void ) { uint8_t tag[8]; uint8_t m[64]; @@ -232,14 +231,14 @@ static bool chaskey_selftest(void) { // As mentioned above, this sets the key to the vector // { 0x33221100, 0x77665544, 0xbbaa9988, 0xffeeddcc }. 
- seed_t s = seed_subkeys(0); + seed_t s = seed_subkeys(0); bool passed = true; for (int i = 0; i < 64; i++) { if (isLE()) { - chaskey<12,2,false>(m, i, s, tag); + chaskey<12, 2, false>(m, i, s, tag); } else { - chaskey<12,2,true>(m, i, s, tag); + chaskey<12, 2, true>(m, i, s, tag); } if (0 != memcmp(tag, vectors[i], 8)) { printf("Mismatch with len %d\n Expected:", i); @@ -256,129 +255,129 @@ static bool chaskey_selftest(void) { //------------------------------------------------------------ REGISTER_FAMILY(chaskey, - $.src_url = "http://mouha.be/chaskey/", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "http://mouha.be/chaskey/", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(chaskey_12__32, - $.desc = "Chaskey PRF (12 rounds, 32 bits)", - $.sort_order = 20, - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 32, - $.verification_LE = 0x672570CB, - $.verification_BE = 0x22B350D2, - $.initfn = chaskey_selftest, - $.seedfn = seed_subkeys, - $.hashfn_native = chaskey<12,1,false>, - $.hashfn_bswap = chaskey<12,1,true> -); + $.desc = "Chaskey PRF (12 rounds, 32 bits)", + $.sort_order = 20, + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 32, + $.verification_LE = 0x672570CB, + $.verification_BE = 0x22B350D2, + $.initfn = chaskey_selftest, + $.seedfn = seed_subkeys, + $.hashfn_native = chaskey<12, 1, false>, + $.hashfn_bswap = chaskey<12, 1, true> + ); REGISTER_HASH(chaskey_12__64, - $.desc = "Chaskey PRF (12 rounds, 64 bits)", - $.sort_order = 20, - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_SLOW | - 
FLAG_IMPL_ROTATE | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 64, - $.verification_LE = 0x919290D6, - $.verification_BE = 0x5D0E8285, - $.initfn = chaskey_selftest, - $.seedfn = seed_subkeys, - $.hashfn_native = chaskey<12,2,false>, - $.hashfn_bswap = chaskey<12,2,true> -); + $.desc = "Chaskey PRF (12 rounds, 64 bits)", + $.sort_order = 20, + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 64, + $.verification_LE = 0x919290D6, + $.verification_BE = 0x5D0E8285, + $.initfn = chaskey_selftest, + $.seedfn = seed_subkeys, + $.hashfn_native = chaskey<12, 2, false>, + $.hashfn_bswap = chaskey<12, 2, true> + ); REGISTER_HASH(chaskey_12, - $.desc = "Chaskey PRF (12 rounds, 128 bits)", - $.sort_order = 20, - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 128, - $.verification_LE = 0x1E983B23, - $.verification_BE = 0xB042962B, - $.initfn = chaskey_selftest, - $.seedfn = seed_subkeys, - $.hashfn_native = chaskey<12,4,false>, - $.hashfn_bswap = chaskey<12,4,true> -); + $.desc = "Chaskey PRF (12 rounds, 128 bits)", + $.sort_order = 20, + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 128, + $.verification_LE = 0x1E983B23, + $.verification_BE = 0xB042962B, + $.initfn = chaskey_selftest, + $.seedfn = seed_subkeys, + $.hashfn_native = chaskey<12, 4, false>, + $.hashfn_bswap = chaskey<12, 4, true> + ); REGISTER_HASH(chaskey_8__32, - $.desc = "Chaskey PRF (8 rounds, 32 bits)", - $.sort_order = 10, - $.hash_flags = - 
FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 32, - $.verification_LE = 0xA984B318, - $.verification_BE = 0x23FE2699, - $.initfn = chaskey_selftest, - $.seedfn = seed_subkeys, - $.hashfn_native = chaskey<8,1,false>, - $.hashfn_bswap = chaskey<8,1,true> -); + $.desc = "Chaskey PRF (8 rounds, 32 bits)", + $.sort_order = 10, + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 32, + $.verification_LE = 0xA984B318, + $.verification_BE = 0x23FE2699, + $.initfn = chaskey_selftest, + $.seedfn = seed_subkeys, + $.hashfn_native = chaskey<8, 1, false>, + $.hashfn_bswap = chaskey<8, 1, true> + ); REGISTER_HASH(chaskey_8__64, - $.desc = "Chaskey PRF (8 rounds, 64 bits)", - $.sort_order = 10, - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 64, - $.verification_LE = 0x4DA0DD3A, - $.verification_BE = 0x87A85CD2, - $.initfn = chaskey_selftest, - $.seedfn = seed_subkeys, - $.hashfn_native = chaskey<8,2,false>, - $.hashfn_bswap = chaskey<8,2,true> -); + $.desc = "Chaskey PRF (8 rounds, 64 bits)", + $.sort_order = 10, + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 64, + $.verification_LE = 0x4DA0DD3A, + $.verification_BE = 0x87A85CD2, + $.initfn = chaskey_selftest, + $.seedfn = seed_subkeys, + $.hashfn_native = chaskey<8, 2, false>, + $.hashfn_bswap = chaskey<8, 2, true> + ); REGISTER_HASH(chaskey_8, - $.desc = "Chaskey PRF (8 rounds, 128 bits)", - $.sort_order = 10, - 
$.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_NO_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 128, - $.verification_LE = 0x48B645E4, - $.verification_BE = 0xB84D00F9, - $.initfn = chaskey_selftest, - $.seedfn = seed_subkeys, - $.hashfn_native = chaskey<8,4,false>, - $.hashfn_bswap = chaskey<8,4,true> -); + $.desc = "Chaskey PRF (8 rounds, 128 bits)", + $.sort_order = 10, + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_NO_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 128, + $.verification_LE = 0x48B645E4, + $.verification_BE = 0xB84D00F9, + $.initfn = chaskey_selftest, + $.seedfn = seed_subkeys, + $.hashfn_native = chaskey<8, 4, false>, + $.hashfn_bswap = chaskey<8, 4, true> + ); diff --git a/hashes/cityhash.cpp b/hashes/cityhash.cpp index 682a1ac4..ef5ea564 100644 --- a/hashes/cityhash.cpp +++ b/hashes/cityhash.cpp @@ -29,43 +29,51 @@ #include "Hashlib.h" #if defined(HAVE_X86_64_CRC32C) -#include "Intrinsics.h" + #include "Intrinsics.h" #endif using namespace std; //------------------------------------------------------------ #if defined(HAVE_INT128) -static inline uint64_t Uint128Low64(const uint128_t x) { - return static_cast(x); + +static inline uint64_t Uint128Low64( const uint128_t x ) { + return static_cast(x); } -static inline uint64_t Uint128High64(const uint128_t x) { - return static_cast(x >> 64); + +static inline uint64_t Uint128High64( const uint128_t x ) { + return static_cast(x >> 64); } -static inline uint128_t Uint128(uint64_t lo, uint64_t hi) { - return lo + (((uint128_t)hi) << 64); + +static inline uint128_t Uint128( uint64_t lo, uint64_t hi ) { + return lo + (((uint128_t)hi) << 64); } + #else typedef std::pair uint128_t; -static inline uint64_t Uint128Low64(const uint128_t x) { + +static inline uint64_t Uint128Low64( const 
uint128_t x ) { return x.first; } -static inline uint64_t Uint128High64(const uint128_t x) { + +static inline uint64_t Uint128High64( const uint128_t x ) { return x.second; } -static inline uint128_t Uint128(uint64_t lo, uint64_t hi) { + +static inline uint128_t Uint128( uint64_t lo, uint64_t hi ) { return uint128_t(lo, hi); } + #endif //------------------------------------------------------------ -template < bool bswap > -static inline uint32_t Fetch32(const uint8_t * p) { +template +static inline uint32_t Fetch32( const uint8_t * p ) { return GET_U32(p, 0); } -template < bool bswap > -static inline uint64_t Fetch64(const uint8_t * p) { +template +static inline uint64_t Fetch64( const uint8_t * p ) { return GET_U64(p, 0); } @@ -83,387 +91,383 @@ static const uint32_t c2 = 0x1b873593; //------------------------------------------------------------ // Hash 128 input bits down to 64 bits of output. // This is intended to be a reasonably good hash function. -static inline uint64_t Hash128to64(const uint128_t & x) { - // Murmur-inspired hashing. - const uint64_t kMul = UINT64_C(0x9ddfea08eb382d69); - uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; - a ^= (a >> 47); - uint64_t b = (Uint128High64(x) ^ a) * kMul; - b ^= (b >> 47); - b *= kMul; - return b; +static inline uint64_t Hash128to64( const uint128_t & x ) { + // Murmur-inspired hashing. + const uint64_t kMul = UINT64_C(0x9ddfea08eb382d69); + uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; + + a ^= (a >> 47); + uint64_t b = (Uint128High64(x) ^ a) * kMul; + b ^= (b >> 47); + b *= kMul; + return b; } // A 32-bit to 32-bit integer hash copied from Murmur3. -static uint32_t fmix(uint32_t h) { - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - return h; +static uint32_t fmix( uint32_t h ) { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; } // Helper from Murmur3 for combining two 32-bit values. 
-static uint32_t Mur(uint32_t a, uint32_t h) { - a *= c1; - a = ROTR32(a, 17); - a *= c2; - h ^= a; - h = ROTR32(h, 19); - return h * 5 + 0xe6546b64; +static uint32_t Mur( uint32_t a, uint32_t h ) { + a *= c1; + a = ROTR32(a, 17); + a *= c2; + h ^= a; + h = ROTR32(h, 19); + return h * 5 + 0xe6546b64; } -static uint64_t ShiftMix(uint64_t val) { - return val ^ (val >> 47); +static uint64_t ShiftMix( uint64_t val ) { + return val ^ (val >> 47); } -static uint64_t HashLen16(uint64_t u, uint64_t v) { - return Hash128to64(Uint128(u, v)); +static uint64_t HashLen16( uint64_t u, uint64_t v ) { + return Hash128to64(Uint128(u, v)); } // Return a 16-byte hash for 48 bytes. Quick and dirty. // Callers do best to use "random-looking" values for a and b. -static pair WeakHashLen32WithSeeds( - uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) { - a += w; - b = ROTR64(b + a + z, 21); - uint64_t c = a; - a += x; - a += y; - b += ROTR64(a, 44); - return make_pair(a + z, b + c); +static pair WeakHashLen32WithSeeds( uint64_t w, uint64_t x, + uint64_t y, uint64_t z, uint64_t a, uint64_t b ) { + a += w; + b = ROTR64(b + a + z, 21); + uint64_t c = a; + a += x; + a += y; + b += ROTR64(a , 44); + return make_pair(a + z, b + c); } // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. 
-template < bool bswap > -static pair WeakHashLen32WithSeeds( - const uint8_t* s, uint64_t a, uint64_t b) { - return WeakHashLen32WithSeeds(Fetch64(s), - Fetch64(s + 8), - Fetch64(s + 16), - Fetch64(s + 24), - a, - b); +template +static pair WeakHashLen32WithSeeds( const uint8_t * s, uint64_t a, uint64_t b ) { + return WeakHashLen32WithSeeds(Fetch64(s), Fetch64( + s + 8), Fetch64(s + 16), Fetch64(s + 24), a, b); } #define PERMUTE3(a, b, c) do { std::swap(a, b); std::swap(a, c); } while (0) //------------------------------------------------------------ -static uint32_t Hash32Len0to4(const uint8_t *s, size_t len, uint32_t seed) { - uint32_t b = seed; - uint32_t c = 9; - for (int i = 0; i < len; i++) { - b = b * c1 + s[i]; - c ^= b; - } - return fmix(Mur(b, Mur(len, c))); -} - -template < bool bswap > -static uint32_t Hash32Len5to12(const uint8_t *s, size_t len, uint32_t seed) { - uint32_t a = len + seed, b = len * 5, c = 9, d = b; - a += Fetch32(s); - b += Fetch32(s + len - 4); - c += Fetch32(s + ((len >> 1) & 4)); - return fmix(Mur(c, Mur(b, Mur(a, d)))); -} - -template < bool bswap > -static uint32_t Hash32Len13to24(const uint8_t *s, size_t len, uint32_t seed) { - uint32_t a = Fetch32(s - 4 + (len >> 1)); - uint32_t b = Fetch32(s + 4); - uint32_t c = Fetch32(s + len - 8); - uint32_t d = Fetch32(s + (len >> 1)); - uint32_t e = Fetch32(s); - uint32_t f = Fetch32(s + len - 4); - uint32_t h = seed + len; - - return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h))))))); -} - -template < bool bswap > -static uint32_t CityHash32WithSeed(const uint8_t *s, size_t len, uint32_t seed) { - if (len <= 24) { - return len <= 12 ? - (len <= 4 ? 
- Hash32Len0to4(s, len, seed) : - Hash32Len5to12(s, len, seed) ) : - Hash32Len13to24(s, len, seed); - } - - // len > 24 - uint32_t h = len + seed, g = c1 * len, f = g; - uint32_t a0 = ROTR32(Fetch32(s + len - 4) * c1, 17) * c2; - uint32_t a1 = ROTR32(Fetch32(s + len - 8) * c1, 17) * c2; - uint32_t a2 = ROTR32(Fetch32(s + len - 16) * c1, 17) * c2; - uint32_t a3 = ROTR32(Fetch32(s + len - 12) * c1, 17) * c2; - uint32_t a4 = ROTR32(Fetch32(s + len - 20) * c1, 17) * c2; - h ^= a0; - h = ROTR32(h, 19); - h = h * 5 + 0xe6546b64; - h ^= a2; - h = ROTR32(h, 19); - h = h * 5 + 0xe6546b64; - g ^= a1; - g = ROTR32(g, 19); - g = g * 5 + 0xe6546b64; - g ^= a3; - g = ROTR32(g, 19); - g = g * 5 + 0xe6546b64; - f += a4; - f = ROTR32(f, 19); - f = f * 5 + 0xe6546b64; - size_t iters = (len - 1) / 20; - do { - uint32_t a0 = ROTR32(Fetch32(s) * c1, 17) * c2; - uint32_t a1 = Fetch32(s + 4); - uint32_t a2 = ROTR32(Fetch32(s + 8) * c1, 17) * c2; - uint32_t a3 = ROTR32(Fetch32(s + 12) * c1, 17) * c2; - uint32_t a4 = Fetch32(s + 16); +static uint32_t Hash32Len0to4( const uint8_t * s, size_t len, uint32_t seed ) { + uint32_t b = seed; + uint32_t c = 9; + + for (int i = 0; i < len; i++) { + b = b * c1 + s[i]; + c ^= b; + } + return fmix(Mur(b, Mur(len, c))); +} + +template +static uint32_t Hash32Len5to12( const uint8_t * s, size_t len, uint32_t seed ) { + uint32_t a = len + seed, b = len * 5, c = 9, d = b; + + a += Fetch32(s); + b += Fetch32(s + len - 4); + c += Fetch32(s + ((len >> 1) & 4)); + return fmix(Mur(c, Mur(b, Mur(a, d)))); +} + +template +static uint32_t Hash32Len13to24( const uint8_t * s, size_t len, uint32_t seed ) { + uint32_t a = Fetch32(s - 4 + (len >> 1)); + uint32_t b = Fetch32(s + 4); + uint32_t c = Fetch32(s + len - 8); + uint32_t d = Fetch32(s + (len >> 1)); + uint32_t e = Fetch32(s); + uint32_t f = Fetch32(s + len - 4); + uint32_t h = seed + len; + + return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h))))))); +} + +template +static uint32_t CityHash32WithSeed( const 
uint8_t * s, size_t len, uint32_t seed ) { + if (len <= 24) { + return len <= 12 ? + (len <= 4 ? + Hash32Len0to4(s, len, seed) : + Hash32Len5to12(s, len, seed)) : + Hash32Len13to24(s, len, seed); + } + + // len > 24 + uint32_t h = len + seed, g = c1 * len, f = g; + uint32_t a0 = ROTR32(Fetch32(s + len - 4) * c1, 17) * c2; + uint32_t a1 = ROTR32(Fetch32(s + len - 8) * c1, 17) * c2; + uint32_t a2 = ROTR32(Fetch32(s + len - 16) * c1, 17) * c2; + uint32_t a3 = ROTR32(Fetch32(s + len - 12) * c1, 17) * c2; + uint32_t a4 = ROTR32(Fetch32(s + len - 20) * c1, 17) * c2; h ^= a0; - h = ROTR32(h, 18); + h = ROTR32(h, 19); + h = h * 5 + 0xe6546b64; + h ^= a2; + h = ROTR32(h, 19); + h = h * 5 + 0xe6546b64; + g ^= a1; + g = ROTR32(g, 19); + g = g * 5 + 0xe6546b64; + g ^= a3; + g = ROTR32(g, 19); + g = g * 5 + 0xe6546b64; + f += a4; + f = ROTR32(f, 19); + f = f * 5 + 0xe6546b64; + size_t iters = (len - 1) / 20; + do { + uint32_t a0 = ROTR32(Fetch32(s) * c1, 17) * c2; + uint32_t a1 = Fetch32(s + 4); + uint32_t a2 = ROTR32(Fetch32(s + 8) * c1, 17) * c2; + uint32_t a3 = ROTR32(Fetch32(s + 12) * c1, 17) * c2; + uint32_t a4 = Fetch32(s + 16); + h ^= a0; + h = ROTR32(h, 18); + h = h * 5 + 0xe6546b64; + f += a1; + f = ROTR32(f, 19); + f = f * c1; + g += a2; + g = ROTR32(g, 18); + g = g * 5 + 0xe6546b64; + h ^= a3 + a1; + h = ROTR32(h, 19); + h = h * 5 + 0xe6546b64; + g ^= a4; + g = BSWAP(g) * 5; + h += a4 * 5; + h = BSWAP(h); + f += a0; + PERMUTE3(f, h, g); + s += 20; + } while (--iters != 0); + g = ROTR32(g , 11) * c1; + g = ROTR32(g , 17) * c1; + f = ROTR32(f , 11) * c1; + f = ROTR32(f , 17) * c1; + h = ROTR32(h + g, 19); h = h * 5 + 0xe6546b64; - f += a1; - f = ROTR32(f, 19); - f = f * c1; - g += a2; - g = ROTR32(g, 18); - g = g * 5 + 0xe6546b64; - h ^= a3 + a1; - h = ROTR32(h, 19); + h = ROTR32(h , 17) * c1; + h = ROTR32(h + f, 19); h = h * 5 + 0xe6546b64; - g ^= a4; - g = BSWAP(g) * 5; - h += a4 * 5; - h = BSWAP(h); - f += a0; - PERMUTE3(f, h, g); - s += 20; - } while (--iters != 
0); - g = ROTR32(g, 11) * c1; - g = ROTR32(g, 17) * c1; - f = ROTR32(f, 11) * c1; - f = ROTR32(f, 17) * c1; - h = ROTR32(h + g, 19); - h = h * 5 + 0xe6546b64; - h = ROTR32(h, 17) * c1; - h = ROTR32(h + f, 19); - h = h * 5 + 0xe6546b64; - h = ROTR32(h, 17) * c1; - return h; + h = ROTR32(h , 17) * c1; + return h; } //------------------------------------------------------------ -template < bool bswap > -static uint64_t HashLen0to16(const uint8_t *s, size_t len) { - if (len > 8) { - uint64_t a = Fetch64(s); - uint64_t b = Fetch64(s + len - 8); - return HashLen16(a, ROTR64(b + len, len)) ^ b; - } - if (len >= 4) { - uint64_t a = Fetch32(s); - return HashLen16(len + (a << 3), Fetch32(s + len - 4)); - } - if (len > 0) { - uint8_t a = s[0]; - uint8_t b = s[len >> 1]; - uint8_t c = s[len - 1]; - uint32_t y = static_cast(a) + (static_cast(b) << 8); - uint32_t z = len + (static_cast(c) << 2); - return ShiftMix(y * k2 ^ z * k3) * k2; - } - return k2; +template +static uint64_t HashLen0to16( const uint8_t * s, size_t len ) { + if (len > 8) { + uint64_t a = Fetch64(s); + uint64_t b = Fetch64(s + len - 8); + return HashLen16(a, ROTR64(b + len, len)) ^ b; + } + if (len >= 4) { + uint64_t a = Fetch32(s); + return HashLen16(len + (a << 3), Fetch32(s + len - 4)); + } + if (len > 0) { + uint8_t a = s[0]; + uint8_t b = s[len >> 1]; + uint8_t c = s[len - 1]; + uint32_t y = static_cast(a) + (static_cast(b) << 8); + uint32_t z = len + (static_cast(c) << 2); + return ShiftMix(y * k2 ^ z * k3) * k2; + } + return k2; } // This probably works well for 16-byte strings as well, but it may be overkill // in that case. 
-template < bool bswap > -static uint64_t HashLen17to32(const uint8_t *s, size_t len) { - uint64_t a = Fetch64(s) * k1; - uint64_t b = Fetch64(s + 8); - uint64_t c = Fetch64(s + len - 8) * k2; - uint64_t d = Fetch64(s + len - 16) * k0; - return HashLen16(ROTR64(a - b, 43) + ROTR64(c, 30) + d, - a + ROTR64(b ^ k3, 20) - c + len); +template +static uint64_t HashLen17to32( const uint8_t * s, size_t len ) { + uint64_t a = Fetch64(s ) * k1; + uint64_t b = Fetch64(s + 8); + uint64_t c = Fetch64(s + len - 8) * k2; + uint64_t d = Fetch64(s + len - 16) * k0; + + return HashLen16(ROTR64(a - b, 43) + ROTR64(c, 30) + d, a + ROTR64(b ^ k3, 20) - c + len); } // Return an 8-byte hash for 33 to 64 bytes. -template < bool bswap > -static uint64_t HashLen33to64(const uint8_t *s, size_t len) { - uint64_t z = Fetch64(s + 24); - uint64_t a = Fetch64(s) + (len + Fetch64(s + len - 16)) * k0; - uint64_t b = ROTR64(a + z, 52); - uint64_t c = ROTR64(a, 37); - a += Fetch64(s + 8); - c += ROTR64(a, 7); - a += Fetch64(s + 16); - uint64_t vf = a + z; - uint64_t vs = b + ROTR64(a, 31) + c; - a = Fetch64(s + 16) + Fetch64(s + len - 32); - z = Fetch64(s + len - 8); - b = ROTR64(a + z, 52); - c = ROTR64(a, 37); - a += Fetch64(s + len - 24); - c += ROTR64(a, 7); - a += Fetch64(s + len - 16); - uint64_t wf = a + z; - uint64_t ws = b + ROTR64(a, 31) + c; - uint64_t r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0); - return ShiftMix(r * k0 + vs) * k2; -} - -template < bool bswap > -static uint64_t CityHash64(const uint8_t *s, size_t len) { - if (len <= 32) { - if (len <= 16) { - return HashLen0to16(s, len); - } else { - return HashLen17to32(s, len); +template +static uint64_t HashLen33to64( const uint8_t * s, size_t len ) { + uint64_t z = Fetch64(s + 24); + uint64_t a = Fetch64(s ) + (len + Fetch64(s + len - 16)) * k0; + uint64_t b = ROTR64(a + z, 52); + uint64_t c = ROTR64(a , 37); + + a += Fetch64(s + 8); + c += ROTR64(a, 7); + a += Fetch64(s + 16 ); + uint64_t vf = a + z; + uint64_t vs = b + ROTR64(a, 
31) + c; + a = Fetch64(s + 16 ) + Fetch64(s + len - 32); + z = Fetch64(s + len - 8); + b = ROTR64(a + z, 52); + c = ROTR64(a , 37); + a += Fetch64(s + len - 24); + c += ROTR64(a, 7); + a += Fetch64(s + len - 16); + uint64_t wf = a + z; + uint64_t ws = b + ROTR64(a, 31) + c; + uint64_t r = ShiftMix((vf + ws) * k2 + (wf + vs) * k0); + return ShiftMix(r * k0 + vs) * k2; +} + +template +static uint64_t CityHash64( const uint8_t * s, size_t len ) { + if (len <= 32) { + if (len <= 16) { + return HashLen0to16(s, len); + } else { + return HashLen17to32(s, len); + } + } else if (len <= 64) { + return HashLen33to64(s, len); } - } else if (len <= 64) { - return HashLen33to64(s, len); - } - - // For strings over 64 bytes we hash the end first, and then as we - // loop we keep 56 bytes of state: v, w, x, y, and z. - uint64_t x = Fetch64(s + len - 40); - uint64_t y = Fetch64(s + len - 16) + Fetch64(s + len - 56); - uint64_t z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); - pair v = WeakHashLen32WithSeeds(s + len - 64, len, z); - pair w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); - x = x * k1 + Fetch64(s); - - // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. 
- len = (len - 1) & ~static_cast(63); - do { - x = ROTR64(x + y + v.first + Fetch64(s + 8), 37) * k1; - y = ROTR64(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y += v.first + Fetch64(s + 40); - z = ROTR64(z + w.first, 33) * k1; - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); - std::swap(z, x); - s += 64; - len -= 64; - } while (len != 0); - return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, - HashLen16(v.second, w.second) + x); -} - -template < bool bswap > -static uint64_t CityHash64WithSeeds(const uint8_t *s, size_t len, - uint64_t seed0, uint64_t seed1) { + + // For strings over 64 bytes we hash the end first, and then as we + // loop we keep 56 bytes of state: v, w, x, y, and z. + uint64_t x = Fetch64(s + len - 40); + uint64_t y = Fetch64(s + len - 16) + Fetch64(s + len - 56); + uint64_t z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24)); + pair v = WeakHashLen32WithSeeds(s + len - 64, len , z); + pair w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x); + x = x * k1 + Fetch64(s); + + // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. 
+ len = (len - 1) & ~static_cast(63); + do { + x = ROTR64(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = ROTR64(y + v.second + Fetch64 (s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = ROTR64(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s , v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second , y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + len -= 64; + } while (len != 0); + return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, HashLen16(v.second, w.second) + x); +} + +template +static uint64_t CityHash64WithSeeds( const uint8_t * s, size_t len, uint64_t seed0, uint64_t seed1 ) { return HashLen16(CityHash64(s, len) - seed0, seed1); } -template < bool bswap > -static uint64_t CityHash64WithSeed(const uint8_t *s, size_t len, uint64_t seed) { +template +static uint64_t CityHash64WithSeed( const uint8_t * s, size_t len, uint64_t seed ) { return CityHash64WithSeeds(s, len, k2, seed); } //------------------------------------------------------------ -template < bool bswap > -static uint128_t CityMurmur(const uint8_t *s, size_t len, uint128_t seed) { - uint64_t a = Uint128Low64(seed); - uint64_t b = Uint128High64(seed); - uint64_t c = 0; - uint64_t d = 0; - signed long l = len - 16; - if (l <= 0) { // len <= 16 - a = ShiftMix(a * k1) * k1; - c = b * k1 + HashLen0to16(s, len); - d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c)); - } else { // len > 16 - c = HashLen16(Fetch64(s + len - 8) + k1, a); - d = HashLen16(b + len, c + Fetch64(s + len - 16)); - a += d; +template +static uint128_t CityMurmur( const uint8_t * s, size_t len, uint128_t seed ) { + uint64_t a = Uint128Low64(seed); + uint64_t b = Uint128High64(seed); + uint64_t c = 0; + uint64_t d = 0; + signed long l = len - 16; + + if (l <= 0) { // len <= 16 + a = ShiftMix(a * k1) * k1; + c = b * k1 + HashLen0to16(s, len); + d = ShiftMix(a + (len >= 8 ? 
Fetch64(s) : c)); + } else { // len > 16 + c = HashLen16(Fetch64(s + len - 8) + k1, a ); + d = HashLen16(b + len, c + Fetch64(s + len - 16)); + a += d; + do { + a ^= ShiftMix(Fetch64(s) * k1) * k1; + a *= k1; + b ^= a; + c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; + c *= k1; + d ^= c; + s += 16; + l -= 16; + } while (l > 0); + } + a = HashLen16(a, c); + b = HashLen16(d, b); + return Uint128(a ^ b, HashLen16(b, a)); +} + +template +static uint128_t CityHash128WithSeed( const uint8_t * s, size_t len, uint128_t seed ) { + if (len < 128) { + return CityMurmur(s, len, seed); + } + + // We expect len >= 128 to be the common case. Keep 56 bytes of state: + // v, w, x, y, and z. + pair v, w; + uint64_t x = Uint128Low64(seed); + uint64_t y = Uint128High64(seed); + uint64_t z = len * k1; + v.first = ROTR64(y ^ k1 , 49) * k1 + Fetch64(s); + v.second = ROTR64(v.first, 42) * k1 + Fetch64(s + 8); + w.first = ROTR64(y + z , 35) * k1 + x; + w.second = ROTR64(x + Fetch64(s + 88), 53) * k1; + + // This is the same inner loop as CityHash64(), manually unrolled. do { - a ^= ShiftMix(Fetch64(s) * k1) * k1; - a *= k1; - b ^= a; - c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; - c *= k1; - d ^= c; - s += 16; - l -= 16; - } while (l > 0); - } - a = HashLen16(a, c); - b = HashLen16(d, b); - return Uint128(a ^ b, HashLen16(b, a)); -} - -template < bool bswap > -static uint128_t CityHash128WithSeed(const uint8_t *s, size_t len, uint128_t seed) { - if (len < 128) { - return CityMurmur(s, len, seed); - } - - // We expect len >= 128 to be the common case. Keep 56 bytes of state: - // v, w, x, y, and z. - pair v, w; - uint64_t x = Uint128Low64(seed); - uint64_t y = Uint128High64(seed); - uint64_t z = len * k1; - v.first = ROTR64(y ^ k1, 49) * k1 + Fetch64(s); - v.second = ROTR64(v.first, 42) * k1 + Fetch64(s + 8); - w.first = ROTR64(y + z, 35) * k1 + x; - w.second = ROTR64(x + Fetch64(s + 88), 53) * k1; - - // This is the same inner loop as CityHash64(), manually unrolled. 
- do { - x = ROTR64(x + y + v.first + Fetch64(s + 8), 37) * k1; - y = ROTR64(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y += v.first + Fetch64(s + 40); - z = ROTR64(z + w.first, 33) * k1; - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); - std::swap(z, x); - s += 64; - x = ROTR64(x + y + v.first + Fetch64(s + 8), 37) * k1; - y = ROTR64(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y += v.first + Fetch64(s + 40); - z = ROTR64(z + w.first, 33) * k1; - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); - std::swap(z, x); - s += 64; - len -= 128; - } while (likely(len >= 128)); - x += ROTR64(v.first + z, 49) * k0; - z += ROTR64(w.first, 37) * k0; - // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. - for (size_t tail_done = 0; tail_done < len; ) { - tail_done += 32; - y = ROTR64(x + y, 42) * k0 + v.second; - w.first += Fetch64(s + len - tail_done + 16); - x = x * k0 + w.first; - z += w.second + Fetch64(s + len - tail_done); - w.second += v.first; - v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); - } - // At this point our 56 bytes of state should contain more than - // enough information for a strong 128-bit hash. We use two - // different 56-byte-to-8-byte hashes to get a 16-byte final result. 
- x = HashLen16(x, v.first); - y = HashLen16(y + z, w.first); - return Uint128(HashLen16(x + v.second, w.second) + y, - HashLen16(x + w.second, y + v.second)); -} - -template < bool bswap > -static uint128_t CityHash128(const char *s, size_t len) { + x = ROTR64(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = ROTR64(y + v.second + Fetch64 (s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = ROTR64(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s , v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second , y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + x = ROTR64(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = ROTR64(y + v.second + Fetch64 (s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = ROTR64(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s , v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second , y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + len -= 128; + } while (likely(len >= 128)); + x += ROTR64(v.first + z, 49) * k0; + z += ROTR64(w.first , 37) * k0; + // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. + for (size_t tail_done = 0; tail_done < len;) { + tail_done += 32; + y = ROTR64(x + y, 42) * k0 + v.second; + w.first += Fetch64 (s + len - tail_done + 16); + x = x * k0 + w.first; + z += w.second + Fetch64 (s + len - tail_done); + w.second += v.first; + v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); + } + // At this point our 56 bytes of state should contain more than + // enough information for a strong 128-bit hash. We use two + // different 56-byte-to-8-byte hashes to get a 16-byte final result. 
+ x = HashLen16(x , v.first); + y = HashLen16(y + z, w.first); + return Uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second)); +} + +template +static uint128_t CityHash128( const char * s, size_t len ) { if (len >= 16) { - return CityHash128WithSeed( - s + 16, len - 16, Uint128(Fetch64(s) ^ k3, Fetch64(s + 8))); + return CityHash128WithSeed(s + 16, len - 16, Uint128(Fetch64(s) ^ k3, Fetch64(s + 8))); } else if (len >= 8) { - return CityHash128WithSeed( - NULL, 0, Uint128(Fetch64(s) ^ (len * k0), Fetch64(s + len - 8) ^ k1)); + return CityHash128WithSeed(NULL, 0, Uint128(Fetch64( + s) ^ (len * k0), Fetch64(s + len - 8) ^ k1)); } else { return CityHash128WithSeed(s, len, Uint128(k0, k1)); } @@ -473,160 +477,167 @@ static uint128_t CityHash128(const char *s, size_t len) { #if defined(HAVE_X86_64_CRC32C) // Requires len >= 240. -template < bool bswap > -static void CityHashCrc256Long(const uint8_t *s, size_t len, - uint64_t seed, uint64_t *result) { - uint64_t a = Fetch64(s + 56) + k0; - uint64_t b = Fetch64(s + 96) + k0; - uint64_t c = result[0] = HashLen16(b, len); - uint64_t d = result[1] = Fetch64(s + 120) * k0 + len; - uint64_t e = Fetch64(s + 184) + seed; - uint64_t f = seed; - uint64_t g = 0; - uint64_t h = 0; - uint64_t i = 0; - uint64_t j = 0; - uint64_t t = c + d; - - // 240 bytes of input per iter. 
- size_t iters = len / 240; - len -= iters * 240; - do { - -#define CHUNK(multiplier, z) \ - { \ - uint64_t old_a = a; \ - a = ROTR64(b, 41 ^ z) * multiplier + Fetch64(s); \ - b = ROTR64(c, 27 ^ z) * multiplier + Fetch64(s + 8); \ - c = ROTR64(d, 41 ^ z) * multiplier + Fetch64(s + 16); \ - d = ROTR64(e, 33 ^ z) * multiplier + Fetch64(s + 24); \ - e = ROTR64(t, 25 ^ z) * multiplier + Fetch64(s + 32); \ - t = old_a; \ - } \ - f = _mm_crc32_u64(f, a); \ - g = _mm_crc32_u64(g, b); \ - h = _mm_crc32_u64(h, c); \ - i = _mm_crc32_u64(i, d); \ - j = _mm_crc32_u64(j, e); \ +template +static void CityHashCrc256Long( const uint8_t * s, size_t len, uint64_t seed, uint64_t * result ) { + uint64_t a = Fetch64(s + 56) + k0; + uint64_t b = Fetch64(s + 96) + k0; + uint64_t c = HashLen16(b, len); + uint64_t d = Fetch64(s + 120) * k0 + len; + uint64_t e = Fetch64(s + 184) + seed; + uint64_t f = seed; + uint64_t g = 0; + uint64_t h = 0; + uint64_t i = 0; + uint64_t j = 0; + uint64_t t = c + d; + + result[0] = c; + result[1] = d; + + // 240 bytes of input per iter. 
+ size_t iters = len / 240; + len -= iters * 240; + do { +#define CHUNK(multiplier, z) \ + { \ + uint64_t old_a = a; \ + a = ROTR64(b, 41 ^ z) * multiplier + Fetch64(s); \ + b = ROTR64(c, 27 ^ z) * multiplier + Fetch64(s + 8); \ + c = ROTR64(d, 41 ^ z) * multiplier + Fetch64(s + 16); \ + d = ROTR64(e, 33 ^ z) * multiplier + Fetch64(s + 24); \ + e = ROTR64(t, 25 ^ z) * multiplier + Fetch64(s + 32); \ + t = old_a; \ + } \ + f = _mm_crc32_u64(f, a); \ + g = _mm_crc32_u64(g, b); \ + h = _mm_crc32_u64(h, c); \ + i = _mm_crc32_u64(i, d); \ + j = _mm_crc32_u64(j, e); \ s += 40 - CHUNK(1, 1); CHUNK(k0, 0); - CHUNK(1, 1); CHUNK(k0, 0); - CHUNK(1, 1); CHUNK(k0, 0); - } while (--iters > 0); - - while (len >= 40) { - CHUNK(k0, 0); - len -= 40; - } - if (len > 0) { - s = s + len - 40; - CHUNK(k0, 0); - } - j += i << 32; - a = HashLen16(a, j); - h += g << 32; - b += h; - c = HashLen16(c, f) + i; - d = HashLen16(d, e + result[0]); - j += e; - i += HashLen16(h, t); - e = HashLen16(a, d) + j; - f = HashLen16(b, c) + a; - g = HashLen16(j, i) + c; - result[0] = e + f + g + h; - a = ShiftMix((a + g) * k0) * k0 + b; - result[1] += a + result[0]; - a = ShiftMix(a * k0) * k0 + c; - result[2] = a + result[1]; - a = ShiftMix((a + e) * k0) * k0; - result[3] = a + result[2]; + CHUNK(1, 1); CHUNK(k0, 0); + CHUNK(1, 1); CHUNK(k0, 0); + CHUNK(1, 1); CHUNK(k0, 0); + } while (--iters > 0); + + while (len >= 40) { + CHUNK(k0, 0); + len -= 40; + } + if (len > 0) { + s = s + len - 40; + CHUNK(k0, 0); + } + j += i << 32; + a = HashLen16(a, j); + h += g << 32; + b += h; + c = HashLen16(c, f) + i; + d = HashLen16(d, e + result[0]); + j += e; + i += HashLen16(h, t); + e = HashLen16(a, d) + j; + f = HashLen16(b, c) + a; + g = HashLen16(j, i) + c; + + // + result[0] = e + f + g + h; + a = ShiftMix((a + g) * k0) * k0 + b; + result[1] += a + result[0 ]; + a = ShiftMix(a * k0) * k0 + c; + result[2] = a + result[1 ]; + a = ShiftMix((a + e) * k0) * k0; + result[3] = a + result[2 ]; } // Requires len < 240. 
-template < bool bswap > -static void CityHashCrc256Short(const uint8_t *s, size_t len, uint64_t *result) { - uint8_t buf[240]; - memcpy(buf, s, len); - memset(buf + len, 0, 240 - len); - CityHashCrc256Long(buf, 240, ~static_cast(len), result); +template +static void CityHashCrc256Short( const uint8_t * s, size_t len, uint64_t * result ) { + uint8_t buf[240]; + + memcpy(buf, s, len); + memset(buf + len, 0, 240 - len); + CityHashCrc256Long(buf, 240, ~static_cast(len), result); } -template < bool bswap > -static void CityHashCrc256(const uint8_t *s, size_t len, uint64_t *result) { - if (likely(len >= 240)) { - CityHashCrc256Long(s, len, 0, result); - } else { - CityHashCrc256Short(s, len, result); - } +template +static void CityHashCrc256( const uint8_t * s, size_t len, uint64_t * result ) { + if (likely(len >= 240)) { + CityHashCrc256Long(s, len, 0, result); + } else { + CityHashCrc256Short(s, len, result); + } } // Requires len < 240. // Unofficial homegrown seeding for SMHasher3 -template < bool bswap > -static void CityHashCrc256ShortWithSeed(const uint8_t *s, size_t len, uint64_t seed, uint64_t *result) { - uint8_t buf[240]; - memcpy(buf, s, len); - memset(buf + len, 0, 240 - len); - CityHashCrc256Long(buf, 240, seed ^ ~static_cast(len), result); +template +static void CityHashCrc256ShortWithSeed( const uint8_t * s, size_t len, uint64_t seed, uint64_t * result ) { + uint8_t buf[240]; + + memcpy(buf, s, len); + memset(buf + len, 0, 240 - len); + CityHashCrc256Long(buf, 240, seed ^ ~static_cast(len), result); } // Unofficial -template < bool bswap > -static void CityHashCrc256WithSeed(const uint8_t *s, size_t len, uint64_t seed, uint64_t *result) { - if (likely(len >= 240)) { - CityHashCrc256Long(s, len, seed, result); - } else { - CityHashCrc256ShortWithSeed(s, len, seed, result); - } -} - -template < bool bswap > -static uint128_t CityHashCrc128WithSeed(const uint8_t *s, size_t len, uint128_t seed) { - if (len <= 900) { - return CityHash128WithSeed(s, len, 
seed); - } else { - uint64_t result[4]; - CityHashCrc256(s, len, result); - uint64_t u = Uint128High64(seed) + result[0]; - uint64_t v = Uint128Low64(seed) + result[1]; - return Uint128(HashLen16(u, v + result[2]), - HashLen16(ROTR64(v, 32), u * k0 + result[3])); - } -} - -template < bool bswap > -static uint128_t CityHashCrc128(const uint8_t *s, size_t len) { - if (len <= 900) { - return CityHash128(s, len); - } else { - uint64_t result[4]; - CityHashCrc256(s, len, result); - return Uint128(result[2], result[3]); - } +template +static void CityHashCrc256WithSeed( const uint8_t * s, size_t len, uint64_t seed, uint64_t * result ) { + if (likely(len >= 240)) { + CityHashCrc256Long(s, len, seed, result); + } else { + CityHashCrc256ShortWithSeed(s, len, seed, result); + } +} + +template +static uint128_t CityHashCrc128WithSeed( const uint8_t * s, size_t len, uint128_t seed ) { + if (len <= 900) { + return CityHash128WithSeed(s, len, seed); + } else { + uint64_t result[4]; + CityHashCrc256(s, len, result); + uint64_t u = Uint128High64(seed) + result[0]; + uint64_t v = Uint128Low64(seed) + result[1]; + return Uint128(HashLen16(u, v + result[2]), HashLen16(ROTR64(v, 32), u * k0 + result[3])); + } +} + +template +static uint128_t CityHashCrc128( const uint8_t * s, size_t len ) { + if (len <= 900) { + return CityHash128(s, len); + } else { + uint64_t result[4]; + CityHashCrc256(s, len, result); + return Uint128(result[2], result[3]); + } } #endif //------------------------------------------------------------ -template < bool bswap > -static void City32(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void City32( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h; + h = CityHash32WithSeed((const uint8_t *)in, len, (uint32_t)seed); PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void City64(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void City64( 
const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h; + h = CityHash64WithSeed((const uint8_t *)in, len, (uint64_t)seed); PUT_U64(h, (uint8_t *)out, 0); } -template < bool bswap, uint32_t seedmode > -static void City128(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void City128( const void * in, const size_t len, const seed_t seed, void * out ) { uint128_t seed128; - switch(seedmode) { + + switch (seedmode) { case 1: seed128 = Uint128((uint64_t)seed, 0); break; case 2: seed128 = Uint128(0, (uint64_t)seed); break; case 3: seed128 = Uint128((uint64_t)seed, (uint64_t)seed); break; @@ -635,16 +646,17 @@ static void City128(const void * in, const size_t len, const seed_t seed, void * uint128_t h; h = CityHash128WithSeed((const uint8_t *)in, len, seed128); - PUT_U64(Uint128Low64(h), (uint8_t *)out, 0); + PUT_U64(Uint128Low64(h) , (uint8_t *)out, 0); PUT_U64(Uint128High64(h), (uint8_t *)out, 8); } // This version is slightly different than the one in Farmhash, so it // is tested also. 
-template < bool bswap, uint32_t seedmode > -static void CityMurmur_128(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void CityMurmur_128( const void * in, const size_t len, const seed_t seed, void * out ) { uint128_t seed128; - switch(seedmode) { + + switch (seedmode) { case 1: seed128 = Uint128((uint64_t)seed, 0); break; case 2: seed128 = Uint128(0, (uint64_t)seed); break; case 3: seed128 = Uint128((uint64_t)seed, (uint64_t)seed); break; @@ -653,16 +665,17 @@ static void CityMurmur_128(const void * in, const size_t len, const seed_t seed, uint128_t h; h = CityMurmur((const uint8_t *)in, len, seed128); - PUT_U64(Uint128Low64(h), (uint8_t *)out, 0); + PUT_U64(Uint128Low64(h) , (uint8_t *)out, 0); PUT_U64(Uint128High64(h), (uint8_t *)out, 8); } #if defined(HAVE_X86_64_CRC32C) -template < bool bswap, uint32_t seedmode > -static void CityCrc128(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void CityCrc128( const void * in, const size_t len, const seed_t seed, void * out ) { uint128_t seed128; - switch(seedmode) { + + switch (seedmode) { case 1: seed128 = Uint128((uint64_t)seed, 0); break; case 2: seed128 = Uint128(0, (uint64_t)seed); break; case 3: seed128 = Uint128((uint64_t)seed, (uint64_t)seed); break; @@ -671,13 +684,14 @@ static void CityCrc128(const void * in, const size_t len, const seed_t seed, voi uint128_t h; h = CityHashCrc128WithSeed((const uint8_t *)in, len, seed128); - PUT_U64(Uint128Low64(h), (uint8_t *)out, 0); + PUT_U64(Uint128Low64(h) , (uint8_t *)out, 0); PUT_U64(Uint128High64(h), (uint8_t *)out, 8); } -template < bool bswap > -static void CityCrc256(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void CityCrc256( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t result[4]; + CityHashCrc256WithSeed((const uint8_t *)in, len, (uint64_t)seed, result); PUT_U64(result[0], (uint8_t *)out, 0); PUT_U64(result[1], 
(uint8_t *)out, 8); @@ -689,192 +703,192 @@ static void CityCrc256(const void * in, const size_t len, const seed_t seed, voi //------------------------------------------------------------ REGISTER_FAMILY(cityhash, - $.src_url = "https://github.com/google/cityhash", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/google/cityhash", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(CityHash_32, - $.desc = "Google CityHash32WithSeed", - $.hash_flags = - FLAG_HASH_SMALL_SEED , - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 32, - $.verification_LE = 0x5C28AD62, - $.verification_BE = 0x79F1F814, - $.hashfn_native = City32, - $.hashfn_bswap = City32 -); + $.desc = "Google CityHash32WithSeed", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x5C28AD62, + $.verification_BE = 0x79F1F814, + $.hashfn_native = City32, + $.hashfn_bswap = City32 + ); REGISTER_HASH(CityHash_64, - $.desc = "Google CityHash64WithSeed", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 64, - $.verification_LE = 0x25A20825, - $.verification_BE = 0x5698D8C4, - $.hashfn_native = City64, - $.hashfn_bswap = City64 -); + $.desc = "Google CityHash64WithSeed", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x25A20825, + $.verification_BE = 0x5698D8C4, + $.hashfn_native = City64, + $.hashfn_bswap = City64 + ); REGISTER_HASH(CityHash_128__seed1, - $.desc = "Google CityHash128WithSeed (seeded low 64 bits)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.verification_LE = 0x6531F54E, - $.verification_BE = 0x595FC28D, - 
$.hashfn_native = City128, - $.hashfn_bswap = City128 -); + $.desc = "Google CityHash128WithSeed (seeded low 64 bits)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x6531F54E, + $.verification_BE = 0x595FC28D, + $.hashfn_native = City128, + $.hashfn_bswap = City128 + ); REGISTER_HASH(CityHash_128__seed2, - $.desc = "Google CityHash128WithSeed (seeded high 64 bits)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.verification_LE = 0x33E4ECD1, - $.verification_BE = 0xE7A9C3FD, - $.hashfn_native = City128, - $.hashfn_bswap = City128 -); + $.desc = "Google CityHash128WithSeed (seeded high 64 bits)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x33E4ECD1, + $.verification_BE = 0xE7A9C3FD, + $.hashfn_native = City128, + $.hashfn_bswap = City128 + ); REGISTER_HASH(CityHash_128__seed3, - $.desc = "Google CityHash128WithSeed (seeded low+high 64 bits)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.verification_LE = 0x1C03D5B9, - $.verification_BE = 0xCE532972, - $.hashfn_native = City128, - $.hashfn_bswap = City128 -); + $.desc = "Google CityHash128WithSeed (seeded low+high 64 bits)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x1C03D5B9, + $.verification_BE = 0xCE532972, + $.hashfn_native = City128, + $.hashfn_bswap = City128 + ); REGISTER_HASH(CityMurmur__seed1, - $.desc = "CityMurmur (seeded low 64 bits)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.verification_LE = 0x47EE6507, - 
$.verification_BE = 0x646575E0, - $.hashfn_native = CityMurmur_128, - $.hashfn_bswap = CityMurmur_128 -); + $.desc = "CityMurmur (seeded low 64 bits)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x47EE6507, + $.verification_BE = 0x646575E0, + $.hashfn_native = CityMurmur_128, + $.hashfn_bswap = CityMurmur_128 + ); REGISTER_HASH(CityMurmur__seed2, - $.desc = "CityMurmur (seeded high 64 bits)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.verification_LE = 0xAD2F2840, - $.verification_BE = 0x9677E1F6, - $.hashfn_native = CityMurmur_128, - $.hashfn_bswap = CityMurmur_128 -); + $.desc = "CityMurmur (seeded high 64 bits)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0xAD2F2840, + $.verification_BE = 0x9677E1F6, + $.hashfn_native = CityMurmur_128, + $.hashfn_bswap = CityMurmur_128 + ); REGISTER_HASH(CityMurmur__seed3, - $.desc = "CityMurmur (seeded low+high 64 bits)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.verification_LE = 0xE0FECCA8, - $.verification_BE = 0x2DA46BE3, - $.hashfn_native = CityMurmur_128, - $.hashfn_bswap = CityMurmur_128 -); + $.desc = "CityMurmur (seeded low+high 64 bits)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0xE0FECCA8, + $.verification_BE = 0x2DA46BE3, + $.hashfn_native = CityMurmur_128, + $.hashfn_bswap = CityMurmur_128 + ); #if defined(HAVE_X86_64_CRC32C) REGISTER_HASH(CityHashCrc_128__seed1, - $.desc = "Google CityHashCrc128WithSeed (seeded low 64 bits)", - $.hash_flags = - FLAG_HASH_CRC_BASED , - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 
| - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.verification_LE = 0xD4389C97, - $.verification_BE = 0x561D03B3 , - $.hashfn_native = CityCrc128, - $.hashfn_bswap = CityCrc128 -); + $.desc = "Google CityHashCrc128WithSeed (seeded low 64 bits)", + $.hash_flags = + FLAG_HASH_CRC_BASED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0xD4389C97, + $.verification_BE = 0x561D03B3, + $.hashfn_native = CityCrc128, + $.hashfn_bswap = CityCrc128 + ); REGISTER_HASH(CityHashCrc_128__seed2, - $.desc = "Google CityHashCrc128WithSeed (seeded high 64 bits)", - $.hash_flags = - FLAG_HASH_CRC_BASED , - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.verification_LE = 0xD627AF5F, - $.verification_BE = 0x45FB4A4B, - $.hashfn_native = CityCrc128, - $.hashfn_bswap = CityCrc128 -); + $.desc = "Google CityHashCrc128WithSeed (seeded high 64 bits)", + $.hash_flags = + FLAG_HASH_CRC_BASED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0xD627AF5F, + $.verification_BE = 0x45FB4A4B, + $.hashfn_native = CityCrc128, + $.hashfn_bswap = CityCrc128 + ); REGISTER_HASH(CityHashCrc_128__seed3, - $.desc = "Google CityHashCrc128WithSeed (seeded low+high 64 bits)", - $.hash_flags = - FLAG_HASH_CRC_BASED , - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.verification_LE = 0x1DA45069, - $.verification_BE = 0x9AFFB28F, - $.hashfn_native = CityCrc128, - $.hashfn_bswap = CityCrc128 -); + $.desc = "Google CityHashCrc128WithSeed (seeded low+high 64 bits)", + $.hash_flags = + FLAG_HASH_CRC_BASED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x1DA45069, + $.verification_BE = 0x9AFFB28F, + $.hashfn_native = CityCrc128, + 
$.hashfn_bswap = CityCrc128 + ); REGISTER_HASH(CityHashCrc_256, - $.desc = "Google CityHashCrc256 (with modified seeding)", - $.hash_flags = - FLAG_HASH_NO_SEED | - FLAG_HASH_CRC_BASED , - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_SLOW | - FLAG_IMPL_LICENSE_MIT , - $.bits = 256, - $.verification_LE = 0x4A282558, - $.verification_BE = 0xB95D3E15, - $.hashfn_native = CityCrc256, - $.hashfn_bswap = CityCrc256 -); + $.desc = "Google CityHashCrc256 (with modified seeding)", + $.hash_flags = + FLAG_HASH_NO_SEED | + FLAG_HASH_CRC_BASED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_SLOW | + FLAG_IMPL_LICENSE_MIT, + $.bits = 256, + $.verification_LE = 0x4A282558, + $.verification_BE = 0xB95D3E15, + $.hashfn_native = CityCrc256, + $.hashfn_bswap = CityCrc256 + ); #endif diff --git a/hashes/clhash.cpp b/hashes/clhash.cpp index 1ef9d6fb..549e52da 100644 --- a/hashes/clhash.cpp +++ b/hashes/clhash.cpp @@ -26,8 +26,8 @@ #if defined(HAVE_X86_64_CLMUL) -#include "Intrinsics.h" -#include + #include "Intrinsics.h" + #include /* * CLHash is a very fast hashing function that uses the @@ -40,7 +40,8 @@ * * Template option: if you define BITMIX during compilation, extra * work is done to pass smhasher's avalanche test succesfully. - **/ + * + */ //------------------------------------------------------------ // xoshift RNG for turning uint seeds into random bytes. @@ -48,22 +49,24 @@ // Keys for scalar xorshift128. Must be non-zero. These are modified // by xorshift128plus. 
typedef struct xorshift128plus_key_s { - uint64_t part1; - uint64_t part2; + uint64_t part1; + uint64_t part2; } xorshift128plus_key_t; -static uint64_t xorshift128plus(xorshift128plus_key_t * key) { - uint64_t s1 = key->part1; +static uint64_t xorshift128plus( xorshift128plus_key_t * key ) { + uint64_t s1 = key->part1; const uint64_t s0 = key->part2; + key->part1 = s0; - s1 ^= s1 << 23; // a + s1 ^= s1 << 23; // a key->part2 = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5); // b, c return key->part2 + s0; } // key must be aligned to 16 bytes! -static void get_random_key_for_clhash(uint64_t seed1, uint64_t seed2, size_t keycnt, uint64_t * key) { +static void get_random_key_for_clhash( uint64_t seed1, uint64_t seed2, size_t keycnt, uint64_t * key ) { xorshift128plus_key_t k; + k.part1 = seed1; k.part2 = seed2; @@ -78,35 +81,36 @@ static void get_random_key_for_clhash(uint64_t seed1, uint64_t seed2, size_t key //------------------------------------------------------------ enum { - CLHASH_64BITWORDS_CHUNK_SIZE = 128, - CLHASH_64BITWORDS_EXTRA = 6, + CLHASH_64BITWORDS_CHUNK_SIZE = 128, + CLHASH_64BITWORDS_EXTRA = 6, RANDOM_64BITWORDS_NEEDED_FOR_CLHASH = CLHASH_64BITWORDS_CHUNK_SIZE + CLHASH_64BITWORDS_EXTRA, }; // static_assert((CLHASH_64BITWORDS_CHUNK_SIZE % 4) == 0) alignas(16) static uint64_t clhash_random[RANDOM_64BITWORDS_NEEDED_FOR_CLHASH]; -static bool clhash_init(void) { +static bool clhash_init( void ) { // Constants taken from SMHasher, for compatibility - get_random_key_for_clhash(UINT64_C(0xb3816f6a2c68e530), 711, - RANDOM_64BITWORDS_NEEDED_FOR_CLHASH, clhash_random); + get_random_key_for_clhash(UINT64_C(0xb3816f6a2c68e530), 711, RANDOM_64BITWORDS_NEEDED_FOR_CLHASH, clhash_random); return true; } //------------------------------------------------------------ // computes a << 1 -static inline __m128i leftshift1(__m128i a) { - const int x = 1; - __m128i u64shift = _mm_slli_epi64(a,x); - __m128i topbits = _mm_slli_si128(_mm_srli_epi64(a,64 - x),sizeof(uint64_t)); +static 
inline __m128i leftshift1( __m128i a ) { + const int x = 1; + __m128i u64shift = _mm_slli_epi64(a, x); + __m128i topbits = _mm_slli_si128(_mm_srli_epi64(a, 64 - x), sizeof(uint64_t)); + return _mm_or_si128(u64shift, topbits); } // computes a << 2 -static inline __m128i leftshift2(__m128i a) { - const int x = 2; - __m128i u64shift = _mm_slli_epi64(a,x); - __m128i topbits = _mm_slli_si128(_mm_srli_epi64(a,64 - x),sizeof(uint64_t)); +static inline __m128i leftshift2( __m128i a ) { + const int x = 2; + __m128i u64shift = _mm_slli_epi64(a, x); + __m128i topbits = _mm_slli_si128(_mm_srli_epi64(a, 64 - x), sizeof(uint64_t)); + return _mm_or_si128(u64shift, topbits); } @@ -121,7 +125,7 @@ static inline __m128i leftshift2(__m128i a) { // Precondition: given that Ahigh|Alow represents a 254-bit value // (two highest bits of Ahigh must be zero) ////////////////// -static inline __m128i lazymod127(__m128i Alow, __m128i Ahigh) { +static inline __m128i lazymod127( __m128i Alow, __m128i Ahigh ) { /////////////////////////////////////////////////// // CHECKING THE PRECONDITION: // Important: we are assuming that the two highest bits of Ahigh @@ -136,184 +140,189 @@ static inline __m128i lazymod127(__m128i Alow, __m128i Ahigh) { // credit for simplified implementation : Jan Wassenberg __m128i shift1 = leftshift1(Ahigh); __m128i shift2 = leftshift2(Ahigh); - __m128i final = _mm_xor_si128(_mm_xor_si128(Alow, shift1),shift2); + __m128i final = _mm_xor_si128(_mm_xor_si128(Alow, shift1), shift2); + return final; } // multiplication with lazy reduction // assumes that the two highest bits of the 256-bit multiplication are zeros // returns a lazy reduction -static inline __m128i mul128by128to128_lazymod127( __m128i A, __m128i B) { - __m128i Amix1 = _mm_clmulepi64_si128(A,B,0x01); - __m128i Amix2 = _mm_clmulepi64_si128(A,B,0x10); - __m128i Alow = _mm_clmulepi64_si128(A,B,0x00); - __m128i Ahigh = _mm_clmulepi64_si128(A,B,0x11); - __m128i Amix = _mm_xor_si128(Amix1,Amix2); - Amix1 = 
_mm_slli_si128(Amix,8); - Amix2 = _mm_srli_si128(Amix,8); - Alow = _mm_xor_si128(Alow,Amix1); - Ahigh = _mm_xor_si128(Ahigh,Amix2); +static inline __m128i mul128by128to128_lazymod127( __m128i A, __m128i B ) { + __m128i Amix1 = _mm_clmulepi64_si128(A, B, 0x01); + __m128i Amix2 = _mm_clmulepi64_si128(A, B, 0x10); + __m128i Alow = _mm_clmulepi64_si128(A, B, 0x00); + __m128i Ahigh = _mm_clmulepi64_si128(A, B, 0x11); + __m128i Amix = _mm_xor_si128(Amix1, Amix2); + + Amix1 = _mm_slli_si128(Amix, 8); + Amix2 = _mm_srli_si128(Amix, 8); + Alow = _mm_xor_si128(Alow , Amix1); + Ahigh = _mm_xor_si128(Ahigh, Amix2); return lazymod127(Alow, Ahigh); } // multiply the length and the some key, no modulo -static __m128i lazyLengthHash(uint64_t keylength, uint64_t length) { - const __m128i lengthvector = _mm_set_epi64x(keylength,length); - const __m128i clprod1 = _mm_clmulepi64_si128( lengthvector, lengthvector, 0x10); +static __m128i lazyLengthHash( uint64_t keylength, uint64_t length ) { + const __m128i lengthvector = _mm_set_epi64x(keylength, length); + const __m128i clprod1 = _mm_clmulepi64_si128(lengthvector, lengthvector, 0x10); + return clprod1; } // modulo reduction to 64-bit value. The high 64 bits contain garbage, // see precompReduction64 -static inline __m128i precompReduction64_si128( __m128i A) { - //const __m128i C = _mm_set_epi64x(1U,(1U<<4)+(1U<<3)+(1U<<1)+(1U<<0)); // C is the irreducible poly. 
(64,4,3,1,0) - const __m128i C = _mm_cvtsi64_si128((1U<<4)+(1U<<3)+(1U<<1)+(1U<<0)); - __m128i Q2 = _mm_clmulepi64_si128( A, C, 0x01); - __m128i Q3 = _mm_shuffle_epi8(_mm_setr_epi8( - 0, 27, 54, 45, - 108, 119, 90, 65, - (uint8_t)216, (uint8_t)195, (uint8_t)238, (uint8_t)245, - (uint8_t)180, (uint8_t)175, (uint8_t)130, (uint8_t)153) , - _mm_srli_si128(Q2,8)); - __m128i Q4 = _mm_xor_si128(Q2,A); - const __m128i final = _mm_xor_si128(Q3,Q4); +static inline __m128i precompReduction64_si128( __m128i A ) { + // const __m128i C = _mm_set_epi64x(1U,(1U<<4)+(1U<<3)+(1U<<1)+(1U<<0)); // C is the irreducible poly. (64,4,3,1,0) + const __m128i C = _mm_cvtsi64_si128((1U << 4) + (1U << 3) + (1U << 1) + (1U << 0)); + __m128i Q2 = _mm_clmulepi64_si128(A, C, 0x01); + __m128i Q3 = _mm_shuffle_epi8(_mm_setr_epi8(0, 27, 54, 45, 108, 119, 90, 65, (uint8_t)216, (uint8_t)195, + (uint8_t)238, (uint8_t)245, (uint8_t)180, (uint8_t)175, (uint8_t)130, (uint8_t)153), _mm_srli_si128(Q2, 8)); + __m128i Q4 = _mm_xor_si128(Q2, A); + const __m128i final = _mm_xor_si128(Q3, Q4); + return final; /// WARNING: HIGH 64 BITS CONTAIN GARBAGE } -static inline uint64_t precompReduction64( __m128i A) { +static inline uint64_t precompReduction64( __m128i A ) { return _mm_cvtsi128_si64(precompReduction64_si128(A)); } // hashing the bits in value using the keys key1 and key2 (only the // first 64 bits of key2 are used). This is basically (a xor k1) * (b // xor k2) mod p with length component. 
-static uint64_t simple128to64hashwithlength(const __m128i value, const __m128i key, uint64_t keylength, uint64_t length) { - const __m128i add = _mm_xor_si128 (value,key); - const __m128i clprod1 = _mm_clmulepi64_si128( add, add, 0x10); - const __m128i total = _mm_xor_si128 (clprod1,lazyLengthHash(keylength, length)); +static uint64_t simple128to64hashwithlength( const __m128i value, const __m128i key, + uint64_t keylength, uint64_t length ) { + const __m128i add = _mm_xor_si128(value, key); + const __m128i clprod1 = _mm_clmulepi64_si128(add, add, 0x10); + const __m128i total = _mm_xor_si128(clprod1, lazyLengthHash(keylength, length)); + return precompReduction64(total); } // we expect length to have value 128 or, at least, to be divisible by 4. -template < bool bswap > -static __m128i clmulhalfscalarproductwithoutreduction(const __m128i * randomsource, - const uint64_t * string, const size_t length) { +template +static __m128i clmulhalfscalarproductwithoutreduction( const __m128i * randomsource, + const uint64_t * string, const size_t length ) { const uint64_t * const endstring = string + length; __m128i acc = _mm_setzero_si128(); + // we expect length = 128 for (; string + 3 < endstring; randomsource += 2, string += 4) { - const __m128i temp1 = _mm_load_si128( randomsource); - const __m128i temp2 = _mm_lddqu_si128((const __m128i *) string); - const __m128i temp3 = bswap ? mm_bswap64(temp2) : temp2; - const __m128i add1 = _mm_xor_si128(temp1, temp3); - const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); - acc = _mm_xor_si128(clprod1, acc); - const __m128i temp12 = _mm_load_si128(randomsource + 1); - const __m128i temp22 = _mm_lddqu_si128((const __m128i *) (string + 2)); - const __m128i temp32 = bswap ? mm_bswap64(temp22) : temp22; - const __m128i add12 = _mm_xor_si128(temp12, temp32); + const __m128i temp1 = _mm_load_si128(randomsource); + const __m128i temp2 = _mm_lddqu_si128((const __m128i *)string); + const __m128i temp3 = bswap ? 
mm_bswap64(temp2) : temp2; + const __m128i add1 = _mm_xor_si128(temp1, temp3); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1 , acc); + const __m128i temp12 = _mm_load_si128(randomsource + 1); + const __m128i temp22 = _mm_lddqu_si128((const __m128i *)(string + 2)); + const __m128i temp32 = bswap ? mm_bswap64(temp22) : temp22; + const __m128i add12 = _mm_xor_si128(temp12, temp32); const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10); acc = _mm_xor_si128(clprod12, acc); } return acc; } -template < bool bswap > -static __m128i clmulhalfscalarproductwithtailwithoutreduction(const __m128i * randomsource, - const uint64_t * string, const size_t length) { +template +static __m128i clmulhalfscalarproductwithtailwithoutreduction( const __m128i * randomsource, + const uint64_t * string, const size_t length ) { const uint64_t * const endstring = string + length; __m128i acc = _mm_setzero_si128(); + for (; string + 3 < endstring; randomsource += 2, string += 4) { - const __m128i temp1 = _mm_load_si128(randomsource); - const __m128i temp2 = _mm_lddqu_si128((const __m128i *) string); - const __m128i temp3 = bswap ? mm_bswap64(temp2) : temp2; - const __m128i add1 = _mm_xor_si128(temp1, temp3); - const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); - acc = _mm_xor_si128(clprod1, acc); - const __m128i temp12 = _mm_load_si128(randomsource+1); - const __m128i temp22 = _mm_lddqu_si128((const __m128i *) (string + 2)); - const __m128i temp32 = bswap ? mm_bswap64(temp22) : temp22; - const __m128i add12 = _mm_xor_si128(temp12, temp32); + const __m128i temp1 = _mm_load_si128(randomsource); + const __m128i temp2 = _mm_lddqu_si128((const __m128i *)string); + const __m128i temp3 = bswap ? 
mm_bswap64(temp2) : temp2; + const __m128i add1 = _mm_xor_si128(temp1, temp3); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1 , acc); + const __m128i temp12 = _mm_load_si128(randomsource + 1); + const __m128i temp22 = _mm_lddqu_si128((const __m128i *)(string + 2)); + const __m128i temp32 = bswap ? mm_bswap64(temp22) : temp22; + const __m128i add12 = _mm_xor_si128(temp12, temp32); const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10); acc = _mm_xor_si128(clprod12, acc); } if (string + 1 < endstring) { - const __m128i temp1 = _mm_load_si128(randomsource); - const __m128i temp2 = _mm_lddqu_si128((const __m128i *) string); - const __m128i temp3 = bswap ? mm_bswap64(temp2) : temp2; - const __m128i add1 = _mm_xor_si128(temp1, temp3); + const __m128i temp1 = _mm_load_si128(randomsource); + const __m128i temp2 = _mm_lddqu_si128((const __m128i *)string); + const __m128i temp3 = bswap ? mm_bswap64(temp2) : temp2; + const __m128i add1 = _mm_xor_si128(temp1, temp3); const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); - acc = _mm_xor_si128(clprod1, acc); + acc = _mm_xor_si128(clprod1, acc); randomsource += 1; - string += 2; + string += 2; } if (string < endstring) { - const __m128i temp1 = _mm_load_si128(randomsource); - const __m128i temp2 = _mm_loadl_epi64((const __m128i *)string); - const __m128i temp3 = bswap ? mm_bswap64(temp2) : temp2; - const __m128i add1 = _mm_xor_si128(temp1, temp3); + const __m128i temp1 = _mm_load_si128(randomsource); + const __m128i temp2 = _mm_loadl_epi64((const __m128i *)string); + const __m128i temp3 = bswap ? 
mm_bswap64(temp2) : temp2; + const __m128i add1 = _mm_xor_si128(temp1, temp3); const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); acc = _mm_xor_si128(clprod1, acc); } return acc; } -template < bool bswap > -static __m128i clmulhalfscalarproductwithtailwithoutreductionWithExtraWord(const __m128i * randomsource, - const uint64_t * string, const size_t length, const uint64_t extraword) { +template +static __m128i clmulhalfscalarproductwithtailwithoutreductionWithExtraWord( const __m128i * randomsource, + const uint64_t * string, const size_t length, const uint64_t extraword ) { const uint64_t * const endstring = string + length; __m128i acc = _mm_setzero_si128(); + for (; string + 3 < endstring; randomsource += 2, string += 4) { - const __m128i temp1 = _mm_load_si128(randomsource); - const __m128i temp2 = _mm_lddqu_si128((const __m128i *) string); - const __m128i temp3 = bswap ? mm_bswap64(temp2) : temp2; - const __m128i add1 = _mm_xor_si128(temp1, temp3); - const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); - acc = _mm_xor_si128(clprod1, acc); - const __m128i temp12 = _mm_load_si128(randomsource+1); - const __m128i temp22 = _mm_lddqu_si128((const __m128i *) (string + 2)); - const __m128i temp32 = bswap ? mm_bswap64(temp22) : temp22; - const __m128i add12 = _mm_xor_si128(temp12, temp32); + const __m128i temp1 = _mm_load_si128(randomsource); + const __m128i temp2 = _mm_lddqu_si128((const __m128i *)string); + const __m128i temp3 = bswap ? mm_bswap64(temp2) : temp2; + const __m128i add1 = _mm_xor_si128(temp1, temp3); + const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); + acc = _mm_xor_si128(clprod1 , acc); + const __m128i temp12 = _mm_load_si128(randomsource + 1); + const __m128i temp22 = _mm_lddqu_si128((const __m128i *)(string + 2)); + const __m128i temp32 = bswap ? 
mm_bswap64(temp22) : temp22; + const __m128i add12 = _mm_xor_si128(temp12, temp32); const __m128i clprod12 = _mm_clmulepi64_si128(add12, add12, 0x10); acc = _mm_xor_si128(clprod12, acc); } if (string + 1 < endstring) { - const __m128i temp1 = _mm_load_si128(randomsource); - const __m128i temp2 = _mm_lddqu_si128((const __m128i *) string); - const __m128i temp3 = bswap ? mm_bswap64(temp2) : temp2; - const __m128i add1 = _mm_xor_si128(temp1, temp3); + const __m128i temp1 = _mm_load_si128(randomsource); + const __m128i temp2 = _mm_lddqu_si128((const __m128i *)string); + const __m128i temp3 = bswap ? mm_bswap64(temp2) : temp2; + const __m128i add1 = _mm_xor_si128(temp1, temp3); const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); - acc = _mm_xor_si128(clprod1, acc); + acc = _mm_xor_si128(clprod1, acc); randomsource += 1; - string += 2; + string += 2; } // we have to append an extra 1 if (string < endstring) { - const __m128i temp1 = _mm_load_si128(randomsource); - const __m128i temp2 = _mm_set_epi64x(extraword,GET_U64((const uint8_t *)string, 0)); - const __m128i temp3 = bswap ? mm_bswap64(temp2) : temp2; - const __m128i add1 = _mm_xor_si128(temp1, temp3); + const __m128i temp1 = _mm_load_si128(randomsource); + const __m128i temp2 = _mm_set_epi64x(extraword, GET_U64((const uint8_t *)string, 0)); + const __m128i temp3 = bswap ? mm_bswap64(temp2) : temp2; + const __m128i add1 = _mm_xor_si128(temp1, temp3); const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x10); acc = _mm_xor_si128(clprod1, acc); } else { - const __m128i temp1 = _mm_load_si128(randomsource); - const __m128i temp2 = _mm_loadl_epi64((const __m128i *)&extraword); - const __m128i temp3 = bswap ? mm_bswap64(temp2) : temp2; - const __m128i add1 = _mm_xor_si128(temp1, temp3); + const __m128i temp1 = _mm_load_si128(randomsource); + const __m128i temp2 = _mm_loadl_epi64((const __m128i *)&extraword); + const __m128i temp3 = bswap ? 
mm_bswap64(temp2) : temp2; + const __m128i add1 = _mm_xor_si128(temp1, temp3); const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x01); acc = _mm_xor_si128(clprod1, acc); } return acc; } -template < bool bswap > -static __m128i clmulhalfscalarproductOnlyExtraWord(const __m128i * randomsource, - const uint64_t extraword) { - const __m128i temp1 = _mm_load_si128(randomsource); - const __m128i temp2 = _mm_loadl_epi64((const __m128i *)&extraword); - const __m128i temp3 = bswap ? mm_bswap64(temp2) : temp2; - const __m128i add1 = _mm_xor_si128(temp1, temp3); +template +static __m128i clmulhalfscalarproductOnlyExtraWord( const __m128i * randomsource, const uint64_t extraword ) { + const __m128i temp1 = _mm_load_si128(randomsource); + const __m128i temp2 = _mm_loadl_epi64((const __m128i *)&extraword); + const __m128i temp3 = bswap ? mm_bswap64(temp2) : temp2; + const __m128i add1 = _mm_xor_si128(temp1, temp3); const __m128i clprod1 = _mm_clmulepi64_si128(add1, add1, 0x01); + return clprod1; } @@ -321,7 +330,7 @@ static __m128i clmulhalfscalarproductOnlyExtraWord(const __m128i * randomsource, // an invertible function used to mix the bits // borrowed directly from murmurhash //////// -static inline uint64_t fmix64 ( uint64_t k ) { +static inline uint64_t fmix64( uint64_t k ) { k ^= k >> 33; k *= UINT64_C(0xff51afd7ed558ccd); k ^= k >> 33; @@ -333,31 +342,37 @@ static inline uint64_t fmix64 ( uint64_t k ) { // there always remain an incomplete word that has 1,2, 3, 4, 5, 6, 7 // used bytes. we append 0s to it. The result is really a fancy 8-byte buffer, so // this routine does not care about byteswapping. -static inline uint64_t createLastWord(const size_t lengthbyte, const uint64_t * lastw) { +static inline uint64_t createLastWord( const size_t lengthbyte, const uint64_t * lastw ) { const int significantbytes = lengthbyte % sizeof(uint64_t); - uint64_t lastword = 0; - memcpy(&lastword,lastw,significantbytes); // could possibly be faster? 
+ uint64_t lastword = 0; + + memcpy(&lastword, lastw, significantbytes); // could possibly be faster? return lastword; } // The seeding here is homegrown for SMHasher3 -template < bool bitmix, bool bswap > -static uint64_t clhash(const void * random, const uint8_t * stringbyte, const size_t lengthbyte, const uint64_t seed) { - assert(((uintptr_t) random & 15) == 0);// we expect cache line alignment for the keys +template +static uint64_t clhash( const void * random, const uint8_t * stringbyte, + const size_t lengthbyte, const uint64_t seed ) { + assert(((uintptr_t)random & 15) == 0); // we expect cache line alignment for the keys // We process the data in chunks of 16 cache lines (m should be divisible by 4). const uint32_t m = CLHASH_64BITWORDS_CHUNK_SIZE; const uint32_t m128neededperblock = m / 2; // How many 128-bit words of random bits we use per block. + const uint64_t * string = (const uint64_t *)stringbyte; - const size_t length = lengthbyte / sizeof(uint64_t); // # of complete words - const size_t lengthinc = (lengthbyte + sizeof(uint64_t) - 1) / sizeof(uint64_t); // # of words, including partial ones - const __m128i * rs64 = (__m128i *)random; - const __m128i seed128 = lazyLengthHash(((const uint64_t *)(rs64 + m128neededperblock + 2))[1], seed); + + const size_t length = lengthbyte / sizeof(uint64_t); // # of complete words + const size_t lengthinc = (lengthbyte + sizeof(uint64_t) - 1) / sizeof(uint64_t); // # of words, including partial + // ones + + const __m128i * rs64 = (__m128i * )random; + const __m128i seed128 = lazyLengthHash(((const uint64_t *)(rs64 + m128neededperblock + 2))[1], seed); // to preserve alignment on cache lines for main loop, we pick random bits at the end - __m128i polyvalue = _mm_load_si128(rs64 + m128neededperblock); + __m128i polyvalue = _mm_load_si128(rs64 + m128neededperblock); // setting two highest bits to zero - polyvalue = _mm_and_si128(polyvalue,_mm_setr_epi32(0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0x3fffffff)); + polyvalue = 
_mm_and_si128(polyvalue, _mm_setr_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x3fffffff)); // we should check that polyvalue is non-zero, though this is best done outside the function and highly unlikely // long strings // modified from length to lengthinc to address issue #3 raised by Eik List @@ -370,11 +385,11 @@ static uint64_t clhash(const void * random, const uint8_t * stringbyte, const si for (; t + m <= length; t += m) { // we compute something like // acc+= polyvalue * acc + h1 - acc = mul128by128to128_lazymod127(polyvalue,acc); - const __m128i h1 = clmulhalfscalarproductwithoutreduction(rs64, string + t, m); + acc = mul128by128to128_lazymod127(polyvalue, acc); + const __m128i h1 = clmulhalfscalarproductwithoutreduction(rs64, string + t, m); acc = _mm_xor_si128(acc, h1); } - const uint32_t remain = length - t; // number of completely filled words + const uint32_t remain = length - t; // number of completely filled words if (remain != 0) { // we compute something like @@ -382,12 +397,12 @@ static uint64_t clhash(const void * random, const uint8_t * stringbyte, const si acc = mul128by128to128_lazymod127(polyvalue, acc); if (lengthbyte % sizeof(uint64_t) == 0) { const __m128i h1 = - clmulhalfscalarproductwithtailwithoutreduction(rs64, string + t, remain); + clmulhalfscalarproductwithtailwithoutreduction(rs64, string + t, remain); acc = _mm_xor_si128(acc, h1); } else { const uint64_t lastword = createLastWord(lengthbyte, (string + length)); - const __m128i h1 = - clmulhalfscalarproductwithtailwithoutreductionWithExtraWord( + const __m128i h1 = + clmulhalfscalarproductwithtailwithoutreductionWithExtraWord( rs64, string + t, remain, lastword); acc = _mm_xor_si128(acc, h1); } @@ -395,24 +410,22 @@ static uint64_t clhash(const void * random, const uint8_t * stringbyte, const si // there are no completely filled words left, but there is one partial word. 
acc = mul128by128to128_lazymod127(polyvalue, acc); const uint64_t lastword = createLastWord(lengthbyte, (string + length)); - const __m128i h1 = clmulhalfscalarproductOnlyExtraWord(rs64, lastword); + const __m128i h1 = clmulhalfscalarproductOnlyExtraWord(rs64, lastword); acc = _mm_xor_si128(acc, h1); } - const __m128i finalkey = _mm_load_si128(rs64 + m128neededperblock + 1); + const __m128i finalkey = _mm_load_si128(rs64 + m128neededperblock + 1); const uint64_t keylength = ((const uint64_t *)(rs64 + m128neededperblock + 2))[0]; return simple128to64hashwithlength(acc, finalkey, keylength, (uint64_t)lengthbyte); - } else { // short strings __m128i acc; - if(lengthbyte % sizeof(uint64_t) == 0) { - acc = clmulhalfscalarproductwithtailwithoutreduction(rs64, string, length); + if (lengthbyte % sizeof(uint64_t) == 0) { + acc = clmulhalfscalarproductwithtailwithoutreduction (rs64, string, length); } else { const uint64_t lastword = createLastWord(lengthbyte, (string + length)); - acc = clmulhalfscalarproductwithtailwithoutreductionWithExtraWord( - rs64, string, length, lastword); + acc = clmulhalfscalarproductwithtailwithoutreductionWithExtraWord(rs64, string, length, lastword); } // Mix the seed in using a non-commuting operation with all the xors and clmuls. 
acc = _mm_sub_epi8(acc, seed128); @@ -424,15 +437,17 @@ static uint64_t clhash(const void * random, const uint8_t * stringbyte, const si } //------------------------------------------------------------ -template < bool bswap > -static void CLHash(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void CLHash( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h = clhash(clhash_random, (const uint8_t *)in, len, (uint64_t)seed); + PUT_U64(h, (uint8_t *)out, 0); } -template < bool bswap > -static void CLHashNomix(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void CLHashNomix( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h = clhash(clhash_random, (const uint8_t *)in, len, (uint64_t)seed); + PUT_U64(h, (uint8_t *)out, 0); } @@ -440,46 +455,46 @@ static void CLHashNomix(const void * in, const size_t len, const seed_t seed, vo //------------------------------------------------------------ REGISTER_FAMILY(clhash, - $.src_url = "https://github.com/lemire/clhash", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://github.com/lemire/clhash", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); #if defined(HAVE_X86_64_CLMUL) REGISTER_HASH(CLhash__bitmix, - $.desc = "Carryless multiplication hash, with -DBITMIX", - $.hash_flags = - FLAG_HASH_CLMUL_BASED | - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_NO_SEED | - FLAG_HASH_SYSTEM_SPECIFIC , - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_GPL3, - $.bits = 64, - $.verification_LE = 0x578865A5, - $.verification_BE = 0x0D2B93FA, - $.hashfn_native = CLHash, - $.hashfn_bswap = CLHash, - $.initfn = clhash_init -); + $.desc = "Carryless multiplication hash, with -DBITMIX", + $.hash_flags = + FLAG_HASH_CLMUL_BASED | + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_NO_SEED | + FLAG_HASH_SYSTEM_SPECIFIC, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_GPL3, + $.bits 
= 64, + $.verification_LE = 0x578865A5, + $.verification_BE = 0x0D2B93FA, + $.hashfn_native = CLHash, + $.hashfn_bswap = CLHash, + $.initfn = clhash_init + ); REGISTER_HASH(CLhash, - $.desc = "Carryless multiplication hash, without -DBITMIX", - $.hash_flags = - FLAG_HASH_CLMUL_BASED | - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_NO_SEED | - FLAG_HASH_SYSTEM_SPECIFIC , - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_GPL3, - $.bits = 64, - $.verification_LE = 0xDD8248E4, - $.verification_BE = 0x25DDBEC2, - $.hashfn_native = CLHashNomix, - $.hashfn_bswap = CLHashNomix, - $.initfn = clhash_init -); + $.desc = "Carryless multiplication hash, without -DBITMIX", + $.hash_flags = + FLAG_HASH_CLMUL_BASED | + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_NO_SEED | + FLAG_HASH_SYSTEM_SPECIFIC, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_GPL3, + $.bits = 64, + $.verification_LE = 0xDD8248E4, + $.verification_BE = 0x25DDBEC2, + $.hashfn_native = CLHashNomix, + $.hashfn_bswap = CLHashNomix, + $.initfn = clhash_init + ); #endif diff --git a/hashes/crap.cpp b/hashes/crap.cpp index 7ac675d4..fa198376 100644 --- a/hashes/crap.cpp +++ b/hashes/crap.cpp @@ -36,14 +36,14 @@ // https://web.archive.org/web/20150218011152/http://floodyberry.com/noncryptohashzoo/CrapWow.html // https://web.archive.org/web/20150218011033/http://floodyberry.com/noncryptohashzoo/CrapWow64.html -template < bool bswap > -static uint32_t Crap8_impl(const uint8_t * key, size_t len, uint32_t seed) { -#define c8fold( a, b, y, z ) { \ - p = (uint32_t)(a) * (uint64_t)(b); \ - y ^= (uint32_t)p; \ - z ^= (uint32_t)(p >> 32); \ +template +static uint32_t Crap8_impl( const uint8_t * key, size_t len, uint32_t seed ) { +#define c8fold( a, b, y, z ) { \ + p = (uint32_t)(a) * (uint64_t)(b); \ + y ^= (uint32_t)p; \ + z ^= (uint32_t)(p >> 32); \ } -#define c8mix( in ) { h *= m; c8fold( in, m, k, h ); } +#define c8mix(in) { h *= m; c8fold(in, m, k, h); } const uint32_t m = 0x83d2e73b, n = 0x97e1cc59; 
uint32_t h = (uint32_t)len + seed, k = n + (uint32_t)len; @@ -62,28 +62,29 @@ static uint32_t Crap8_impl(const uint8_t * key, size_t len, uint32_t seed) { if (isLE() ^ bswap) { c8mix(GET_U32(key, 0) & ((1 << (len * 8)) - 1)); } else { - c8mix(GET_U32(key, 0) >> (32 -(len * 8))); + c8mix(GET_U32(key, 0) >> (32 - (len * 8))); } } c8fold(h ^ k, n, k, k); return k; } + #undef c8mix #undef c8fold -template < bool bswap > -static uint32_t CrapWow_impl(const uint8_t * key, size_t len, uint32_t seed) { -#define cwfold( a, b, lo, hi) { \ - p = (uint32_t)(a) * (uint64_t)(b); \ - lo ^= (uint32_t)p; \ - hi ^= (uint32_t)(p >> 32); \ +template +static uint32_t CrapWow_impl( const uint8_t * key, size_t len, uint32_t seed ) { +#define cwfold( a, b, lo, hi) { \ + p = (uint32_t)(a) * (uint64_t)(b); \ + lo ^= (uint32_t)p; \ + hi ^= (uint32_t)(p >> 32); \ } -#define cwmixa( in ) { cwfold( in, m, k, h ); } -#define cwmixb( in ) { cwfold( in, n, h, k ); } +#define cwmixa(in) { cwfold(in, m, k, h); } +#define cwmixb(in) { cwfold(in, n, h, k); } - const uint32_t m = 0x57559429, n = 0x5052acdb; - uint32_t h = (uint32_t)len, k = (uint32_t)len + seed + n; - uint64_t p; + const uint32_t m = 0x57559429, n = 0x5052acdb; + uint32_t h = (uint32_t)len, k = (uint32_t)len + seed + n; + uint64_t p; while (len >= 8) { cwmixb(GET_U32(key, 0)); @@ -102,26 +103,27 @@ static uint32_t CrapWow_impl(const uint8_t * key, size_t len, uint32_t seed) { } } - cwmixb(h ^ (k + n)); - return k ^ h; + cwmixb(h ^ (k + n)); + return k ^ h; } + #undef cwmixb #undef cwmixa #undef cwfold -template < bool bswap > -static uint64_t CrapWow64_impl(const uint8_t * key, size_t len, uint64_t seed) { -#define cwfold(a, b, lo, hi) { \ - mult64_128(pl, ph, a, b); \ - lo ^= pl; \ - hi ^= ph; \ +template +static uint64_t CrapWow64_impl( const uint8_t * key, size_t len, uint64_t seed ) { +#define cwfold(a, b, lo, hi) { \ + mult64_128(pl, ph, a, b); \ + lo ^= pl; \ + hi ^= ph; \ } -#define cwmixa( in ) { cwfold( in, m, k, h ); } 
-#define cwmixb( in ) { cwfold( in, n, h, k ); } +#define cwmixa(in) { cwfold(in, m, k, h); } +#define cwmixb(in) { cwfold(in, n, h, k); } const uint64_t m = UINT64_C(0x95b47aa3355ba1a1), n = UINT64_C(0x8a970be7488fda55); - uint64_t h = (uint64_t)len, k = (uint64_t)len + seed + n; - uint64_t pl, ph; + uint64_t h = (uint64_t)len, k = (uint64_t)len + seed + n; + uint64_t pl, ph; while (len >= 16) { cwmixb(GET_U64(key, 0)); @@ -140,81 +142,85 @@ static uint64_t CrapWow64_impl(const uint8_t * key, size_t len, uint64_t seed) { } } - cwmixb(h ^ (k + n)); - return k ^ h; + cwmixb(h ^ (k + n)); + return k ^ h; } + #undef cwmixb #undef cwmixa #undef cwfold //------------------------------------------------------------ -template < bool bswap > -static void Crap8(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void Crap8( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = Crap8_impl((const uint8_t *)in, len, (uint32_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void CrapWow(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void CrapWow( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = CrapWow_impl((const uint8_t *)in, len, (uint32_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void CrapWow64(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void CrapWow64( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h = CrapWow64_impl((const uint8_t *)in, len, (uint64_t)seed); + PUT_U64(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(crap, - $.src_url = "https://web.archive.org/web/20150218011033/http://floodyberry.com/noncryptohashzoo/", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = 
"https://web.archive.org/web/20150218011033/http://floodyberry.com/noncryptohashzoo/", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(Crap8, - $.desc = "Noncryptohashzoo's Crap8 hash", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x743E97A1, - $.verification_BE = 0xDFE06AD9, - $.hashfn_native = Crap8, - $.hashfn_bswap = Crap8 -); + $.desc = "Noncryptohashzoo's Crap8 hash", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x743E97A1, + $.verification_BE = 0xDFE06AD9, + $.hashfn_native = Crap8, + $.hashfn_bswap = Crap8 + ); REGISTER_HASH(CrapWow, - $.desc = "Noncryptohashzoo's CrapWow hash", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x49ECB015, - $.verification_BE = 0x4EF994DF, - $.hashfn_native = CrapWow, - $.hashfn_bswap = CrapWow -); + $.desc = "Noncryptohashzoo's CrapWow hash", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x49ECB015, + $.verification_BE = 0x4EF994DF, + $.hashfn_native = CrapWow, + $.hashfn_bswap = CrapWow + ); REGISTER_HASH(CrapWow_64, - $.desc = "Noncryptohashzoo's CrapWow64 hash", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x669D3A9B, - $.verification_BE = 0xCBB7690C, - $.hashfn_native = CrapWow64, - $.hashfn_bswap = CrapWow64 -); + $.desc = "Noncryptohashzoo's CrapWow64 hash", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | 
+ FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x669D3A9B, + $.verification_BE = 0xCBB7690C, + $.hashfn_native = CrapWow64, + $.hashfn_bswap = CrapWow64 + ); diff --git a/hashes/crc.cpp b/hashes/crc.cpp index 9fc8bb25..8d29688f 100644 --- a/hashes/crc.cpp +++ b/hashes/crc.cpp @@ -29,12 +29,12 @@ #include "Hashlib.h" typedef struct { - uint32_t crc32_long[4][256]; - uint32_t crc32_short[4][256]; + uint32_t crc32_long[4][256]; + uint32_t crc32_short[4][256]; } crc_hw_table; #if defined(HAVE_X86_64_CRC32C) -#include "Intrinsics.h" + #include "Intrinsics.h" // Fancy hardware version @@ -44,7 +44,7 @@ typedef struct { * mat must have at least as many entries as the power of two for most * significant one bit in vec. */ -static inline uint32_t gf2_matrix_times(uint32_t * mat, uint32_t vec) { +static inline uint32_t gf2_matrix_times( uint32_t * mat, uint32_t vec ) { uint32_t sum; sum = 0; @@ -60,7 +60,7 @@ static inline uint32_t gf2_matrix_times(uint32_t * mat, uint32_t vec) { * Multiply a matrix by itself over GF(2). Both mat and square must * have 32 rows. */ -static inline void gf2_matrix_square(uint32_t * square, uint32_t * mat) { +static inline void gf2_matrix_square( uint32_t * square, uint32_t * mat ) { for (int n = 0; n < 32; n++) { square[n] = gf2_matrix_times(mat, mat[n]); } @@ -74,33 +74,35 @@ static inline void gf2_matrix_square(uint32_t * square, uint32_t * mat) { * could be easily written for any len, but that is not needed for * this application. 
*/ -template < uint32_t polynomial > -static void crc32_zeros_op(uint32_t * even, size_t len) { +template +static void crc32_zeros_op( uint32_t * even, size_t len ) { uint32_t row; - uint32_t odd[32]; /* odd-power-of-two zeros operator */ + uint32_t odd[32]; /* odd-power-of-two zeros operator */ /* put operator for one zero bit in odd */ - odd[0] = polynomial; /* CRC-32 polynomial */ - row = 1; + odd[0] = polynomial; /* CRC-32 polynomial */ + row = 1; for (int n = 1; n < 32; n++) { odd[n] = row; - row <<= 1; + row <<= 1; } /* put operator for two zero bits in even */ - gf2_matrix_square(even, odd); + gf2_matrix_square(even, odd ); /* put operator for four zero bits in odd */ - gf2_matrix_square(odd, even); + gf2_matrix_square(odd , even); - /* first square will put the operator for one zero byte (eight zero bits), - in even -- next square puts operator for two zero bytes in odd, and so - on, until len has been rotated down to zero */ + /* + * first square will put the operator for one zero byte (eight zero bits), + * in even -- next square puts operator for two zero bytes in odd, and so + * on, until len has been rotated down to zero + */ do { - gf2_matrix_square(even, odd); + gf2_matrix_square(even, odd ); len >>= 1; if (len == 0) { return; } - gf2_matrix_square(odd, even); + gf2_matrix_square(odd , even); len >>= 1; } while (len); @@ -114,12 +116,12 @@ static void crc32_zeros_op(uint32_t * even, size_t len) { * Take a length and build four lookup tables for applying the zeros * operator for that length, byte-by-byte on the operand. 
*/ -static void crc32_zeros(uint32_t op[32], uint32_t zeros[][256]) { +static void crc32_zeros( uint32_t op[32], uint32_t zeros[][256] ) { uint32_t n; for (n = 0; n < 256; n++) { - zeros[0][n] = gf2_matrix_times(op, n); - zeros[1][n] = gf2_matrix_times(op, n << 8); + zeros[0][n] = gf2_matrix_times(op, n ); + zeros[1][n] = gf2_matrix_times(op, n << 8); zeros[2][n] = gf2_matrix_times(op, n << 16); zeros[3][n] = gf2_matrix_times(op, n << 24); } @@ -128,13 +130,14 @@ static void crc32_zeros(uint32_t op[32], uint32_t zeros[][256]) { // Block sizes for three-way parallel crc computation. // HW_LONGBLOCK_LEN and HW_SHORTBLOCK_LEN must both be // powers of two. -static const uint32_t HW_LONGBLOCK_LEN = 8192; +static const uint32_t HW_LONGBLOCK_LEN = 8192; static const uint32_t HW_SHORTBLOCK_LEN = 256; /* Initialize tables for shifting crcs. */ -template < uint32_t polynomial > -static void crc32_init_hw(crc_hw_table * tblp) { +template +static void crc32_init_hw( crc_hw_table * tblp ) { uint32_t op[32]; + crc32_zeros_op(op, HW_LONGBLOCK_LEN); crc32_zeros(op, tblp->crc32_long); @@ -143,16 +146,16 @@ static void crc32_init_hw(crc_hw_table * tblp) { } /* Apply the zeros operator table to crc. */ -static inline uint32_t crc32_shift(const uint32_t zeros[][256], uint32_t crc) { +static inline uint32_t crc32_shift( const uint32_t zeros[][256], uint32_t crc ) { return zeros[0][crc & 0xff] ^ zeros[1][(crc >> 8) & 0xff] ^ zeros[2][(crc >> 16) & 0xff] ^ zeros[3][crc >> 24]; } /* Compute CRC-32C using the Intel hardware instruction. 
*/ -static uint32_t crc32c_hw(uint32_t crc, const crc_hw_table * tbl, const void * buf, size_t len) { +static uint32_t crc32c_hw( uint32_t crc, const crc_hw_table * tbl, const void * buf, size_t len ) { const uint8_t * next = (const uint8_t *)buf; const uint8_t * end; - uint64_t crc0, crc1, crc2; /* need to be 64 bits for crc32q */ + uint64_t crc0, crc1, crc2; /* need to be 64 bits for crc32q */ /* Pre-process the crc */ crc0 = crc ^ 0xffffffff; @@ -173,40 +176,40 @@ static uint32_t crc32c_hw(uint32_t crc, const crc_hw_table * tbl, const void * b * Bridge, and Ivy Bridge architectures, which have a throughput * of one crc per cycle, but a latency of three cycles. */ - while (len >= HW_LONGBLOCK_LEN*3) { + while (len >= HW_LONGBLOCK_LEN * 3) { crc1 = 0; crc2 = 0; - end = next + HW_LONGBLOCK_LEN; + end = next + HW_LONGBLOCK_LEN; do { - crc0 = _mm_crc32_u64(crc0, GET_U64(next, 0)); - crc1 = _mm_crc32_u64(crc1, GET_U64(next, HW_LONGBLOCK_LEN)); - crc2 = _mm_crc32_u64(crc2, GET_U64(next, HW_LONGBLOCK_LEN+HW_LONGBLOCK_LEN)); + crc0 = _mm_crc32_u64(crc0, GET_U64(next, 0)); + crc1 = _mm_crc32_u64(crc1, GET_U64(next, HW_LONGBLOCK_LEN)); + crc2 = _mm_crc32_u64(crc2, GET_U64(next, HW_LONGBLOCK_LEN + HW_LONGBLOCK_LEN)); next += 8; } while (next < end); - crc0 = crc32_shift(tbl->crc32_long, crc0) ^ crc1; - crc0 = crc32_shift(tbl->crc32_long, crc0) ^ crc2; - next += HW_LONGBLOCK_LEN*2; - len -= HW_LONGBLOCK_LEN*3; + crc0 = crc32_shift(tbl->crc32_long, crc0) ^ crc1; + crc0 = crc32_shift(tbl->crc32_long, crc0) ^ crc2; + next += HW_LONGBLOCK_LEN * 2; + len -= HW_LONGBLOCK_LEN * 3; } /* * Do the same thing, but now on HW_SHORTBLOCK_LEN*3 blocks for * the remaining data less than a HW_LONGBLOCK_LEN*3 block. 
*/ - while (len >= HW_SHORTBLOCK_LEN*3) { + while (len >= HW_SHORTBLOCK_LEN * 3) { crc1 = 0; crc2 = 0; - end = next + HW_SHORTBLOCK_LEN; + end = next + HW_SHORTBLOCK_LEN; do { - crc0 = _mm_crc32_u64(crc0, GET_U64(next, 0)); - crc1 = _mm_crc32_u64(crc1, GET_U64(next, HW_SHORTBLOCK_LEN)); - crc2 = _mm_crc32_u64(crc2, GET_U64(next, HW_SHORTBLOCK_LEN+HW_SHORTBLOCK_LEN)); + crc0 = _mm_crc32_u64(crc0, GET_U64(next, 0)); + crc1 = _mm_crc32_u64(crc1, GET_U64(next, HW_SHORTBLOCK_LEN)); + crc2 = _mm_crc32_u64(crc2, GET_U64(next, HW_SHORTBLOCK_LEN + HW_SHORTBLOCK_LEN)); next += 8; } while (next < end); - crc0 = crc32_shift(tbl->crc32_short, crc0) ^ crc1; - crc0 = crc32_shift(tbl->crc32_short, crc0) ^ crc2; - next += HW_SHORTBLOCK_LEN*2; - len -= HW_SHORTBLOCK_LEN*3; + crc0 = crc32_shift(tbl->crc32_short, crc0) ^ crc1; + crc0 = crc32_shift(tbl->crc32_short, crc0) ^ crc2; + next += HW_SHORTBLOCK_LEN * 2; + len -= HW_SHORTBLOCK_LEN * 3; } /* @@ -215,7 +218,7 @@ static uint32_t crc32c_hw(uint32_t crc, const crc_hw_table * tbl, const void * b */ end = next + (len - (len & 7)); while (next < end) { - crc0 = _mm_crc32_u64(crc0, GET_U64(next, 0)); + crc0 = _mm_crc32_u64(crc0, GET_U64(next, 0)); next += 8; } len &= 7; @@ -236,7 +239,7 @@ static uint32_t crc32c_hw(uint32_t crc, const crc_hw_table * tbl, const void * b typedef uint32_t crc_sw_table[16][256]; /* Construct table for software CRC-32 calculation. 
*/ -static void crc32_init_sw(const uint32_t POLY, crc_sw_table crc32_table) { +static void crc32_init_sw( const uint32_t POLY, crc_sw_table crc32_table ) { uint32_t n, crc, k; for (n = 0; n < 256; n++) { @@ -261,10 +264,10 @@ static void crc32_init_sw(const uint32_t POLY, crc_sw_table crc32_table) { } // Table-driven software version -template < bool bswap > -static uint32_t crc32_sw(uint32_t crci, const crc_sw_table crc32_table, const void * buf, size_t len) { +template +static uint32_t crc32_sw( uint32_t crci, const crc_sw_table crc32_table, const void * buf, size_t len ) { const uint8_t * next = (const uint8_t *)buf; - uint64_t crc; + uint64_t crc; crc = crci ^ 0xffffffff; @@ -274,46 +277,46 @@ static uint32_t crc32_sw(uint32_t crci, const crc_sw_table crc32_table, const vo } while (len >= 16) { uint64_t wd1, wd2; - wd1 = GET_U64(next, 0); - wd2 = GET_U64(next, 8); // byteswapping taken care of via table indexing! + wd1 = GET_U64(next, 0); + wd2 = GET_U64(next, 8); // byteswapping taken care of via table indexing! 
crc ^= wd1; if (bswap) { crc = - crc32_table[15][ crc & 0xff] ^ - crc32_table[14][(crc >> 8) & 0xff] ^ - crc32_table[13][(crc >> 16) & 0xff] ^ - crc32_table[12][(crc >> 24) & 0xff] ^ - crc32_table[11][(crc >> 32) & 0xff] ^ - crc32_table[10][(crc >> 40) & 0xff] ^ - crc32_table[ 9][(crc >> 48) & 0xff] ^ - crc32_table[ 8][ crc >> 56] ^ - crc32_table[ 0][ wd2 & 0xff] ^ - crc32_table[ 1][(wd2 >> 8) & 0xff] ^ - crc32_table[ 2][(wd2 >> 16) & 0xff] ^ - crc32_table[ 3][(wd2 >> 24) & 0xff] ^ - crc32_table[ 4][(wd2 >> 32) & 0xff] ^ - crc32_table[ 5][(wd2 >> 40) & 0xff] ^ - crc32_table[ 6][(wd2 >> 48) & 0xff] ^ - crc32_table[ 7][ wd2 >> 56] ; + crc32_table[15][crc & 0xff] ^ + crc32_table[14][(crc >> 8) & 0xff] ^ + crc32_table[13][(crc >> 16) & 0xff] ^ + crc32_table[12][(crc >> 24) & 0xff] ^ + crc32_table[11][(crc >> 32) & 0xff] ^ + crc32_table[10][(crc >> 40) & 0xff] ^ + crc32_table[ 9][(crc >> 48) & 0xff] ^ + crc32_table[ 8][crc >> 56] ^ + crc32_table[ 0][wd2 & 0xff] ^ + crc32_table[ 1][(wd2 >> 8) & 0xff] ^ + crc32_table[ 2][(wd2 >> 16) & 0xff] ^ + crc32_table[ 3][(wd2 >> 24) & 0xff] ^ + crc32_table[ 4][(wd2 >> 32) & 0xff] ^ + crc32_table[ 5][(wd2 >> 40) & 0xff] ^ + crc32_table[ 6][(wd2 >> 48) & 0xff] ^ + crc32_table[ 7][wd2 >> 56]; } else { crc = - crc32_table[15][ crc & 0xff] ^ - crc32_table[14][(crc >> 8) & 0xff] ^ - crc32_table[13][(crc >> 16) & 0xff] ^ - crc32_table[12][(crc >> 24) & 0xff] ^ - crc32_table[11][(crc >> 32) & 0xff] ^ - crc32_table[10][(crc >> 40) & 0xff] ^ - crc32_table[ 9][(crc >> 48) & 0xff] ^ - crc32_table[ 8][ crc >> 56] ^ - crc32_table[ 7][ wd2 & 0xff] ^ - crc32_table[ 6][(wd2 >> 8) & 0xff] ^ - crc32_table[ 5][(wd2 >> 16) & 0xff] ^ - crc32_table[ 4][(wd2 >> 24) & 0xff] ^ - crc32_table[ 3][(wd2 >> 32) & 0xff] ^ - crc32_table[ 2][(wd2 >> 40) & 0xff] ^ - crc32_table[ 1][(wd2 >> 48) & 0xff] ^ - crc32_table[ 0][ wd2 >> 56] ; + crc32_table[15][crc & 0xff] ^ + crc32_table[14][(crc >> 8) & 0xff] ^ + crc32_table[13][(crc >> 16) & 0xff] ^ + crc32_table[12][(crc 
>> 24) & 0xff] ^ + crc32_table[11][(crc >> 32) & 0xff] ^ + crc32_table[10][(crc >> 40) & 0xff] ^ + crc32_table[ 9][(crc >> 48) & 0xff] ^ + crc32_table[ 8][crc >> 56] ^ + crc32_table[ 7][wd2 & 0xff] ^ + crc32_table[ 6][(wd2 >> 8) & 0xff] ^ + crc32_table[ 5][(wd2 >> 16) & 0xff] ^ + crc32_table[ 4][(wd2 >> 24) & 0xff] ^ + crc32_table[ 3][(wd2 >> 32) & 0xff] ^ + crc32_table[ 2][(wd2 >> 40) & 0xff] ^ + crc32_table[ 1][(wd2 >> 48) & 0xff] ^ + crc32_table[ 0][wd2 >> 56]; } next += 16; len -= 16; @@ -335,17 +338,17 @@ static uint32_t crc32_sw(uint32_t crci, const crc_sw_table crc32_table, const vo /* * For now, only store 1 set of tables at a time. -*/ -static uint32_t table_poly; + */ +static uint32_t table_poly; static crc_hw_table hw_tables; static crc_sw_table sw_tables; -template < uint32_t polynomial > -static void CRC32(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void CRC32( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t crc = seed; + if (polynomial != table_poly) { - printf("CRC32 of poly %08x requested, but Init() was given %08x\n", - polynomial, table_poly); + printf("CRC32 of poly %08x requested, but Init() was given %08x\n", polynomial, table_poly); exit(1); } #if defined(HAVE_X86_64_CRC32C) @@ -363,8 +366,8 @@ static void CRC32(const void * in, const size_t len, const seed_t seed, void * o memcpy(out, &crc, 4); } -template < uint32_t polynomial > -static bool CRC32_init(void) { +template +static bool CRC32_init( void ) { table_poly = polynomial; #if defined(HAVE_X86_64_CRC32C) if (polynomial == POLY_CRC32C) { @@ -377,24 +380,24 @@ static bool CRC32_init(void) { } REGISTER_FAMILY(crc, - $.src_url = "https://github.com/baruch/crcbench/blob/master/crc-mark-adler.c", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/baruch/crcbench/blob/master/crc-mark-adler.c", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(CRC_32C, - $.desc = "CRC32-C 
(Castagnoli, 0x1EDC6F41 / 0x82F63B78)", - $.hash_flags = - FLAG_HASH_CRC_BASED | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_LICENSE_BSD, - $.bits = 32, - $.verification_LE = 0x6E6071BD, - $.verification_BE = 0x6E6071BD, - $.initfn = CRC32_init, - $.hashfn_native = CRC32, - $.hashfn_bswap = CRC32 -); + $.desc = "CRC32-C (Castagnoli, 0x1EDC6F41 / 0x82F63B78)", + $.hash_flags = + FLAG_HASH_CRC_BASED | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_LICENSE_BSD, + $.bits = 32, + $.verification_LE = 0x6E6071BD, + $.verification_BE = 0x6E6071BD, + $.initfn = CRC32_init, + $.hashfn_native = CRC32, + $.hashfn_bswap = CRC32 + ); diff --git a/hashes/discohash.cpp b/hashes/discohash.cpp index 5a77fad8..5bbae08e 100644 --- a/hashes/discohash.cpp +++ b/hashes/discohash.cpp @@ -27,36 +27,38 @@ #include "Platform.h" #include "Hashlib.h" -static const uint32_t STATE = 32; // Must be divisible by 8 -static const uint32_t STATE64 = STATE >> 3; -static const uint32_t STATEM = STATE-1; -static const uint32_t HSTATE64M = (STATE64 >> 1)-1; -static const uint32_t STATE64M = STATE64-1; -static const uint64_t P = UINT64_C(0xFFFFFFFFFFFFFFFF) - 58; -static const uint64_t Q = UINT64_C(13166748625691186689); +static const uint32_t STATE = 32; // Must be divisible by 8 +static const uint32_t STATE64 = STATE >> 3; +static const uint32_t STATEM = STATE - 1; +static const uint32_t HSTATE64M = (STATE64 >> 1) - 1; +static const uint32_t STATE64M = STATE64 - 1; +static const uint64_t P = UINT64_C( 0xFFFFFFFFFFFFFFFF) - 58; +static const uint64_t Q = UINT64_C(13166748625691186689); //-------- // State mix function -static FORCE_INLINE uint8_t ROTR8(uint8_t v, int n) { +static FORCE_INLINE uint8_t ROTR8( uint8_t v, int n ) { n = n & 7U; - if (n) - v = (v >> n) | (v << (8-n)); + if (n) { + v = (v >> n) | (v << (8 - 
n)); + } return v; } -static FORCE_INLINE void mix(uint64_t * ds, const uint32_t A) { - const uint32_t B = A+1; - ds[A] *= P; - ds[A] = ROTR64(ds[A], 23); - ds[A] *= Q; - //ds[A] = ROTR64(ds[A], 23); +static FORCE_INLINE void mix( uint64_t * ds, const uint32_t A ) { + const uint32_t B = A + 1; + + ds[A] *= P; + ds[A] = ROTR64(ds[A], 23); + ds[A] *= Q; + // ds[A] = ROTR64(ds[A], 23); - ds[B] ^= ds[A]; + ds[B] ^= ds[A]; - ds[B] *= P; - ds[B] = ROTR64(ds[B], 23); - ds[B] *= Q; - //ds[B] = ROTR64(ds[B], 23); + ds[B] *= P; + ds[B] = ROTR64(ds[B], 23); + ds[B] *= Q; + // ds[B] = ROTR64(ds[B], 23); } //--------- @@ -68,23 +70,23 @@ static FORCE_INLINE void mix(uint64_t * ds, const uint32_t A) { // // The oldver parameter "fixes" a possibly-unintentional behavior // change, details of which are below. -template < bool bswap, bool reread, bool oldver > -static FORCE_INLINE void round(uint64_t * ds, const uint8_t * m8, uint32_t len) { +template +static FORCE_INLINE void round( uint64_t * ds, const uint8_t * m8, uint32_t len ) { uint32_t index; - uint32_t sindex = 0; - uint32_t Len = len >> 3; - uint64_t counter = UINT64_C(0xfaccadaccad09997); - uint8_t counter8 = 137; + uint32_t sindex = 0; + uint32_t Len = len >> 3; + uint64_t counter = UINT64_C(0xfaccadaccad09997); + uint8_t counter8 = 137; - //#pragma omp parallel for - for(index = 0; index < Len; index++) { - uint64_t blk = GET_U64(m8, index*8); + // #pragma omp parallel for + for (index = 0; index < Len; index++) { + uint64_t blk = GET_U64(m8, index * 8); ds[sindex] += ROTR64(blk + index + counter + 1, 23); - if (reread) { blk = GET_U64(m8, index*8); } - counter += ~blk + 1; - if ( sindex == HSTATE64M ) { + if (reread) { blk = GET_U64(m8, index * 8); } + counter += ~blk + 1; + if (sindex == HSTATE64M) { mix(ds, 0); - } else if ( sindex == STATE64M ) { + } else if (sindex == STATE64M) { mix(ds, 2); sindex = -1; } @@ -103,14 +105,14 @@ static FORCE_INLINE void round(uint64_t * ds, const uint8_t * m8, uint32_t len) // are 
implemented here. Len = index << 3; if (oldver) { - sindex = Len&(STATEM); + sindex = Len & (STATEM); } else { - sindex = index&(STATEM); + sindex = index & (STATEM); } - //#pragma omp parallel for - for(index = Len; index < len; index++) { - ((uint8_t *)ds)[bswap ? (sindex^7) : sindex] += ROTR8(m8[index] + index + counter8 + 1, 23); + // #pragma omp parallel for + for (index = Len; index < len; index++) { + ((uint8_t *)ds)[bswap ? (sindex ^ 7) : sindex] += ROTR8(m8[index] + index + counter8 + 1, 23); // I also wonder if this was intended to be m8[index], to // mirror the primary 8-byte loop above... // @@ -119,8 +121,8 @@ static FORCE_INLINE void round(uint64_t * ds, const uint8_t * m8, uint32_t len) // of sindex is (len & ~7) if oldver == true, and (len >> 3) // if oldver == false. counter8 += ~m8[sindex] + 1; - mix(ds, index%STATE64M); - if ( sindex >= STATEM ) { + mix(ds, index % STATE64M); + if (sindex >= STATEM) { sindex = -1; } sindex++; @@ -134,31 +136,31 @@ static FORCE_INLINE void round(uint64_t * ds, const uint8_t * m8, uint32_t len) //--------- // main hash function -template < uint32_t hashsize, bool bswap, bool oldver > -static void BEBB4185(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void BEBB4185( const void * in, const size_t len, const seed_t seed, void * out ) { const uint8_t * key8Arr = (const uint8_t *)in; - uint8_t * out8 = (uint8_t *)out; - uint32_t seedbuf[4]; + uint8_t * out8 = (uint8_t * )out; + uint32_t seedbuf[4]; if (len >= UINT32_C(0xffffffff)) { return; } // the cali number from the Matrix (1999) uint32_t seed32 = seed; if (!bswap) { - seedbuf[0] = 0xc5550690; + seedbuf[0] = 0xc5550690; seedbuf[0] -= seed32; - seedbuf[1] = 1 + seed32; - seedbuf[2] = ~(1 - seed32); - seedbuf[3] = (1+seed32) * 0xf00dacca; + seedbuf[1] = 1 + seed32; + seedbuf[2] = ~ (1 - seed32); + seedbuf[3] = (1 + seed32) * 0xf00dacca; } else { - seedbuf[1] = 0xc5550690; + seedbuf[1] = 0xc5550690; seedbuf[1] -= seed32; - 
seedbuf[0] = 1 + seed32; - seedbuf[3] = ~(1 - seed32); - seedbuf[2] = (1+seed32) * 0xf00dacca; + seedbuf[0] = 1 + seed32; + seedbuf[3] = ~ (1 - seed32); + seedbuf[2] = (1 + seed32) * 0xf00dacca; } - uint64_t ds[STATE/8]; + uint64_t ds[STATE / 8]; // nothing up my sleeve ds[0] = UINT64_C(0x123456789abcdef0); ds[1] = UINT64_C(0x0fedcba987654321); @@ -170,27 +172,29 @@ static void BEBB4185(const void * in, const size_t len, const seed_t seed, void // variable. The mixing of the state with itself also doesn't need // bswap set, because the endianness of the data will naturally // always match the endianness of the ds[] values. - round(ds, key8Arr, (uint32_t)len); - round(ds, (uint8_t *)seedbuf, 16); - round(ds, (uint8_t *)ds, STATE); + round(ds, key8Arr , (uint32_t)len); + round(ds, (uint8_t *)seedbuf, 16 ); + round( ds, (uint8_t *)ds, STATE ); - /** - printf("ds = %#018" PRIx64 " %#018" PRIx64 " %#018" PRIx64 " %#018" PRIx64 "\n", - ds[0], ds[1], ds[2], ds[3] ); - **/ + /* + * + * printf("ds = %#018" PRIx64 " %#018" PRIx64 " %#018" PRIx64 " %#018" PRIx64 "\n", + * ds[0], ds[1], ds[2], ds[3] ); + * + */ - uint64_t h[STATE64] = {0}; + uint64_t h[STATE64] = { 0 }; - h[0] = ds[2]; - h[1] = ds[3]; + h[0] = ds[2]; + h[1] = ds[3]; - h[0] += h[1]; + h[0] += h [1]; if (hashsize == 128) { - h[2] = ds[0]; - h[3] = ds[1]; + h[2] = ds[0]; + h[3] = ds[1]; - h[2] += h[3]; + h[2] += h [3]; PUT_U64(h[2], out8, 8); } if (hashsize >= 64) { @@ -199,75 +203,75 @@ static void BEBB4185(const void * in, const size_t len, const seed_t seed, void } REGISTER_FAMILY(discohash, - $.src_url = "https://github.com/crisdosyago/discohash", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://github.com/crisdosyago/discohash", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); // Yes, none of these have any bad seeds! See note at the top near "thread_local". 
REGISTER_HASH(Discohash__old, - $.desc = "Discohash (aka BEBB4185) prior version", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_SLOW | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xBEBB4185, - $.verification_BE = 0x4B5579AD, - $.hashfn_native = BEBB4185<64, false, true>, - $.hashfn_bswap = BEBB4185<64, true, true>, - $.badseeds = {} -); + $.desc = "Discohash (aka BEBB4185) prior version", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_SLOW | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xBEBB4185, + $.verification_BE = 0x4B5579AD, + $.hashfn_native = BEBB4185<64, false, true>, + $.hashfn_bswap = BEBB4185<64, true, true>, + $.badseeds = {} + ); REGISTER_HASH(Discohash, - $.desc = "Discohash (aka BEBB4185)", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_SLOW | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xFBA72400, - $.verification_BE = 0x286DD52C, - $.hashfn_native = BEBB4185<64, false, false>, - $.hashfn_bswap = BEBB4185<64, true, false>, - $.badseeds = {} -); + $.desc = "Discohash (aka BEBB4185)", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_SLOW | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xFBA72400, + $.verification_BE = 0x286DD52C, + $.hashfn_native = BEBB4185<64, false, false>, + $.hashfn_bswap = BEBB4185<64, true, false>, + $.badseeds = {} + ); REGISTER_HASH(Discohash_128__old, - $.desc = "Discohash (aka BEBB4185) prior version", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_SLOW | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x000ED2A6, - $.verification_BE = 0x3110ECFA, - $.hashfn_native = BEBB4185<128, 
false, true>, - $.hashfn_bswap = BEBB4185<128, true, true>, - $.badseeds = {} -); + $.desc = "Discohash (aka BEBB4185) prior version", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_SLOW | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x000ED2A6, + $.verification_BE = 0x3110ECFA, + $.hashfn_native = BEBB4185<128, false, true>, + $.hashfn_bswap = BEBB4185<128, true, true>, + $.badseeds = {} + ); REGISTER_HASH(Discohash_128, - $.desc = "Discohash (aka BEBB4185)", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_SLOW | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x231868B1, - $.verification_BE = 0xEB4228F3, - $.hashfn_native = BEBB4185<128, false, false>, - $.hashfn_bswap = BEBB4185<128, true, false>, - $.badseeds = {} -); + $.desc = "Discohash (aka BEBB4185)", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_SLOW | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x231868B1, + $.verification_BE = 0xEB4228F3, + $.hashfn_native = BEBB4185<128, false, false>, + $.hashfn_bswap = BEBB4185<128, true, false>, + $.badseeds = {} + ); diff --git a/hashes/donothing.cpp b/hashes/donothing.cpp index e116c983..e02eddc7 100644 --- a/hashes/donothing.cpp +++ b/hashes/donothing.cpp @@ -28,14 +28,14 @@ #include "Platform.h" #include "Hashlib.h" -static void DoNothingHash(const void * in, const size_t len, const seed_t seed, void * out) { +static void DoNothingHash( const void * in, const size_t len, const seed_t seed, void * out ) { } -template < uint32_t hashlen, bool bswap > -static void DoNothingOAATHash(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void DoNothingOAATHash( const void * in, const size_t len, const seed_t seed, void * out ) { const uint8_t * data = (const uint8_t 
*)in; const uint8_t * const end = &data[len]; - uint32_t h = seed >> 32; + uint32_t h = seed >> 32; while (data < end) { h &= *data++; @@ -44,93 +44,93 @@ static void DoNothingOAATHash(const void * in, const size_t len, const seed_t se } REGISTER_FAMILY(donothing, - $.src_url = "https://github.com/rurban/smhasher/blob/master/Hashes.cpp", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/rurban/smhasher/blob/master/Hashes.cpp", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(donothing_32, - $.desc = "Do-Nothing function (measure call overhead)", - $.hash_flags = - FLAG_HASH_MOCK, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x0, - $.verification_BE = 0x0, - $.hashfn_native = DoNothingHash, - $.hashfn_bswap = DoNothingHash -); + $.desc = "Do-Nothing function (measure call overhead)", + $.hash_flags = + FLAG_HASH_MOCK, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x0, + $.verification_BE = 0x0, + $.hashfn_native = DoNothingHash, + $.hashfn_bswap = DoNothingHash + ); REGISTER_HASH(donothing_64, - $.desc = "Do-Nothing function (measure call overhead)", - $.hash_flags = - FLAG_HASH_MOCK, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x0, - $.verification_BE = 0x0, - $.hashfn_native = DoNothingHash, - $.hashfn_bswap = DoNothingHash -); + $.desc = "Do-Nothing function (measure call overhead)", + $.hash_flags = + FLAG_HASH_MOCK, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x0, + $.verification_BE = 0x0, + $.hashfn_native = DoNothingHash, + $.hashfn_bswap = DoNothingHash + ); REGISTER_HASH(donothing_128, - $.desc = "Do-Nothing function (measure call overhead)", - $.hash_flags = - FLAG_HASH_MOCK, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - 
$.verification_LE = 0x0, - $.verification_BE = 0x0, - $.hashfn_native = DoNothingHash, - $.hashfn_bswap = DoNothingHash -); + $.desc = "Do-Nothing function (measure call overhead)", + $.hash_flags = + FLAG_HASH_MOCK, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x0, + $.verification_BE = 0x0, + $.hashfn_native = DoNothingHash, + $.hashfn_bswap = DoNothingHash + ); REGISTER_HASH(donothingOAAT_32, - $.desc = "Do-Nothing OAAT function (measure call+OAAT overhead)", - $.hash_flags = - FLAG_HASH_MOCK, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x0, - $.verification_BE = 0x0, - $.hashfn_native = DoNothingOAATHash<32, false>, - $.hashfn_bswap = DoNothingOAATHash<32, true>, - $.sort_order = 10 -); + $.desc = "Do-Nothing OAAT function (measure call+OAAT overhead)", + $.hash_flags = + FLAG_HASH_MOCK, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x0, + $.verification_BE = 0x0, + $.hashfn_native = DoNothingOAATHash<32, false>, + $.hashfn_bswap = DoNothingOAATHash<32, true>, + $.sort_order = 10 + ); REGISTER_HASH(donothingOAAT_64, - $.desc = "Do-Nothing OAAT function (measure call+OAAT overhead)", - $.hash_flags = - FLAG_HASH_MOCK, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x0, - $.verification_BE = 0x0, - $.hashfn_native = DoNothingOAATHash<64, false>, - $.hashfn_bswap = DoNothingOAATHash<64, true>, - $.sort_order = 10 -); + $.desc = "Do-Nothing OAAT function (measure call+OAAT overhead)", + $.hash_flags = + FLAG_HASH_MOCK, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x0, + $.verification_BE = 0x0, + $.hashfn_native = DoNothingOAATHash<64, false>, + $.hashfn_bswap = DoNothingOAATHash<64, true>, + $.sort_order = 10 + ); REGISTER_HASH(donothingOAAT_128, - $.desc = 
"Do-Nothing OAAT function (measure call+OAAT overhead)", - $.hash_flags = - FLAG_HASH_MOCK, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x0, - $.verification_BE = 0x0, - $.hashfn_native = DoNothingOAATHash<128, false>, - $.hashfn_bswap = DoNothingOAATHash<128, true>, - $.sort_order = 10 -); + $.desc = "Do-Nothing OAAT function (measure call+OAAT overhead)", + $.hash_flags = + FLAG_HASH_MOCK, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x0, + $.verification_BE = 0x0, + $.hashfn_native = DoNothingOAATHash<128, false>, + $.hashfn_bswap = DoNothingOAATHash<128, true>, + $.sort_order = 10 + ); diff --git a/hashes/falcon_oaat.cpp b/hashes/falcon_oaat.cpp index 5399fe48..c4dce0bb 100644 --- a/hashes/falcon_oaat.cpp +++ b/hashes/falcon_oaat.cpp @@ -28,24 +28,26 @@ #include "Hashlib.h" //------------------------------------------------------------ -static uint32_t GoodOAAT_impl(const uint8_t * str, size_t len, uint32_t seed) { +static uint32_t GoodOAAT_impl( const uint8_t * str, size_t len, uint32_t seed ) { const uint8_t * const end = str + len; uint32_t h1 = seed ^ 0x3b00; uint32_t h2 = ROTL32(seed, 15); - for (;str != end; str++) { + for (; str != end; str++) { h1 += str[0]; h1 += h1 << 3; // h1 *= 9 h2 += h1; // the rest could be as in MicroOAAT: h1 = ROTL32(h1, 7) // but clang doesn't generate ROTL instruction then. - h2 = ROTL32(h2, 7); + h2 = ROTL32(h2, 7); h2 += h2 << 2; // h2 *= 5 } h1 ^= h2; - /* now h1 passes all collision checks, - * so it is suitable for hash-tables with prime numbers. */ + /* + * now h1 passes all collision checks, + * so it is suitable for hash-tables with prime numbers. + */ h1 += ROTL32(h2, 14); h2 ^= h1; h2 += ROTR32(h1, 6); h1 ^= h2; h1 += ROTL32(h2, 5); @@ -57,64 +59,67 @@ static uint32_t GoodOAAT_impl(const uint8_t * str, size_t len, uint32_t seed) { // MicroOAAT suitable for hash-tables using prime numbers. 
// It passes all collision checks. // Author: Sokolov Yura aka funny-falcon -static uint32_t MicroOAAT_impl(const uint8_t * str, size_t len, uint32_t seed) { - const uint8_t * const end = str + len; - uint32_t h1 = seed ^ 0x3b00; - uint32_t h2 = ROTL32(seed, 15); - for (;str != end; str++) { - h1 += str[0]; - h1 += h1 << 3; // h1 *= 9 - h2 -= h1; - // unfortunately, clang produces bad code here, - // cause it doesn't generate rotl instruction. - h1 = ROTL32(h1, 7); - } - return (h1 ^ h2); +static uint32_t MicroOAAT_impl( const uint8_t * str, size_t len, uint32_t seed ) { + const uint8_t * const end = str + len; + uint32_t h1 = seed ^ 0x3b00; + uint32_t h2 = ROTL32(seed, 15); + + for (; str != end; str++) { + h1 += str[0]; + h1 += h1 << 3; // h1 *= 9 + h2 -= h1; + // unfortunately, clang produces bad code here, + // cause it doesn't generate rotl instruction. + h1 = ROTL32(h1, 7); + } + return h1 ^ h2; } //------------------------------------------------------------ -template < bool bswap > -static void GoodOAAT(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void GoodOAAT( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = GoodOAAT_impl((const uint8_t *)in, len, (uint32_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void MicroOAAT(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void MicroOAAT( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = MicroOAAT_impl((const uint8_t *)in, len, (uint32_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(falcon_oaat, - $.src_url = "https://github.com/rurban/smhasher/commit/3931fd6f723f4fb2afab6ef9a628912220e90ce7", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/rurban/smhasher/commit/3931fd6f723f4fb2afab6ef9a628912220e90ce7", + $.src_status = 
HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(GoodOAAT, - $.desc = "GoodOAAT (Small non-multiplicative OAAT by funny-falcon)", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x7B14EEE5, - $.verification_BE = 0x1A834495, - $.hashfn_native = GoodOAAT, - $.hashfn_bswap = GoodOAAT -); + $.desc = "GoodOAAT (Small non-multiplicative OAAT by funny-falcon)", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x7B14EEE5, + $.verification_BE = 0x1A834495, + $.hashfn_native = GoodOAAT, + $.hashfn_bswap = GoodOAAT + ); REGISTER_HASH(MicroOAAT, - $.desc = "MicroOAAT (Small non-multiplicative OAAT by funny-falcon)", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x16F1BA97, - $.verification_BE = 0xDE58061B, - $.hashfn_native = MicroOAAT, - $.hashfn_bswap = MicroOAAT -); + $.desc = "MicroOAAT (Small non-multiplicative OAAT by funny-falcon)", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x16F1BA97, + $.verification_BE = 0xDE58061B, + $.hashfn_native = MicroOAAT, + $.hashfn_bswap = MicroOAAT + ); diff --git a/hashes/falkhash.cpp b/hashes/falkhash.cpp index eb3719df..ffb34613 100644 --- a/hashes/falkhash.cpp +++ b/hashes/falkhash.cpp @@ -58,123 +58,124 @@ #include "Hashlib.h" #if defined(HAVE_X86_64_AES) -#include "Intrinsics.h" - -template < uint32_t version, bool bswap > -static void falkhash(const void * in, const size_t olen, const seed_t seed64, void * out) { - const uint8_t * buf = (const uint8_t *)in; - uint64_t len = (uint64_t)olen; - __m128i hash, seed; - - // A chunk_size of 0x50 is ideal for AMD fam 15h platforms, which is - // what this was optimized and designed for. 
If you change this - // value, you have to manually add/remove instructions from the core - // loop below. This must be divisible by 16. - const uint64_t CHUNK_LEN = 80; - - if (version == 1) { - // Add the seed to the length. Place the length+seed for both the - // low and high 64-bits into our hash output. - seed = _mm_set_epi64x(len + ((uint64_t)seed64), len + ((uint64_t)seed64)); - } else { - // Create the 128-bit seed. Low 64-bits gets seed, high 64-bits gets - // seed + len + 1. The +1 ensures that both 64-bits values will never be - // the same (with the exception of a length of -1. If you have that much - // ram, send me some). - seed = _mm_set_epi64x(1 + len + ((uint64_t)seed64), (uint64_t)seed64); - } - - hash = seed; - - while (len > 0) { - __m128i piece[5]; - uint8_t tmp[CHUNK_LEN]; - - // If the data is smaller than one chunk, pad it with 0xff for v1, - // or zeroes for v2. - if (len < CHUNK_LEN) { - memcpy(tmp, buf, len); - if (version == 1) { - memset(tmp + len, 0xff, CHUNK_LEN - len); - } else { - memset(tmp + len, 0, CHUNK_LEN - len); - } - buf = tmp; - len = CHUNK_LEN; - } + #include "Intrinsics.h" - // Read 5 pieces from memory into xmms - piece[0] = _mm_loadu_si128((__m128i*)(buf + 0*0x10)); - piece[1] = _mm_loadu_si128((__m128i*)(buf + 1*0x10)); - piece[2] = _mm_loadu_si128((__m128i*)(buf + 2*0x10)); - piece[3] = _mm_loadu_si128((__m128i*)(buf + 3*0x10)); - piece[4] = _mm_loadu_si128((__m128i*)(buf + 4*0x10)); - - if (bswap) { - // Arbitrarily chose 64-bit chunks - piece[0] = mm_bswap64(piece[0]); - piece[1] = mm_bswap64(piece[1]); - piece[2] = mm_bswap64(piece[2]); - piece[3] = mm_bswap64(piece[3]); - piece[4] = mm_bswap64(piece[4]); - } +template +static void falkhash( const void * in, const size_t olen, const seed_t seed64, void * out ) { + const uint8_t * buf = (const uint8_t *)in; + uint64_t len = (uint64_t )olen; + __m128i hash, seed; - if (version == 2) { - // xor each piece against the seed - piece[0] = _mm_xor_si128(piece[0], seed); - 
piece[1] = _mm_xor_si128(piece[1], seed); - piece[2] = _mm_xor_si128(piece[2], seed); - piece[3] = _mm_xor_si128(piece[3], seed); - piece[4] = _mm_xor_si128(piece[4], seed); + // A chunk_size of 0x50 is ideal for AMD fam 15h platforms, which is + // what this was optimized and designed for. If you change this + // value, you have to manually add/remove instructions from the core + // loop below. This must be divisible by 16. + const uint64_t CHUNK_LEN = 80; + + if (version == 1) { + // Add the seed to the length. Place the length+seed for both the + // low and high 64-bits into our hash output. + seed = _mm_set_epi64x(len + ((uint64_t)seed64), len + ((uint64_t)seed64)); + } else { + // Create the 128-bit seed. Low 64-bits gets seed, high 64-bits gets + // seed + len + 1. The +1 ensures that both 64-bits values will never be + // the same (with the exception of a length of -1. If you have that much + // ram, send me some). + seed = _mm_set_epi64x(1 + len + ((uint64_t)seed64), (uint64_t)seed64); } - // Mix all pieces into xmm0 - piece[0] = _mm_aesenc_si128(piece[0], piece[1]); - piece[0] = _mm_aesenc_si128(piece[0], piece[2]); - piece[0] = _mm_aesenc_si128(piece[0], piece[3]); - piece[0] = _mm_aesenc_si128(piece[0], piece[4]); + hash = seed; + + while (len > 0) { + __m128i piece[5]; + uint8_t tmp[CHUNK_LEN]; + + // If the data is smaller than one chunk, pad it with 0xff for v1, + // or zeroes for v2. 
+ if (len < CHUNK_LEN) { + memcpy(tmp, buf, len); + if (version == 1) { + memset(tmp + len, 0xff, CHUNK_LEN - len); + } else { + memset(tmp + len, 0, CHUNK_LEN - len); + } + buf = tmp; + len = CHUNK_LEN; + } + + // Read 5 pieces from memory into xmms + piece[0] = _mm_loadu_si128((__m128i *)(buf + 0 * 0x10)); + piece[1] = _mm_loadu_si128((__m128i *)(buf + 1 * 0x10)); + piece[2] = _mm_loadu_si128((__m128i *)(buf + 2 * 0x10)); + piece[3] = _mm_loadu_si128((__m128i *)(buf + 3 * 0x10)); + piece[4] = _mm_loadu_si128((__m128i *)(buf + 4 * 0x10)); + + if (bswap) { + // Arbitrarily chose 64-bit chunks + piece[0] = mm_bswap64(piece[0]); + piece[1] = mm_bswap64(piece[1]); + piece[2] = mm_bswap64(piece[2]); + piece[3] = mm_bswap64(piece[3]); + piece[4] = mm_bswap64(piece[4]); + } + + if (version == 2) { + // xor each piece against the seed + piece[0] = _mm_xor_si128(piece[0], seed); + piece[1] = _mm_xor_si128(piece[1], seed); + piece[2] = _mm_xor_si128(piece[2], seed); + piece[3] = _mm_xor_si128(piece[3], seed); + piece[4] = _mm_xor_si128(piece[4], seed); + } + + // Mix all pieces into xmm0 + piece[0] = _mm_aesenc_si128(piece[0], piece[1]); + piece[0] = _mm_aesenc_si128(piece[0], piece[2]); + piece[0] = _mm_aesenc_si128(piece[0], piece[3]); + piece[0] = _mm_aesenc_si128(piece[0], piece[4]); + + if (version == 1) { + // Finalize xmm0 by mixing with itself + piece[0] = _mm_aesenc_si128(piece[0], piece[0]); + } else { + // Finalize piece[0] by aesencing against seed + piece[0] = _mm_aesenc_si128(piece[0], seed); + } + + // Mix in xmm0 to the hash + hash = _mm_aesenc_si128(hash, piece[0]); + + buf += CHUNK_LEN; + len -= CHUNK_LEN; + } if (version == 1) { - // Finalize xmm0 by mixing with itself - piece[0] = _mm_aesenc_si128(piece[0], piece[0]); + // Finalize the hash. This is required at least once to pass + // Combination 0x8000000 and Combination 0x0000001. Need more than 1 to + // pass the Seed tests. We do 4 because they're pretty much free. 
+ // Maybe we should actually use the seed better? Nah, more finalizing! + hash = _mm_aesenc_si128(hash, hash); + hash = _mm_aesenc_si128(hash, hash); + hash = _mm_aesenc_si128(hash, hash); + hash = _mm_aesenc_si128(hash, hash); } else { - // Finalize piece[0] by aesencing against seed - piece[0] = _mm_aesenc_si128(piece[0], seed); + // Finalize hash by aesencing against seed four times + hash = _mm_aesenc_si128(hash, seed); + hash = _mm_aesenc_si128(hash, seed); + hash = _mm_aesenc_si128(hash, seed); + hash = _mm_aesenc_si128(hash, seed); } - // Mix in xmm0 to the hash - hash = _mm_aesenc_si128(hash, piece[0]); - - buf += CHUNK_LEN; - len -= CHUNK_LEN; - } - - if (version == 1) { - // Finalize the hash. This is required at least once to pass - // Combination 0x8000000 and Combination 0x0000001. Need more than 1 to - // pass the Seed tests. We do 4 because they're pretty much free. - // Maybe we should actually use the seed better? Nah, more finalizing! - hash = _mm_aesenc_si128(hash, hash); - hash = _mm_aesenc_si128(hash, hash); - hash = _mm_aesenc_si128(hash, hash); - hash = _mm_aesenc_si128(hash, hash); - } else { - // Finalize hash by aesencing against seed four times - hash = _mm_aesenc_si128(hash, seed); - hash = _mm_aesenc_si128(hash, seed); - hash = _mm_aesenc_si128(hash, seed); - hash = _mm_aesenc_si128(hash, seed); - } - - // Write hash to memory - _mm_storeu_si128((__m128i*)out, hash); + // Write hash to memory + _mm_storeu_si128((__m128i *)out, hash); } + #endif REGISTER_FAMILY(falkhash, - $.src_url = "https://github.com/gamozolabs/falkhash", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://github.com/gamozolabs/falkhash", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); #if defined(HAVE_X86_64_AES) @@ -185,29 +186,29 @@ REGISTER_FAMILY(falkhash, // 1) For a hash len of 0, a hash result of 0 was forced, and // 2) The hash output was truncated to 64 bits. 
REGISTER_HASH(falkhash1, - $.desc = "Falkhash v1", - $.hash_flags = - FLAG_HASH_AES_BASED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0xAEF96E69, - $.verification_BE = 0xDAE2ECE4, - $.hashfn_native = falkhash<1,false>, - $.hashfn_bswap = falkhash<1,true> -); + $.desc = "Falkhash v1", + $.hash_flags = + FLAG_HASH_AES_BASED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0xAEF96E69, + $.verification_BE = 0xDAE2ECE4, + $.hashfn_native = falkhash<1, false>, + $.hashfn_bswap = falkhash<1, true> + ); REGISTER_HASH(falkhash2, - $.desc = "Falkhash v2", - $.hash_flags = - FLAG_HASH_AES_BASED, - $.impl_flags = - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 128, - $.verification_LE = 0x7FA15220, - $.verification_BE = 0x0A8285F2, - $.hashfn_native = falkhash<2,false>, - $.hashfn_bswap = falkhash<2,true> -); + $.desc = "Falkhash v2", + $.hash_flags = + FLAG_HASH_AES_BASED, + $.impl_flags = + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 128, + $.verification_LE = 0x7FA15220, + $.verification_BE = 0x0A8285F2, + $.hashfn_native = falkhash<2, false>, + $.hashfn_bswap = falkhash<2, true> + ); #endif diff --git a/hashes/farmhash.cpp b/hashes/farmhash.cpp index ff358f61..6ed0b455 100644 --- a/hashes/farmhash.cpp +++ b/hashes/farmhash.cpp @@ -29,51 +29,62 @@ #include #if defined(HAVE_SSE_4_1) || defined(HAVE_X86_64_CRC32C) || defined(HAVE_X86_64_AES) -#include "Intrinsics.h" -#define FARMHASH_USE_INTRIN + #include "Intrinsics.h" + #define FARMHASH_USE_INTRIN #endif using namespace std; //------------------------------------------------------------ #if defined(HAVE_INT128) -static inline uint64_t Uint128Low64(const uint128_t x) { - return static_cast(x); + +static inline uint64_t Uint128Low64( const uint128_t x ) { + return static_cast(x); } -static inline uint64_t Uint128High64(const uint128_t x) { - return static_cast(x >> 64); + +static inline uint64_t Uint128High64( const uint128_t x ) { + return static_cast(x >> 
64); } -static inline uint128_t Uint128(uint64_t lo, uint64_t hi) { - return lo + (((uint128_t)hi) << 64); + +static inline uint128_t Uint128( uint64_t lo, uint64_t hi ) { + return lo + (((uint128_t)hi) << 64); } + #else typedef std::pair uint128_t; -static inline uint64_t Uint128Low64(const uint128_t x) { return x.first; } -static inline uint64_t Uint128High64(const uint128_t x) { return x.second; } -static inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); } + +static inline uint64_t Uint128Low64( const uint128_t x ) { return x.first; } + +static inline uint64_t Uint128High64( const uint128_t x ) { return x.second; } + +static inline uint128_t Uint128( uint64_t lo, uint64_t hi ) { return uint128_t(lo, hi); } + #endif //------------------------------------------------------------ -template < bool bswap > -static inline uint32_t Fetch32(const uint8_t * p) { +template +static inline uint32_t Fetch32( const uint8_t * p ) { return GET_U32(p, 0); } -template < bool bswap > -static inline uint64_t Fetch64(const uint8_t * p) { +template +static inline uint64_t Fetch64( const uint8_t * p ) { return GET_U64(p, 0); } #if defined(FARMHASH_USE_INTRIN) -template < bool bswap > -static inline __m128i Fetch128(const uint8_t * s) { - __m128i d = _mm_loadu_si128(reinterpret_cast(s)); + +template +static inline __m128i Fetch128( const uint8_t * s ) { + __m128i d = _mm_loadu_si128(reinterpret_cast(s)); + if (bswap) { - const __m128i mask = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15); + const __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); d = _mm_shuffle_epi8(d, mask); } return d; } + #endif #undef PERMUTE3 @@ -81,25 +92,34 @@ static inline __m128i Fetch128(const uint8_t * s) { //------------------------------------------------------------ #if defined(FARMHASH_USE_INTRIN) + // Helpers for data-parallel operations (1x 128 bits or 2x 64 or 4x 32). 
-static inline __m128i Add64(__m128i x, __m128i y) { return _mm_add_epi64(x, y); } -static inline __m128i Add32(__m128i x, __m128i y) { return _mm_add_epi32(x, y); } -static inline __m128i Mul(__m128i x, __m128i y) { return _mm_mullo_epi32(x, y); } -static inline __m128i Mul5(__m128i x) { return Add32(x, _mm_slli_epi32(x, 2)); } +static inline __m128i Add64( __m128i x, __m128i y ) { return _mm_add_epi64(x, y); } -static inline __m128i Xor(__m128i x, __m128i y) { return _mm_xor_si128(x, y); } -static inline __m128i Or(__m128i x, __m128i y) { return _mm_or_si128(x, y); } +static inline __m128i Add32( __m128i x, __m128i y ) { return _mm_add_epi32(x, y); } -static inline __m128i RotateLeft(__m128i x, int c) { - return Or(_mm_slli_epi32(x, c), _mm_srli_epi32(x, 32 - c)); +static inline __m128i Mul( __m128i x, __m128i y ) { return _mm_mullo_epi32(x, y); } + +static inline __m128i Mul5( __m128i x ) { return Add32(x, _mm_slli_epi32(x, 2)); } + +static inline __m128i Xor( __m128i x, __m128i y ) { return _mm_xor_si128(x, y); } + +static inline __m128i Or( __m128i x, __m128i y ) { return _mm_or_si128(x, y); } + +static inline __m128i RotateLeft( __m128i x, int c ) { + return Or(_mm_slli_epi32(x, c), _mm_srli_epi32(x, 32 - c)); } -static inline __m128i Rol17(__m128i x) { return RotateLeft(x, 17); } -static inline __m128i Rol19(__m128i x) { return RotateLeft(x, 19); } -static inline __m128i Shuf(__m128i x, __m128i y) { return _mm_shuffle_epi8(y, x); } -static inline __m128i Shuffle0321(__m128i x) { - return _mm_shuffle_epi32(x, (0 << 6) + (3 << 4) + (2 << 2) + (1 << 0)); +static inline __m128i Rol17( __m128i x ) { return RotateLeft(x, 17); } + +static inline __m128i Rol19( __m128i x ) { return RotateLeft(x, 19); } + +static inline __m128i Shuf( __m128i x, __m128i y ) { return _mm_shuffle_epi8(y, x); } + +static inline __m128i Shuffle0321( __m128i x ) { + return _mm_shuffle_epi32(x, (0 << 6) + (3 << 4) + (2 << 2) + (1 << 0)); } + #endif 
//------------------------------------------------------------ @@ -117,405 +137,409 @@ static const uint32_t c2 = 0x1b873593; // A 32-bit to 32-bit integer hash copied from Murmur3. // mul -static inline uint32_t fmix(uint32_t h) { - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - return h; +static inline uint32_t fmix( uint32_t h ) { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; } // Helper from Murmur3 for combining two 32-bit values. // mul -static inline uint32_t Mur(uint32_t a, uint32_t h) { - a *= c1; - a = ROTR32(a, 17); - a *= c2; - h ^= a; - h = ROTR32(h, 19); - return h * 5 + 0xe6546b64; +static inline uint32_t Mur( uint32_t a, uint32_t h ) { + a *= c1; + a = ROTR32(a, 17); + a *= c2; + h ^= a; + h = ROTR32(h, 19); + return h * 5 + 0xe6546b64; } - -static inline uint64_t ShiftMix(uint64_t val) { - return val ^ (val >> 47); +static inline uint64_t ShiftMix( uint64_t val ) { + return val ^ (val >> 47); } // Hash 128 input bits down to 64 bits of output. // This is intended to be a reasonably good hash function. // 64x64 -static inline uint64_t Hash128to64(uint128_t x) { - // Murmur-inspired hashing. - const uint64_t kMul = UINT64_C(0x9ddfea08eb382d69); - uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; - a ^= (a >> 47); - uint64_t b = (Uint128High64(x) ^ a) * kMul; - b ^= (b >> 47); - b *= kMul; - return b; +static inline uint64_t Hash128to64( uint128_t x ) { + // Murmur-inspired hashing. 
+ const uint64_t kMul = UINT64_C(0x9ddfea08eb382d69); + uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; + + a ^= (a >> 47); + uint64_t b = (Uint128High64(x) ^ a) * kMul; + b ^= (b >> 47); + b *= kMul; + return b; } // 64x64 -static inline uint64_t HashLen16(uint64_t u, uint64_t v) { - return Hash128to64(Uint128(u, v)); +static inline uint64_t HashLen16( uint64_t u, uint64_t v ) { + return Hash128to64(Uint128(u, v)); } // 64x64 -static inline uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) { - // Murmur-inspired hashing. - uint64_t a = (u ^ v) * mul; - a ^= (a >> 47); - uint64_t b = (v ^ a) * mul; - b ^= (b >> 47); - b *= mul; - return b; +static inline uint64_t HashLen16( uint64_t u, uint64_t v, uint64_t mul ) { + // Murmur-inspired hashing. + uint64_t a = (u ^ v) * mul; + + a ^= (a >> 47); + uint64_t b = (v ^ a) * mul; + b ^= (b >> 47); + b *= mul; + return b; } // Return a 16-byte hash for 48 bytes. Quick and dirty. // Callers do best to use "random-looking" values for a and b. -static inline pair WeakHashLen32WithSeeds( - uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) { - a += w; - b = ROTR64(b + a + z, 21); - uint64_t c = a; - a += x; - a += y; - b += ROTR64(a, 44); - return make_pair(a + z, b + c); +static inline pair WeakHashLen32WithSeeds( uint64_t w, + uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b ) { + a += w; + b = ROTR64(b + a + z, 21); + uint64_t c = a; + a += x; + a += y; + b += ROTR64(a , 44); + return make_pair(a + z, b + c); } // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. 
-template < bool bswap > -static inline pair WeakHashLen32WithSeeds( - const uint8_t* s, uint64_t a, uint64_t b) { - return WeakHashLen32WithSeeds(Fetch64(s), - Fetch64(s + 8), - Fetch64(s + 16), - Fetch64(s + 24), - a, - b); +template +static inline pair WeakHashLen32WithSeeds( const uint8_t * s, uint64_t a, uint64_t b ) { + return WeakHashLen32WithSeeds(Fetch64(s), Fetch64( + s + 8), Fetch64(s + 16), Fetch64(s + 24), a, b); } //------------------------------------------------------------ namespace farmhashna { - template < bool bswap > - static inline uint64_t HashLen0to16(const uint8_t *s, size_t len); - template < bool bswap > - static inline uint64_t HashLen17to32(const uint8_t *s, size_t len); - template < bool bswap > - static inline uint64_t HashLen33to64(const uint8_t *s, size_t len); - - template < bool bswap > - static uint64_t Hash64(const uint8_t *s, size_t len); - template < bool bswap > - static uint64_t Hash64WithSeeds(const uint8_t *s, size_t len, uint64_t seed0, uint64_t seed1); - template < bool bswap > - static uint64_t Hash64WithSeed(const uint8_t *s, size_t len, uint64_t seed); -} - -template < bool bswap > -static inline uint64_t farmhashna::HashLen0to16(const uint8_t *s, size_t len) { - if (len >= 8) { - uint64_t mul = k2 + len * 2; - uint64_t a = Fetch64(s) + k2; - uint64_t b = Fetch64(s + len - 8); - uint64_t c = ROTR64(b, 37) * mul + a; - uint64_t d = (ROTR64(a, 25) + b) * mul; - return HashLen16(c, d, mul); - } - if (len >= 4) { - uint64_t mul = k2 + len * 2; - uint64_t a = Fetch32(s); - return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); - } - if (len > 0) { - uint8_t a = s[0]; - uint8_t b = s[len >> 1]; - uint8_t c = s[len - 1]; - uint32_t y = static_cast(a) + (static_cast(b) << 8); - uint32_t z = len + (static_cast(c) << 2); - return ShiftMix(y * k2 ^ z * k0) * k2; - } - return k2; + template + static inline uint64_t HashLen0to16( const uint8_t * s, size_t len ); + + template + static inline uint64_t HashLen17to32( const 
uint8_t * s, size_t len ); + + template + static inline uint64_t HashLen33to64( const uint8_t * s, size_t len ); + + template + static uint64_t Hash64( const uint8_t * s, size_t len ); + + template + static uint64_t Hash64WithSeeds( const uint8_t * s, size_t len, uint64_t seed0, uint64_t seed1 ); + + template + static uint64_t Hash64WithSeed( const uint8_t * s, size_t len, uint64_t seed ); +} // namespace farmhashna + +template +static inline uint64_t farmhashna::HashLen0to16( const uint8_t * s, size_t len ) { + if (len >= 8) { + uint64_t mul = k2 + len * 2; + uint64_t a = Fetch64(s) + k2; + uint64_t b = Fetch64(s + len - 8); + uint64_t c = ROTR64(b, 37) * mul + a; + uint64_t d = (ROTR64(a, 25) + b) * mul; + return HashLen16(c, d, mul); + } + if (len >= 4) { + uint64_t mul = k2 + len * 2; + uint64_t a = Fetch32(s); + return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); + } + if (len > 0) { + uint8_t a = s[0]; + uint8_t b = s[len >> 1]; + uint8_t c = s[len - 1]; + uint32_t y = static_cast(a) + (static_cast(b) << 8); + uint32_t z = len + (static_cast(c) << 2); + return ShiftMix(y * k2 ^ z * k0) * k2; + } + return k2; } // This probably works well for 16-byte strings as well, but it may be overkill // in that case. 
-template < bool bswap > -static inline uint64_t farmhashna::HashLen17to32(const uint8_t *s, size_t len) { - uint64_t mul = k2 + len * 2; - uint64_t a = Fetch64(s) * k1; - uint64_t b = Fetch64(s + 8); - uint64_t c = Fetch64(s + len - 8) * mul; - uint64_t d = Fetch64(s + len - 16) * k2; - return HashLen16(ROTR64(a + b, 43) + ROTR64(c, 30) + d, - a + ROTR64(b + k2, 18) + c, mul); +template +static inline uint64_t farmhashna::HashLen17to32( const uint8_t * s, size_t len ) { + uint64_t mul = k2 + len * 2; + uint64_t a = Fetch64(s ) * k1; + uint64_t b = Fetch64(s + 8); + uint64_t c = Fetch64(s + len - 8) * mul; + uint64_t d = Fetch64(s + len - 16) * k2; + + return HashLen16(ROTR64(a + b, 43) + ROTR64(c, 30) + d, a + ROTR64(b + k2, 18) + c, mul); } // Return an 8-byte hash for 33 to 64 bytes. -template < bool bswap > -static inline uint64_t farmhashna::HashLen33to64(const uint8_t *s, size_t len) { - uint64_t mul = k2 + len * 2; - uint64_t a = Fetch64(s) * k2; - uint64_t b = Fetch64(s + 8); - uint64_t c = Fetch64(s + len - 8) * mul; - uint64_t d = Fetch64(s + len - 16) * k2; - uint64_t y = ROTR64(a + b, 43) + ROTR64(c, 30) + d; - uint64_t z = HashLen16(y, a + ROTR64(b + k2, 18) + c, mul); - uint64_t e = Fetch64(s + 16) * mul; - uint64_t f = Fetch64(s + 24); - uint64_t g = (y + Fetch64(s + len - 32)) * mul; - uint64_t h = (z + Fetch64(s + len - 24)) * mul; - return HashLen16(ROTR64(e + f, 43) + ROTR64(g, 30) + h, - e + ROTR64(f + a, 18) + g, mul); -} - -template < bool bswap > -static uint64_t farmhashna::Hash64(const uint8_t *s, size_t len) { - const uint64_t seed = 81; - if (len <= 32) { - if (len <= 16) { - return HashLen0to16(s, len); - } else { - return HashLen17to32(s, len); - } - } else if (len <= 64) { - return HashLen33to64(s, len); - } - - // For strings over 64 bytes we loop. I nternal state consists of - // 56 bytes: v, w, x, y, and z. 
- uint64_t x = seed; - uint64_t y = seed * k1 + 113; - uint64_t z = ShiftMix(y * k2 + 113) * k2; - pair v = make_pair(0, 0); - pair w = make_pair(0, 0); - x = x * k2 + Fetch64(s); - - // Set end so that after the loop we have 1 to 64 bytes left to process. - const uint8_t* end = s + ((len - 1) / 64) * 64; - const uint8_t* last64 = end + ((len - 1) & 63) - 63; - assert(s + len - 64 == last64); - do { - x = ROTR64(x + y + v.first + Fetch64(s + 8), 37) * k1; - y = ROTR64(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y += v.first + Fetch64(s + 40); - z = ROTR64(z + w.first, 33) * k1; - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); +template +static inline uint64_t farmhashna::HashLen33to64( const uint8_t * s, size_t len ) { + uint64_t mul = k2 + len * 2; + uint64_t a = Fetch64(s ) * k2; + uint64_t b = Fetch64(s + 8); + uint64_t c = Fetch64(s + len - 8) * mul; + uint64_t d = Fetch64(s + len - 16) * k2; + uint64_t y = ROTR64(a + b, 43) + ROTR64(c, 30) + d; + uint64_t z = HashLen16(y, a + ROTR64(b + k2, 18) + c, mul); + uint64_t e = Fetch64(s + 16) * mul; + uint64_t f = Fetch64(s + 24); + uint64_t g = (y + Fetch64(s + len - 32)) * mul; + uint64_t h = (z + Fetch64(s + len - 24)) * mul; + + return HashLen16(ROTR64(e + f, 43) + ROTR64(g, 30) + h, e + ROTR64(f + a, 18) + g, mul); +} + +template +static uint64_t farmhashna::Hash64( const uint8_t * s, size_t len ) { + const uint64_t seed = 81; + + if (len <= 32) { + if (len <= 16) { + return HashLen0to16(s, len); + } else { + return HashLen17to32(s, len); + } + } else if (len <= 64) { + return HashLen33to64(s, len); + } + + // For strings over 64 bytes we loop. I nternal state consists of + // 56 bytes: v, w, x, y, and z. 
+ uint64_t x = seed; + uint64_t y = seed * k1 + 113; + uint64_t z = ShiftMix(y * k2 + 113) * k2; + pair v = make_pair(0, 0); + pair w = make_pair(0, 0); + x = x * k2 + Fetch64(s); + + // Set end so that after the loop we have 1 to 64 bytes left to process. + const uint8_t * end = s + ((len - 1) / 64) * 64; + const uint8_t * last64 = end + ((len - 1) & 63) - 63; + assert(s + len - 64 == last64); + do { + x = ROTR64(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = ROTR64(y + v.second + Fetch64 (s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = ROTR64(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s , v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second , y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + } while (s != end); + uint64_t mul = k1 + ((z & 0xff) << 1); + // Make s point to the last 64 bytes of input. + s = last64; + w.first += ((len - 1) & 63); + v.first += w.first; + w.first += v.first; + x = ROTR64(x + y + v.first + Fetch64(s + 8), 37) * mul; + y = ROTR64(y + v.second + Fetch64 (s + 48), 42) * mul; + x ^= w.second * 9; + y += v.first * 9 + Fetch64(s + 40); + z = ROTR64(z + w.first, 33) * mul; + v = WeakHashLen32WithSeeds(s , v.second * mul, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second , y + Fetch64(s + 16)); std::swap(z, x); - s += 64; - } while (s != end); - uint64_t mul = k1 + ((z & 0xff) << 1); - // Make s point to the last 64 bytes of input. 
- s = last64; - w.first += ((len - 1) & 63); - v.first += w.first; - w.first += v.first; - x = ROTR64(x + y + v.first + Fetch64(s + 8), 37) * mul; - y = ROTR64(y + v.second + Fetch64(s + 48), 42) * mul; - x ^= w.second * 9; - y += v.first * 9 + Fetch64(s + 40); - z = ROTR64(z + w.first, 33) * mul; - v = WeakHashLen32WithSeeds(s, v.second * mul, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); - std::swap(z, x); - return HashLen16(HashLen16(v.first, w.first, mul) + ShiftMix(y) * k0 + z, - HashLen16(v.second, w.second, mul) + x, - mul); -} - -template < bool bswap > -static uint64_t farmhashna::Hash64WithSeeds(const uint8_t *s, size_t len, uint64_t seed0, uint64_t seed1) { + return HashLen16(HashLen16(v.first, w.first, mul) + ShiftMix(y) * k0 + z, + HashLen16(v.second, w.second, mul) + x, mul); +} + +template +static uint64_t farmhashna::Hash64WithSeeds( const uint8_t * s, size_t len, uint64_t seed0, uint64_t seed1 ) { return HashLen16(farmhashna::Hash64(s, len) - seed0, seed1); } -template < bool bswap > -static uint64_t farmhashna::Hash64WithSeed(const uint8_t *s, size_t len, uint64_t seed) { +template +static uint64_t farmhashna::Hash64WithSeed( const uint8_t * s, size_t len, uint64_t seed ) { return farmhashna::Hash64WithSeeds(s, len, k2, seed); } //------------------------------------------------------------ namespace farmhashuo { - static inline uint64_t H(uint64_t x, uint64_t y, uint64_t mul, int r); - - template < bool bswap > - static uint64_t Hash64(const uint8_t *s, size_t len); - template < bool bswap > - static uint64_t Hash64WithSeeds(const uint8_t *s, size_t len, uint64_t seed0, uint64_t seed1); - template < bool bswap > - static uint64_t Hash64WithSeed(const uint8_t *s, size_t len, uint64_t seed); -} - -static inline uint64_t farmhashuo::H(uint64_t x, uint64_t y, uint64_t mul, int r) { - uint64_t a = (x ^ y) * mul; - a ^= (a >> 47); - uint64_t b = (y ^ a) * mul; - return ROTR64(b, r) * mul; -} - -template < bool 
bswap > -static uint64_t farmhashuo::Hash64WithSeeds(const uint8_t *s, size_t len, - uint64_t seed0, uint64_t seed1) { - if (len <= 64) { - return farmhashna::Hash64WithSeeds(s, len, seed0, seed1); - } - - // For strings over 64 bytes we loop. Internal state consists of - // 64 bytes: u, v, w, x, y, and z. - uint64_t x = seed0; - uint64_t y = seed1 * k2 + 113; - uint64_t z = ShiftMix(y * k2) * k2; - pair v = make_pair(seed0, seed1); - pair w = make_pair(0, 0); - uint64_t u = x - z; - x *= k2; - uint64_t mul = k2 + (u & 0x82); - - // Set end so that after the loop we have 1 to 64 bytes left to process. - const uint8_t* end = s + ((len - 1) / 64) * 64; - const uint8_t* last64 = end + ((len - 1) & 63) - 63; - assert(s + len - 64 == last64); - do { - uint64_t a0 = Fetch64(s); - uint64_t a1 = Fetch64(s + 8); - uint64_t a2 = Fetch64(s + 16); - uint64_t a3 = Fetch64(s + 24); - uint64_t a4 = Fetch64(s + 32); - uint64_t a5 = Fetch64(s + 40); - uint64_t a6 = Fetch64(s + 48); - uint64_t a7 = Fetch64(s + 56); - x += a0 + a1; - y += a2; - z += a3; - v.first += a4; - v.second += a5 + a1; - w.first += a6; - w.second += a7; - - x = ROTR64(x, 26); - x *= 9; - y = ROTR64(y, 29); - z *= mul; - v.first = ROTR64(v.first, 33); - v.second = ROTR64(v.second, 30); - w.first ^= x; - w.first *= 9; - z = ROTR64(z, 32); - z += w.second; - w.second += z; - z *= 9; - std::swap(u, y); - - z += a0 + a6; - v.first += a2; - v.second += a3; - w.first += a4; - w.second += a5 + a6; - x += a1; - y += a7; - - y += v.first; - v.first += x - y; - v.second += w.first; - w.first += v.second; - w.second += x - y; - x += w.second; - w.second = ROTR64(w.second, 34); - std::swap(u, z); - s += 64; - } while (s != end); - // Make s point to the last 64 bytes of input. 
- s = last64; - u *= 9; - v.second = ROTR64(v.second, 28); - v.first = ROTR64(v.first, 20); - w.first += ((len - 1) & 63); - u += y; - y += u; - x = ROTR64(y - x + v.first + Fetch64(s + 8), 37) * mul; - y = ROTR64(y ^ v.second ^ Fetch64(s + 48), 42) * mul; - x ^= w.second * 9; - y += v.first + Fetch64(s + 40); - z = ROTR64(z + w.first, 33) * mul; - v = WeakHashLen32WithSeeds(s, v.second * mul, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); - return farmhashuo::H( - HashLen16(v.first + x, w.first ^ y, mul) + z - u, - farmhashuo::H(v.second + y, w.second + z, k2, 30) ^ x, - k2, - 31); -} - -template < bool bswap > -static uint64_t farmhashuo::Hash64WithSeed(const uint8_t *s, size_t len, uint64_t seed) { - return len <= 64 ? farmhashna::Hash64WithSeed(s, len, seed) : - farmhashuo::Hash64WithSeeds(s, len, 0, seed); -} - -template < bool bswap > -static uint64_t farmhashuo::Hash64(const uint8_t *s, size_t len) { - return len <= 64 ? farmhashna::Hash64(s, len) : - farmhashuo::Hash64WithSeeds(s, len, 81, 0); + static inline uint64_t H( uint64_t x, uint64_t y, uint64_t mul, int r ); + + template + static uint64_t Hash64( const uint8_t * s, size_t len ); + + template + static uint64_t Hash64WithSeeds( const uint8_t * s, size_t len, uint64_t seed0, uint64_t seed1 ); + + template + static uint64_t Hash64WithSeed( const uint8_t * s, size_t len, uint64_t seed ); +} // namespace farmhashuo + +static inline uint64_t farmhashuo::H( uint64_t x, uint64_t y, uint64_t mul, int r ) { + uint64_t a = (x ^ y) * mul; + + a ^= (a >> 47); + uint64_t b = (y ^ a) * mul; + return ROTR64(b, r) * mul; +} + +template +static uint64_t farmhashuo::Hash64WithSeeds( const uint8_t * s, size_t len, uint64_t seed0, uint64_t seed1 ) { + if (len <= 64) { + return farmhashna::Hash64WithSeeds(s, len, seed0, seed1); + } + + // For strings over 64 bytes we loop. Internal state consists of + // 64 bytes: u, v, w, x, y, and z. 
+ uint64_t x = seed0; + uint64_t y = seed1 * k2 + 113; + uint64_t z = ShiftMix(y * k2) * k2; + pair v = make_pair(seed0, seed1); + pair w = make_pair( 0, 0); + uint64_t u = x - z; + x *= k2; + uint64_t mul = k2 + (u & 0x82); + + // Set end so that after the loop we have 1 to 64 bytes left to process. + const uint8_t * end = s + ((len - 1) / 64) * 64; + const uint8_t * last64 = end + ((len - 1) & 63) - 63; + assert(s + len - 64 == last64); + do { + uint64_t a0 = Fetch64(s ); + uint64_t a1 = Fetch64(s + 8); + uint64_t a2 = Fetch64(s + 16); + uint64_t a3 = Fetch64(s + 24); + uint64_t a4 = Fetch64(s + 32); + uint64_t a5 = Fetch64(s + 40); + uint64_t a6 = Fetch64(s + 48); + uint64_t a7 = Fetch64(s + 56); + x += a0 + a1; + y += a2; + z += a3; + v.first += a4; + v.second += a5 + a1; + w.first += a6; + w.second += a7; + + x = ROTR64(x , 26); + x *= 9; + y = ROTR64(y , 29); + z *= mul; + v.first = ROTR64(v.first , 33); + v.second = ROTR64(v.second, 30); + w.first ^= x; + w.first *= 9; + z = ROTR64(z , 32); + z += w.second; + w.second += z; + z *= 9; + std::swap(u, y); + + z += a0 + a6; + v.first += a2; + v.second += a3; + w.first += a4; + w.second += a5 + a6; + x += a1; + y += a7; + + y += v.first; + v.first += x - y; + v.second += w.first; + w.first += v.second; + w.second += x - y; + x += w.second; + w.second = ROTR64(w.second, 34); + std::swap(u, z); + s += 64; + } while (s != end); + // Make s point to the last 64 bytes of input. 
+ s = last64; + u *= 9; + v.second = ROTR64(v.second , 28); + v.first = ROTR64(v.first , 20); + w.first += ((len - 1) & 63); + u += y; + y += u; + x = ROTR64(y - x + v.first + Fetch64(s + 8), 37) * mul; + y = ROTR64(y ^ v.second ^ Fetch64(s + 48), 42) * mul; + x ^= w.second * 9; + y += v.first + Fetch64(s + 40); + z = ROTR64(z + w.first, 33) * mul; + v = WeakHashLen32WithSeeds(s , v.second * mul, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second , y + Fetch64(s + 16)); + return farmhashuo::H(HashLen16(v.first + x, w.first ^ y, mul) + z - u, farmhashuo::H( + v.second + y, w.second + z, k2, 30) ^ x, k2, 31); +} + +template +static uint64_t farmhashuo::Hash64WithSeed( const uint8_t * s, size_t len, uint64_t seed ) { + return len <= 64 ? farmhashna::Hash64WithSeed(s, len, seed) : + farmhashuo::Hash64WithSeeds(s, len, 0, seed); +} + +template +static uint64_t farmhashuo::Hash64( const uint8_t * s, size_t len ) { + return len <= 64 ? farmhashna::Hash64(s, len) : + farmhashuo::Hash64WithSeeds(s, len, 81, 0); } //------------------------------------------------------------ namespace farmhashxo { - template < bool bswap > - static inline uint64_t H32(const uint8_t *s, size_t len, uint64_t mul, - uint64_t seed0 = 0, uint64_t seed1 = 0); - template < bool bswap > - static inline uint64_t HashLen33to64(const uint8_t *s, size_t len); - template < bool bswap > - static inline uint64_t HashLen65to96(const uint8_t *s, size_t len); - - template < bool bswap > - static uint64_t Hash64(const uint8_t *s, size_t len); - template < bool bswap > - static uint64_t Hash64WithSeeds(const uint8_t *s, size_t len, uint64_t seed0, uint64_t seed1); - template < bool bswap > - static uint64_t Hash64WithSeed(const uint8_t *s, size_t len, uint64_t seed); -} - -template < bool bswap > -static inline uint64_t farmhashxo::H32(const uint8_t *s, size_t len, uint64_t mul, - uint64_t seed0, uint64_t seed1) { - uint64_t a = Fetch64(s) * k1; - uint64_t b = Fetch64(s + 8); - uint64_t c = 
Fetch64(s + len - 8) * mul; - uint64_t d = Fetch64(s + len - 16) * k2; - uint64_t u = ROTR64(a + b, 43) + ROTR64(c, 30) + d + seed0; - uint64_t v = a + ROTR64(b + k2, 18) + c + seed1; - a = ShiftMix((u ^ v) * mul); - b = ShiftMix((v ^ a) * mul); - return b; + template + static inline uint64_t H32( const uint8_t * s, size_t len, uint64_t mul, uint64_t seed0 = 0, uint64_t seed1 = 0 ); + + template + static inline uint64_t HashLen33to64( const uint8_t * s, size_t len ); + + template + static inline uint64_t HashLen65to96( const uint8_t * s, size_t len ); + + template + static uint64_t Hash64( const uint8_t * s, size_t len ); + + template + static uint64_t Hash64WithSeeds( const uint8_t * s, size_t len, uint64_t seed0, uint64_t seed1 ); + + template + static uint64_t Hash64WithSeed( const uint8_t * s, size_t len, uint64_t seed ); +} // namespace farmhashxo + +template +static inline uint64_t farmhashxo::H32( const uint8_t * s, size_t len, uint64_t mul, uint64_t seed0, uint64_t seed1 ) { + uint64_t a = Fetch64(s ) * k1; + uint64_t b = Fetch64(s + 8); + uint64_t c = Fetch64(s + len - 8) * mul; + uint64_t d = Fetch64(s + len - 16) * k2; + uint64_t u = ROTR64(a + b, 43) + ROTR64(c, 30) + d + seed0; + uint64_t v = a + ROTR64(b + k2, 18) + c + seed1; + + a = ShiftMix((u ^ v) * mul); + b = ShiftMix((v ^ a) * mul); + return b; } // Return an 8-byte hash for 33 to 64 bytes. -template < bool bswap > -static inline uint64_t farmhashxo::HashLen33to64(const uint8_t *s, size_t len) { - uint64_t mul0 = k2 - 30; - uint64_t mul1 = k2 - 30 + 2 * len; - uint64_t h0 = H32(s, 32, mul0); - uint64_t h1 = H32(s + len - 32, 32, mul1); - return ((h1 * mul1) + h0) * mul1; +template +static inline uint64_t farmhashxo::HashLen33to64( const uint8_t * s, size_t len ) { + uint64_t mul0 = k2 - 30; + uint64_t mul1 = k2 - 30 + 2 * len; + uint64_t h0 = H32(s, 32, mul0); + uint64_t h1 = H32(s + len - 32, 32, mul1); + + return ((h1 * mul1) + h0) * mul1; } // Return an 8-byte hash for 65 to 96 bytes. 
-template < bool bswap > -static inline uint64_t farmhashxo::HashLen65to96(const uint8_t *s, size_t len) { - uint64_t mul0 = k2 - 114; - uint64_t mul1 = k2 - 114 + 2 * len; - uint64_t h0 = H32(s, 32, mul0); - uint64_t h1 = H32(s + 32, 32, mul1); - uint64_t h2 = H32(s + len - 32, 32, mul1, h0, h1); - return (h2 * 9 + (h0 >> 17) + (h1 >> 21)) * mul1; -} - -template < bool bswap > -static uint64_t farmhashxo::Hash64(const uint8_t *s, size_t len) { +template +static inline uint64_t farmhashxo::HashLen65to96( const uint8_t * s, size_t len ) { + uint64_t mul0 = k2 - 114; + uint64_t mul1 = k2 - 114 + 2 * len; + uint64_t h0 = H32(s , 32, mul0); + uint64_t h1 = H32(s + 32, 32, mul1); + uint64_t h2 = H32(s + len - 32, 32, mul1, h0, h1); + + return (h2 * 9 + (h0 >> 17) + (h1 >> 21)) * mul1; +} + +template +static uint64_t farmhashxo::Hash64( const uint8_t * s, size_t len ) { if (len <= 32) { if (len <= 16) { return farmhashna::HashLen0to16(s, len); @@ -533,226 +557,227 @@ static uint64_t farmhashxo::Hash64(const uint8_t *s, size_t len) { } } -template < bool bswap > -static uint64_t farmhashxo::Hash64WithSeeds(const uint8_t *s, size_t len, uint64_t seed0, uint64_t seed1) { - return farmhashuo::Hash64WithSeeds(s, len, seed0, seed1); +template +static uint64_t farmhashxo::Hash64WithSeeds( const uint8_t * s, size_t len, uint64_t seed0, uint64_t seed1 ) { + return farmhashuo::Hash64WithSeeds(s, len, seed0, seed1); } -template < bool bswap > -static uint64_t farmhashxo::Hash64WithSeed(const uint8_t *s, size_t len, uint64_t seed) { - return farmhashuo::Hash64WithSeed(s, len, seed); +template +static uint64_t farmhashxo::Hash64WithSeed( const uint8_t * s, size_t len, uint64_t seed ) { + return farmhashuo::Hash64WithSeed(s, len, seed); } //------------------------------------------------------------ #if defined(HAVE_SSE_4_1) namespace farmhashte { - template < bool bswap > - static inline uint64_t Hash64Long(const uint8_t* s, size_t n, - uint64_t seed0, uint64_t seed1); + template + 
static inline uint64_t Hash64Long( const uint8_t * s, size_t n, uint64_t seed0, uint64_t seed1 ); - template < bool bswap > - static uint64_t Hash64(const uint8_t *s, size_t len); - template < bool bswap > - static uint64_t Hash64WithSeeds(const uint8_t *s, size_t len, uint64_t seed0, uint64_t seed1); - template < bool bswap > - static uint64_t Hash64WithSeed(const uint8_t *s, size_t len, uint64_t seed); -} + template + static uint64_t Hash64( const uint8_t * s, size_t len ); + + template + static uint64_t Hash64WithSeeds( const uint8_t * s, size_t len, uint64_t seed0, uint64_t seed1 ); + + template + static uint64_t Hash64WithSeed( const uint8_t * s, size_t len, uint64_t seed ); +} // namespace farmhashte // Requires n >= 256. Requires SSE4.1. Should be slightly faster if the // compiler uses AVX instructions (e.g., use the -mavx flag with GCC). -template < bool bswap > -static inline uint64_t farmhashte::Hash64Long(const uint8_t* s, size_t n, - uint64_t seed0, uint64_t seed1) { - const __m128i kShuf = - _mm_set_epi8(4, 11, 10, 5, 8, 15, 6, 9, 12, 2, 14, 13, 0, 7, 3, 1); - const __m128i kMult = - _mm_set_epi8(0xbd, 0xd6, 0x33, 0x39, 0x45, 0x54, 0xfa, 0x03, - 0x34, 0x3e, 0x33, 0xed, 0xcc, 0x9e, 0x2d, 0x51); - uint64_t seed2 = (seed0 + 113) * (seed1 + 9); - uint64_t seed3 = (ROTR64(seed0, 23) + 27) * (ROTR64(seed1, 30) + 111); - __m128i d0 = _mm_cvtsi64_si128(seed0); - __m128i d1 = _mm_cvtsi64_si128(seed1); - __m128i d2 = Shuf(kShuf, d0); - __m128i d3 = Shuf(kShuf, d1); - __m128i d4 = Xor(d0, d1); - __m128i d5 = Xor(d1, d2); - __m128i d6 = Xor(d2, d4); - __m128i d7 = _mm_set1_epi32(seed2 >> 32); - __m128i d8 = Mul(kMult, d2); - __m128i d9 = _mm_set1_epi32(seed3 >> 32); - __m128i d10 = _mm_set1_epi32(seed3); - __m128i d11 = Add64(d2, _mm_set1_epi32(seed2)); - const uint8_t* end = s + (n & ~static_cast(255)); - do { - __m128i z; - z = Fetch128(s); - d0 = Add64(d0, z); - d1 = Shuf(kShuf, d1); - d2 = Xor(d2, d0); - d4 = Xor(d4, z); - d4 = Xor(d4, d1); - std::swap(d0, 
d6); - z = Fetch128(s + 16); - d5 = Add64(d5, z); - d6 = Shuf(kShuf, d6); - d8 = Shuf(kShuf, d8); - d7 = Xor(d7, d5); - d0 = Xor(d0, z); - d0 = Xor(d0, d6); - std::swap(d5, d11); - z = Fetch128(s + 32); - d1 = Add64(d1, z); - d2 = Shuf(kShuf, d2); - d4 = Shuf(kShuf, d4); - d5 = Xor(d5, z); - d5 = Xor(d5, d2); - std::swap(d10, d4); - z = Fetch128(s + 48); - d6 = Add64(d6, z); - d7 = Shuf(kShuf, d7); - d0 = Shuf(kShuf, d0); - d8 = Xor(d8, d6); - d1 = Xor(d1, z); - d1 = Add64(d1, d7); - z = Fetch128(s + 64); - d2 = Add64(d2, z); - d5 = Shuf(kShuf, d5); - d4 = Add64(d4, d2); - d6 = Xor(d6, z); - d6 = Xor(d6, d11); - std::swap(d8, d2); - z = Fetch128(s + 80); - d7 = Xor(d7, z); - d8 = Shuf(kShuf, d8); - d1 = Shuf(kShuf, d1); - d0 = Add64(d0, d7); - d2 = Add64(d2, z); - d2 = Add64(d2, d8); - std::swap(d1, d7); - z = Fetch128(s + 96); - d4 = Shuf(kShuf, d4); - d6 = Shuf(kShuf, d6); - d8 = Mul(kMult, d8); - d5 = Xor(d5, d11); - d7 = Xor(d7, z); - d7 = Add64(d7, d4); - std::swap(d6, d0); - z = Fetch128(s + 112); - d8 = Add64(d8, z); - d0 = Shuf(kShuf, d0); - d2 = Shuf(kShuf, d2); - d1 = Xor(d1, d8); - d10 = Xor(d10, z); - d10 = Xor(d10, d0); - std::swap(d11, d5); - z = Fetch128(s + 128); - d4 = Add64(d4, z); - d5 = Shuf(kShuf, d5); - d7 = Shuf(kShuf, d7); - d6 = Add64(d6, d4); - d8 = Xor(d8, z); - d8 = Xor(d8, d5); - std::swap(d4, d10); - z = Fetch128(s + 144); - d0 = Add64(d0, z); - d1 = Shuf(kShuf, d1); - d2 = Add64(d2, d0); - d4 = Xor(d4, z); - d4 = Xor(d4, d1); - z = Fetch128(s + 160); - d5 = Add64(d5, z); - d6 = Shuf(kShuf, d6); - d8 = Shuf(kShuf, d8); - d7 = Xor(d7, d5); - d0 = Xor(d0, z); - d0 = Xor(d0, d6); - std::swap(d2, d8); - z = Fetch128(s + 176); - d1 = Add64(d1, z); - d2 = Shuf(kShuf, d2); - d4 = Shuf(kShuf, d4); - d5 = Mul(kMult, d5); - d5 = Xor(d5, z); - d5 = Xor(d5, d2); - std::swap(d7, d1); - z = Fetch128(s + 192); - d6 = Add64(d6, z); - d7 = Shuf(kShuf, d7); - d0 = Shuf(kShuf, d0); - d8 = Add64(d8, d6); - d1 = Xor(d1, z); - d1 = Xor(d1, d7); - 
std::swap(d0, d6); - z = Fetch128(s + 208); - d2 = Add64(d2, z); - d5 = Shuf(kShuf, d5); - d4 = Xor(d4, d2); - d6 = Xor(d6, z); - d6 = Xor(d6, d9); - std::swap(d5, d11); - z = Fetch128(s + 224); - d7 = Add64(d7, z); - d8 = Shuf(kShuf, d8); - d1 = Shuf(kShuf, d1); - d0 = Xor(d0, d7); - d2 = Xor(d2, z); - d2 = Xor(d2, d8); - std::swap(d10, d4); - z = Fetch128(s + 240); - d3 = Add64(d3, z); - d4 = Shuf(kShuf, d4); - d6 = Shuf(kShuf, d6); - d7 = Mul(kMult, d7); - d5 = Add64(d5, d3); - d7 = Xor(d7, z); - d7 = Xor(d7, d4); - std::swap(d3, d9); - s += 256; - } while (s != end); - d6 = Add64(Mul(kMult, d6), _mm_cvtsi64_si128(n)); - if (n % 256 != 0) { - d7 = Add64(_mm_shuffle_epi32(d8, (0 << 6) + (3 << 4) + (2 << 2) + (1 << 0)), d7); - d8 = Add64(Mul(kMult, d8), _mm_cvtsi64_si128(farmhashxo::Hash64(s, n % 256))); - } - __m128i t[8]; - d0 = Mul(kMult, Shuf(kShuf, Mul(kMult, d0))); - d3 = Mul(kMult, Shuf(kShuf, Mul(kMult, d3))); - d9 = Mul(kMult, Shuf(kShuf, Mul(kMult, d9))); - d1 = Mul(kMult, Shuf(kShuf, Mul(kMult, d1))); - d0 = Add64(d11, d0); - d3 = Xor(d7, d3); - d9 = Add64(d8, d9); - d1 = Add64(d10, d1); - d4 = Add64(d3, d4); - d5 = Add64(d9, d5); - d6 = Xor(d1, d6); - d2 = Add64(d0, d2); - t[0] = d0; - t[1] = d3; - t[2] = d9; - t[3] = d1; - t[4] = d4; - t[5] = d5; - t[6] = d6; - t[7] = d2; - return farmhashxo::Hash64(reinterpret_cast(t), sizeof(t)); -} - -template < bool bswap > -static uint64_t farmhashte::Hash64(const uint8_t *s, size_t len) { - // Empirically, farmhashxo seems faster until length 512. 
+template +static inline uint64_t farmhashte::Hash64Long( const uint8_t * s, size_t n, uint64_t seed0, uint64_t seed1 ) { + const __m128i kShuf = + _mm_set_epi8( 4, 11, 10, 5, 8, 15, 6, 9, 12, 2, 14, 13, 0, 7, 3, 1); + const __m128i kMult = + _mm_set_epi8(0xbd, 0xd6, 0x33, 0x39, 0x45, 0x54, 0xfa, + 0x03, 0x34, 0x3e, 0x33, 0xed, 0xcc, 0x9e, 0x2d, 0x51); + uint64_t seed2 = (seed0 + 113) * (seed1 + 9); + uint64_t seed3 = (ROTR64(seed0, 23) + 27) * (ROTR64(seed1, 30) + 111); + __m128i d0 = _mm_cvtsi64_si128(seed0); + __m128i d1 = _mm_cvtsi64_si128(seed1); + __m128i d2 = Shuf(kShuf, d0); + __m128i d3 = Shuf(kShuf, d1); + __m128i d4 = Xor(d0, d1); + __m128i d5 = Xor(d1, d2); + __m128i d6 = Xor(d2, d4); + __m128i d7 = _mm_set1_epi32(seed2 >> 32); + __m128i d8 = Mul(kMult, d2); + __m128i d9 = _mm_set1_epi32(seed3 >> 32); + __m128i d10 = _mm_set1_epi32(seed3 ); + __m128i d11 = Add64(d2, _mm_set1_epi32(seed2)); + const uint8_t * end = s + (n & ~static_cast(255)); + + do { + __m128i z; + z = Fetch128(s); + d0 = Add64(d0, z); + d1 = Shuf(kShuf, d1); + d2 = Xor(d2, d0); + d4 = Xor(d4, z ); + d4 = Xor(d4, d1); + std::swap(d0, d6); + z = Fetch128(s + 16); + d5 = Add64(d5, z); + d6 = Shuf(kShuf, d6); + d8 = Shuf(kShuf, d8); + d7 = Xor(d7, d5); + d0 = Xor(d0, z ); + d0 = Xor(d0, d6); + std::swap(d5, d11); + z = Fetch128(s + 32); + d1 = Add64(d1, z); + d2 = Shuf(kShuf, d2); + d4 = Shuf(kShuf, d4); + d5 = Xor(d5, z ); + d5 = Xor(d5, d2); + std::swap(d10, d4); + z = Fetch128(s + 48); + d6 = Add64(d6, z); + d7 = Shuf(kShuf, d7); + d0 = Shuf(kShuf, d0); + d8 = Xor(d8, d6); + d1 = Xor(d1, z ); + d1 = Add64(d1, d7); + z = Fetch128(s + 64); + d2 = Add64(d2, z); + d5 = Shuf(kShuf, d5); + d4 = Add64(d4, d2); + d6 = Xor(d6, z ); + d6 = Xor(d6, d11); + std::swap(d8, d2); + z = Fetch128(s + 80); + d7 = Xor(d7, z); + d8 = Shuf(kShuf, d8); + d1 = Shuf(kShuf, d1); + d0 = Add64(d0, d7); + d2 = Add64(d2, z ); + d2 = Add64(d2, d8); + std::swap(d1, d7); + z = Fetch128(s + 96); + d4 = Shuf(kShuf, d4); 
+ d6 = Shuf(kShuf, d6); + d8 = Mul(kMult, d8); + d5 = Xor(d5, d11); + d7 = Xor(d7, z ); + d7 = Add64(d7, d4); + std::swap(d6, d0); + z = Fetch128(s + 112); + d8 = Add64(d8, z); + d0 = Shuf(kShuf, d0); + d2 = Shuf(kShuf, d2); + d1 = Xor(d1 , d8); + d10 = Xor(d10, z ); + d10 = Xor(d10, d0); + std::swap(d11, d5); + z = Fetch128(s + 128); + d4 = Add64(d4, z); + d5 = Shuf(kShuf, d5); + d7 = Shuf(kShuf, d7); + d6 = Add64(d6, d4); + d8 = Xor(d8, z ); + d8 = Xor(d8, d5); + std::swap(d4, d10); + z = Fetch128(s + 144); + d0 = Add64(d0, z); + d1 = Shuf(kShuf, d1); + d2 = Add64(d2, d0); + d4 = Xor(d4, z ); + d4 = Xor(d4, d1); + z = Fetch128(s + 160); + d5 = Add64(d5, z); + d6 = Shuf(kShuf, d6); + d8 = Shuf(kShuf, d8); + d7 = Xor(d7, d5); + d0 = Xor(d0, z ); + d0 = Xor(d0, d6); + std::swap(d2, d8); + z = Fetch128(s + 176); + d1 = Add64(d1, z); + d2 = Shuf(kShuf, d2); + d4 = Shuf(kShuf, d4); + d5 = Mul(kMult, d5); + d5 = Xor(d5, z ); + d5 = Xor(d5, d2); + std::swap(d7, d1); + z = Fetch128(s + 192); + d6 = Add64(d6, z); + d7 = Shuf(kShuf, d7); + d0 = Shuf(kShuf, d0); + d8 = Add64(d8, d6); + d1 = Xor(d1, z ); + d1 = Xor(d1, d7); + std::swap(d0, d6); + z = Fetch128(s + 208); + d2 = Add64(d2, z); + d5 = Shuf(kShuf, d5); + d4 = Xor(d4, d2); + d6 = Xor(d6, z ); + d6 = Xor(d6, d9); + std::swap(d5, d11); + z = Fetch128(s + 224); + d7 = Add64(d7, z); + d8 = Shuf(kShuf, d8); + d1 = Shuf(kShuf, d1); + d0 = Xor(d0, d7); + d2 = Xor(d2, z ); + d2 = Xor(d2, d8); + std::swap(d10, d4); + z = Fetch128(s + 240); + d3 = Add64(d3, z); + d4 = Shuf(kShuf, d4); + d6 = Shuf(kShuf, d6); + d7 = Mul(kMult, d7); + d5 = Add64(d5, d3); + d7 = Xor(d7, z ); + d7 = Xor(d7, d4); + std::swap(d3, d9); + s += 256; + } while (s != end); + d6 = Add64(Mul(kMult, d6), _mm_cvtsi64_si128(n)); + if (n % 256 != 0) { + d7 = Add64(_mm_shuffle_epi32(d8, (0 << 6) + (3 << 4) + (2 << 2) + (1 << 0)), d7 ); + d8 = Add64(Mul(kMult, d8), _mm_cvtsi64_si128(farmhashxo::Hash64(s, n % 256))); + } + __m128i t[8]; + d0 = Mul(kMult, 
Shuf(kShuf, Mul(kMult, d0))); + d3 = Mul(kMult, Shuf(kShuf, Mul(kMult, d3))); + d9 = Mul(kMult, Shuf(kShuf, Mul(kMult, d9))); + d1 = Mul(kMult, Shuf(kShuf, Mul(kMult, d1))); + d0 = Add64(d11, d0); + d3 = Xor(d7, d3); + d9 = Add64(d8 , d9); + d1 = Add64(d10, d1); + d4 = Add64(d3 , d4); + d5 = Add64(d9 , d5); + d6 = Xor(d1, d6); + d2 = Add64(d0, d2); + t[0] = d0; + t[1] = d3; + t[2] = d9; + t[3] = d1; + t[4] = d4; + t[5] = d5; + t[6] = d6; + t[7] = d2; + return farmhashxo::Hash64(reinterpret_cast(t), sizeof(t)); +} + +template +static uint64_t farmhashte::Hash64( const uint8_t * s, size_t len ) { + // Empirically, farmhashxo seems faster until length 512. return len >= 512 ? farmhashte::Hash64Long(s, len, k2, k1) : farmhashxo::Hash64(s, len); } -template < bool bswap > -static uint64_t farmhashte::Hash64WithSeed(const uint8_t *s, size_t len, uint64_t seed) { +template +static uint64_t farmhashte::Hash64WithSeed( const uint8_t * s, size_t len, uint64_t seed ) { return len >= 512 ? farmhashte::Hash64Long(s, len, k1, seed) : farmhashxo::Hash64WithSeed(s, len, seed); } -template < bool bswap > -static uint64_t farmhashte::Hash64WithSeeds(const uint8_t *s, size_t len, uint64_t seed0, uint64_t seed1) { +template +static uint64_t farmhashte::Hash64WithSeeds( const uint8_t * s, size_t len, uint64_t seed0, uint64_t seed1 ) { return len >= 512 ? 
farmhashte::Hash64Long(s, len, seed0, seed1) : farmhashxo::Hash64WithSeeds(s, len, seed0, seed1); } @@ -762,140 +787,150 @@ static uint64_t farmhashte::Hash64WithSeeds(const uint8_t *s, size_t len, uint64 //------------------------------------------------------------ #if defined(HAVE_SSE_4_1) namespace farmhashnt { - template < bool bswap > - static uint32_t Hash32(const uint8_t *s, size_t len); - template < bool bswap > - static uint32_t Hash32WithSeed(const uint8_t *s, size_t len, uint32_t seed); + template + static uint32_t Hash32( const uint8_t * s, size_t len ); + + template + static uint32_t Hash32WithSeed( const uint8_t * s, size_t len, uint32_t seed ); } -template < bool bswap > -static uint32_t farmhashnt::Hash32(const uint8_t *s, size_t len) { - return static_cast(farmhashte::Hash64(s, len)); +template +static uint32_t farmhashnt::Hash32( const uint8_t * s, size_t len ) { + return static_cast(farmhashte::Hash64(s, len)); } -template < bool bswap > -static uint32_t farmhashnt::Hash32WithSeed(const uint8_t *s, size_t len, uint32_t seed) { - return static_cast(farmhashte::Hash64WithSeed(s, len, seed)); +template +static uint32_t farmhashnt::Hash32WithSeed( const uint8_t * s, size_t len, uint32_t seed ) { + return static_cast(farmhashte::Hash64WithSeed(s, len, seed)); } + #endif //------------------------------------------------------------ namespace farmhashmk { - static inline uint32_t Hash32Len0to4(const uint8_t *s, size_t len, uint32_t seed = 0); - template < bool bswap > - static inline uint32_t Hash32Len5to12(const uint8_t *s, size_t len, uint32_t seed = 0); - template < bool bswap > - static inline uint32_t Hash32Len13to24(const uint8_t *s, size_t len, uint32_t seed = 0); - - template < bool bswap > - static uint32_t Hash32(const uint8_t *s, size_t len); - template < bool bswap > - static uint32_t Hash32WithSeed(const uint8_t *s, size_t len, uint32_t seed); -} - -template < bool bswap > -static inline uint32_t farmhashmk::Hash32Len13to24(const uint8_t 
*s, size_t len, uint32_t seed) { - uint32_t a = Fetch32(s - 4 + (len >> 1)); - uint32_t b = Fetch32(s + 4); - uint32_t c = Fetch32(s + len - 8); - uint32_t d = Fetch32(s + (len >> 1)); - uint32_t e = Fetch32(s); - uint32_t f = Fetch32(s + len - 4); - uint32_t h = d * c1 + len + seed; - a = ROTR32(a, 12) + f; - h = Mur(c, h) + a; - a = ROTR32(a, 3) + c; - h = Mur(e, h) + a; - a = ROTR32(a + f, 12) + d; - h = Mur(b ^ seed, h) + a; - return fmix(h); -} - -static inline uint32_t farmhashmk::Hash32Len0to4(const uint8_t *s, size_t len, uint32_t seed) { - uint32_t b = seed; - uint32_t c = 9; - for (size_t i = 0; i < len; i++) { - int8_t v = s[i]; - b = b * c1 + v; - c ^= b; - } - return fmix(Mur(b, Mur(len, c))); -} - -template < bool bswap > -static inline uint32_t farmhashmk::Hash32Len5to12(const uint8_t *s, size_t len, uint32_t seed) { - uint32_t a = len, b = len * 5, c = 9, d = b + seed; - a += Fetch32(s); - b += Fetch32(s + len - 4); - c += Fetch32(s + ((len >> 1) & 4)); - return fmix(seed ^ Mur(c, Mur(b, Mur(a, d)))); -} - -template < bool bswap > -static uint32_t farmhashmk::Hash32(const uint8_t *s, size_t len) { + static inline uint32_t Hash32Len0to4( const uint8_t * s, size_t len, uint32_t seed = 0 ); + + template + static inline uint32_t Hash32Len5to12( const uint8_t * s, size_t len, uint32_t seed = 0 ); + + template + static inline uint32_t Hash32Len13to24( const uint8_t * s, size_t len, uint32_t seed = 0 ); + + template + static uint32_t Hash32( const uint8_t * s, size_t len ); + + template + static uint32_t Hash32WithSeed( const uint8_t * s, size_t len, uint32_t seed ); +} // namespace farmhashmk + +template +static inline uint32_t farmhashmk::Hash32Len13to24( const uint8_t * s, size_t len, uint32_t seed ) { + uint32_t a = Fetch32(s - 4 + (len >> 1)); + uint32_t b = Fetch32(s + 4); + uint32_t c = Fetch32(s + len - 8); + uint32_t d = Fetch32(s + (len >> 1)); + uint32_t e = Fetch32(s); + uint32_t f = Fetch32(s + len - 4); + uint32_t h = d * c1 + len + seed; + + 
a = ROTR32(a, 12) + f; + h = Mur(c, h) + a; + a = ROTR32(a, 3) + c; + h = Mur(e, h) + a; + a = ROTR32(a + f, 12) + d; + h = Mur(b ^ seed, h) + a; + return fmix(h); +} + +static inline uint32_t farmhashmk::Hash32Len0to4( const uint8_t * s, size_t len, uint32_t seed ) { + uint32_t b = seed; + uint32_t c = 9; + + for (size_t i = 0; i < len; i++) { + int8_t v = s[i]; + b = b * c1 + v; + c ^= b; + } + return fmix(Mur(b, Mur(len, c))); +} + +template +static inline uint32_t farmhashmk::Hash32Len5to12( const uint8_t * s, size_t len, uint32_t seed ) { + uint32_t a = len, b = len * 5, c = 9, d = b + seed; + + a += Fetch32(s); + b += Fetch32(s + len - 4); + c += Fetch32(s + ((len >> 1) & 4)); + return fmix(seed ^ Mur(c, Mur(b, Mur(a, d)))); +} + +template +static uint32_t farmhashmk::Hash32( const uint8_t * s, size_t len ) { if (len <= 24) { return len <= 12 ? - (len <= 4 ? farmhashmk::Hash32Len0to4(s, len) : farmhashmk::Hash32Len5to12(s, len)) : - farmhashmk::Hash32Len13to24(s, len); - } - - // len > 24 - uint32_t h = len, g = c1 * len, f = g; - uint32_t a0 = ROTR32(Fetch32(s + len - 4) * c1, 17) * c2; - uint32_t a1 = ROTR32(Fetch32(s + len - 8) * c1, 17) * c2; - uint32_t a2 = ROTR32(Fetch32(s + len - 16) * c1, 17) * c2; - uint32_t a3 = ROTR32(Fetch32(s + len - 12) * c1, 17) * c2; - uint32_t a4 = ROTR32(Fetch32(s + len - 20) * c1, 17) * c2; - h ^= a0; - h = ROTR32(h, 19); - h = h * 5 + 0xe6546b64; - h ^= a2; - h = ROTR32(h, 19); - h = h * 5 + 0xe6546b64; - g ^= a1; - g = ROTR32(g, 19); - g = g * 5 + 0xe6546b64; - g ^= a3; - g = ROTR32(g, 19); - g = g * 5 + 0xe6546b64; - f += a4; - f = ROTR32(f, 19) + 113; - size_t iters = (len - 1) / 20; - do { - uint32_t a = Fetch32(s); - uint32_t b = Fetch32(s + 4); - uint32_t c = Fetch32(s + 8); - uint32_t d = Fetch32(s + 12); - uint32_t e = Fetch32(s + 16); - h += a; - g += b; - f += c; - h = Mur(d, h) + e; - g = Mur(c, g) + a; - f = Mur(b + e * c1, f) + d; - f += g; - g += f; - s += 20; - } while (--iters != 0); - g = ROTR32(g, 11) * 
c1; - g = ROTR32(g, 17) * c1; - f = ROTR32(f, 11) * c1; - f = ROTR32(f, 17) * c1; - h = ROTR32(h + g, 19); - h = h * 5 + 0xe6546b64; - h = ROTR32(h, 17) * c1; - h = ROTR32(h + f, 19); - h = h * 5 + 0xe6546b64; - h = ROTR32(h, 17) * c1; - return h; -} - -template < bool bswap > -static uint32_t farmhashmk::Hash32WithSeed(const uint8_t *s, size_t len, uint32_t seed) { + (len <= 4 ? farmhashmk::Hash32Len0to4(s, len) : farmhashmk::Hash32Len5to12(s, len)) : + farmhashmk::Hash32Len13to24(s, len); + } + + // len > 24 + uint32_t h = len, g = c1 * len, f = g; + uint32_t a0 = ROTR32(Fetch32(s + len - 4) * c1, 17) * c2; + uint32_t a1 = ROTR32(Fetch32(s + len - 8) * c1, 17) * c2; + uint32_t a2 = ROTR32(Fetch32(s + len - 16) * c1, 17) * c2; + uint32_t a3 = ROTR32(Fetch32(s + len - 12) * c1, 17) * c2; + uint32_t a4 = ROTR32(Fetch32(s + len - 20) * c1, 17) * c2; + h ^= a0; + h = ROTR32(h, 19); + h = h * 5 + 0xe6546b64; + h ^= a2; + h = ROTR32(h, 19); + h = h * 5 + 0xe6546b64; + g ^= a1; + g = ROTR32(g, 19); + g = g * 5 + 0xe6546b64; + g ^= a3; + g = ROTR32(g, 19); + g = g * 5 + 0xe6546b64; + f += a4; + f = ROTR32(f, 19) + 113; + size_t iters = (len - 1) / 20; + do { + uint32_t a = Fetch32(s ); + uint32_t b = Fetch32(s + 4); + uint32_t c = Fetch32(s + 8); + uint32_t d = Fetch32(s + 12); + uint32_t e = Fetch32(s + 16); + h += a; + g += b; + f += c; + h = Mur(d, h) + e; + g = Mur(c, g) + a; + f = Mur(b + e * c1, f) + d; + f += g; + g += f; + s += 20; + } while (--iters != 0); + g = ROTR32(g , 11) * c1; + g = ROTR32(g , 17) * c1; + f = ROTR32(f , 11) * c1; + f = ROTR32(f , 17) * c1; + h = ROTR32(h + g, 19); + h = h * 5 + 0xe6546b64; + h = ROTR32(h , 17) * c1; + h = ROTR32(h + f, 19); + h = h * 5 + 0xe6546b64; + h = ROTR32(h , 17) * c1; + return h; +} + +template +static uint32_t farmhashmk::Hash32WithSeed( const uint8_t * s, size_t len, uint32_t seed ) { if (len <= 24) { - if (len >= 13) return farmhashmk::Hash32Len13to24(s, len, seed * c1); - else if (len >= 5) return 
farmhashmk::Hash32Len5to12(s, len, seed); - else return farmhashmk::Hash32Len0to4(s, len, seed); + if (len >= 13) { return farmhashmk::Hash32Len13to24(s, len, seed * c1); } else if (len >= 5) { + return farmhashmk::Hash32Len5to12(s, len, seed); + } else { + return farmhashmk::Hash32Len0to4(s, len, seed); + } } uint32_t h = farmhashmk::Hash32Len13to24(s, 24, seed ^ len); return Mur(farmhashmk::Hash32(s + 24, len - 24) + seed, h); @@ -904,190 +939,194 @@ static uint32_t farmhashmk::Hash32WithSeed(const uint8_t *s, size_t len, uint32_ //------------------------------------------------------------ #if defined(HAVE_X86_64_CRC32C) && defined(HAVE_X86_64_AES) namespace farmhashsu { - template < bool bswap > - static uint32_t Hash32(const uint8_t *s, size_t len); - template < bool bswap > - static uint32_t Hash32WithSeed(const uint8_t *s, size_t len, uint32_t seed); + template + static uint32_t Hash32( const uint8_t * s, size_t len ); + + template + static uint32_t Hash32WithSeed( const uint8_t * s, size_t len, uint32_t seed ); } -template < bool bswap > -static uint32_t farmhashsu::Hash32(const uint8_t *s, size_t len) { +template +static uint32_t farmhashsu::Hash32( const uint8_t * s, size_t len ) { const uint32_t seed = 81; + if (len <= 24) { return len <= 12 ? - (len <= 4 ? 
- farmhashmk::Hash32Len0to4(s, len) : - farmhashmk::Hash32Len5to12(s, len)) : - farmhashmk::Hash32Len13to24(s, len); - } - - if (len < 40) { - uint32_t a = len, b = seed * c2, c = a + b; - a += Fetch32(s + len - 4); - b += Fetch32(s + len - 20); - c += Fetch32(s + len - 16); - uint32_t d = a; - a = ROTR32(a, 21); - a = Mur(a, Mur(b, _mm_crc32_u32(c, d))); - a += Fetch32(s + len - 12); - b += Fetch32(s + len - 8); - d += a; - a += d; - b = Mur(b, d) * c2; - a = _mm_crc32_u32(a, b + c); - return farmhashmk::Hash32Len13to24(s, (len + 1) / 2, a) + b; - } - -#undef Mulc1 -#define Mulc1(x) Mul((x), cc1) - -#undef Mulc2 -#define Mulc2(x) Mul((x), cc2) - -#undef Murk -#define Murk(a, h) \ - Add32(k, \ - Mul5( \ - Rol19( \ - Xor( \ - Mulc2( \ - Rol17( \ - Mulc1(a))), \ + (len <= 4 ? + farmhashmk::Hash32Len0to4(s, len) : + farmhashmk::Hash32Len5to12(s, len)) : + farmhashmk::Hash32Len13to24(s, len); + } + + if (len < 40) { + uint32_t a = len, b = seed * c2, c = a + b; + a += Fetch32(s + len - 4); + b += Fetch32(s + len - 20); + c += Fetch32(s + len - 16); + uint32_t d = a; + a = ROTR32(a, 21); + a = Mur(a, Mur(b, _mm_crc32_u32(c, d))); + a += Fetch32(s + len - 12); + b += Fetch32(s + len - 8); + d += a; + a += d; + b = Mur(b, d) * c2; + a = _mm_crc32_u32(a, b + c); + return farmhashmk::Hash32Len13to24(s, (len + 1) / 2, a) + b; + } + + #undef Mulc1 + #define Mulc1(x) Mul((x), cc1) + + #undef Mulc2 + #define Mulc2(x) Mul((x), cc2) + + #undef Murk +#define Murk(a, h) \ + Add32(k, \ + Mul5( \ + Rol19( \ + Xor( \ + Mulc2( \ + Rol17( \ + Mulc1(a))), \ (h))))) - const __m128i cc1 = _mm_set1_epi32(c1); - const __m128i cc2 = _mm_set1_epi32(c2); - __m128i h = _mm_set1_epi32(seed); - __m128i g = _mm_set1_epi32(c1 * seed); - __m128i f = g; - __m128i k = _mm_set1_epi32(0xe6546b64); - __m128i q; - if (len < 80) { - __m128i a = Fetch128(s); - __m128i b = Fetch128(s + 16); - __m128i c = Fetch128(s + (len - 15) / 2); - __m128i d = Fetch128(s + len - 32); - __m128i e = Fetch128(s + len - 16); 
- h = Add32(h, a); - g = Add32(g, b); - q = g; - g = Shuffle0321(g); - f = Add32(f, c); - __m128i be = Add32(b, Mulc1(e)); - h = Add32(h, f); - f = Add32(f, h); - h = Add32(Murk(d, h), e); - k = Xor(k, _mm_shuffle_epi8(g, f)); - g = Add32(Xor(c, g), a); - f = Add32(Xor(be, f), d); - k = Add32(k, be); - k = Add32(k, _mm_shuffle_epi8(f, h)); - f = Add32(f, g); - g = Add32(g, f); - g = Add32(_mm_set1_epi32(len), Mulc1(g)); - } else { - // len >= 80 - // The following is loosely modelled after farmhashmk::Hash32. - size_t iters = (len - 1) / 80; - len -= iters * 80; - -#undef Chunk -#define Chunk() do { \ - __m128i a = Fetch128(s); \ - __m128i b = Fetch128(s + 16); \ - __m128i c = Fetch128(s + 32); \ - __m128i d = Fetch128(s + 48); \ - __m128i e = Fetch128(s + 64); \ - h = Add32(h, a); \ - g = Add32(g, b); \ - g = Shuffle0321(g); \ - f = Add32(f, c); \ - __m128i be = Add32(b, Mulc1(e)); \ - h = Add32(h, f); \ - f = Add32(f, h); \ - h = Add32(h, d); \ - q = Add32(q, e); \ - h = Rol17(h); \ - h = Mulc1(h); \ - k = Xor(k, _mm_shuffle_epi8(g, f)); \ - g = Add32(Xor(c, g), a); \ - f = Add32(Xor(be, f), d); \ - std::swap(f, q); \ - q = _mm_aesimc_si128(q); \ - k = Add32(k, be); \ - k = Add32(k, _mm_shuffle_epi8(f, h)); \ - f = Add32(f, g); \ - g = Add32(g, f); \ - f = Mulc1(f); \ + const __m128i cc1 = _mm_set1_epi32(c1); + const __m128i cc2 = _mm_set1_epi32( c2 ); + __m128i h = _mm_set1_epi32( seed ); + __m128i g = _mm_set1_epi32( c1 * seed); + __m128i f = g; + __m128i k = _mm_set1_epi32(0xe6546b64 ); + __m128i q; + if (len < 80) { + __m128i a = Fetch128(s ); + __m128i b = Fetch128(s + 16); + __m128i c = Fetch128(s + (len - 15) / 2); + __m128i d = Fetch128(s + len - 32); + __m128i e = Fetch128(s + len - 16); + h = Add32(h, a); + g = Add32(g, b); + q = g; + g = Shuffle0321(g); + f = Add32(f, c); + __m128i be = Add32(b, Mulc1(e)); + h = Add32(h, f); + f = Add32(f, h); + h = Add32(Murk(d, h), e); + k = Xor(k, _mm_shuffle_epi8(g, f)); + g = Add32(Xor(c, g) , a); + f = 
Add32(Xor(be, f), d); + k = Add32(k, be ); + k = Add32(k, _mm_shuffle_epi8(f, h)); + f = Add32(f, g); + g = Add32(g, f); + g = Add32(_mm_set1_epi32(len), Mulc1(g)); + } else { + // len >= 80 + // The following is loosely modelled after farmhashmk::Hash32. + size_t iters = (len - 1) / 80; + len -= iters * 80; + + #undef Chunk +#define Chunk() do { \ + __m128i a = Fetch128(s); \ + __m128i b = Fetch128(s + 16); \ + __m128i c = Fetch128(s + 32); \ + __m128i d = Fetch128(s + 48); \ + __m128i e = Fetch128(s + 64); \ + h = Add32(h, a); \ + g = Add32(g, b); \ + g = Shuffle0321(g); \ + f = Add32(f, c); \ + __m128i be = Add32(b, Mulc1(e)); \ + h = Add32(h, f); \ + f = Add32(f, h); \ + h = Add32(h, d); \ + q = Add32(q, e); \ + h = Rol17(h); \ + h = Mulc1(h); \ + k = Xor(k, _mm_shuffle_epi8(g, f)); \ + g = Add32(Xor(c, g), a); \ + f = Add32(Xor(be, f), d); \ + std::swap(f, q); \ + q = _mm_aesimc_si128(q); \ + k = Add32(k, be); \ + k = Add32(k, _mm_shuffle_epi8(f, h)); \ + f = Add32(f, g); \ + g = Add32(g, f); \ + f = Mulc1(f); \ } while (0) - q = g; - while (iters-- != 0) { - Chunk(); - s += 80; + q = g; + while (iters-- != 0) { + Chunk(); + s += 80; + } + + if (len != 0) { + h = Add32(h, _mm_set1_epi32(len)); + s = s + len - 80; + Chunk(); + } } - if (len != 0) { - h = Add32(h, _mm_set1_epi32(len)); - s = s + len - 80; - Chunk(); + g = Shuffle0321(g); + k = Xor(k, g); + k = Xor(k, q); + h = Xor(h, q); + f = Mulc1(f); + k = Mulc2(k); + g = Mulc1(g); + h = Mulc2(h); + k = Add32(k, _mm_shuffle_epi8(g, f)); + h = Add32(h, f); + f = Add32(f, h); + g = Add32(g, k); + k = Add32(k, g); + k = Xor(k, _mm_shuffle_epi8(f, h)); + __m128i buf[4]; + buf[0] = f; + buf[1] = g; + buf[2] = k; + buf[3] = h; + s = reinterpret_cast(buf); + uint32_t x = Fetch32(s ); + uint32_t y = Fetch32(s + 4); + uint32_t z = Fetch32(s + 8); + x = _mm_crc32_u32(x , Fetch32(s + 12)); + y = _mm_crc32_u32(y , Fetch32(s + 16)); + z = _mm_crc32_u32(z * c1, Fetch32(s + 20)); + x = _mm_crc32_u32(x , Fetch32(s + 24)); + 
y = _mm_crc32_u32(y * c1, Fetch32(s + 28)); + uint32_t o = y; + z = _mm_crc32_u32(z , Fetch32(s + 32)); + x = _mm_crc32_u32(x * c1, Fetch32(s + 36)); + y = _mm_crc32_u32(y , Fetch32(s + 40)); + z = _mm_crc32_u32(z * c1, Fetch32(s + 44)); + x = _mm_crc32_u32(x , Fetch32(s + 48)); + y = _mm_crc32_u32(y * c1, Fetch32(s + 52)); + z = _mm_crc32_u32(z , Fetch32(s + 56)); + x = _mm_crc32_u32(x , Fetch32(s + 60)); + return (o - x + y - z) * c1; +} + + #undef Chunk + #undef Murk + #undef Mulc2 + #undef Mulc1 + +template +static uint32_t farmhashsu::Hash32WithSeed( const uint8_t * s, size_t len, uint32_t seed ) { + if (len <= 24) { + if (len >= 13) { return farmhashmk::Hash32Len13to24(s, len, seed * c1); } else if (len >= 5) { + return farmhashmk::Hash32Len5to12(s, len, seed); + } else { + return farmhashmk::Hash32Len0to4(s, len, seed); + } } - } - - g = Shuffle0321(g); - k = Xor(k, g); - k = Xor(k, q); - h = Xor(h, q); - f = Mulc1(f); - k = Mulc2(k); - g = Mulc1(g); - h = Mulc2(h); - k = Add32(k, _mm_shuffle_epi8(g, f)); - h = Add32(h, f); - f = Add32(f, h); - g = Add32(g, k); - k = Add32(k, g); - k = Xor(k, _mm_shuffle_epi8(f, h)); - __m128i buf[4]; - buf[0] = f; - buf[1] = g; - buf[2] = k; - buf[3] = h; - s = reinterpret_cast(buf); - uint32_t x = Fetch32(s); - uint32_t y = Fetch32(s+4); - uint32_t z = Fetch32(s+8); - x = _mm_crc32_u32(x, Fetch32(s+12)); - y = _mm_crc32_u32(y, Fetch32(s+16)); - z = _mm_crc32_u32(z * c1, Fetch32(s+20)); - x = _mm_crc32_u32(x, Fetch32(s+24)); - y = _mm_crc32_u32(y * c1, Fetch32(s+28)); - uint32_t o = y; - z = _mm_crc32_u32(z, Fetch32(s+32)); - x = _mm_crc32_u32(x * c1, Fetch32(s+36)); - y = _mm_crc32_u32(y, Fetch32(s+40)); - z = _mm_crc32_u32(z * c1, Fetch32(s+44)); - x = _mm_crc32_u32(x, Fetch32(s+48)); - y = _mm_crc32_u32(y * c1, Fetch32(s+52)); - z = _mm_crc32_u32(z, Fetch32(s+56)); - x = _mm_crc32_u32(x, Fetch32(s+60)); - return (o - x + y - z) * c1; -} - -#undef Chunk -#undef Murk -#undef Mulc2 -#undef Mulc1 - -template < bool bswap > 
-static uint32_t farmhashsu::Hash32WithSeed(const uint8_t *s, size_t len, uint32_t seed) { - if (len <= 24) { - if (len >= 13) return farmhashmk::Hash32Len13to24(s, len, seed * c1); - else if (len >= 5) return farmhashmk::Hash32Len5to12(s, len, seed); - else return farmhashmk::Hash32Len0to4(s, len, seed); - } - uint32_t h = farmhashmk::Hash32Len13to24(s, 24, seed ^ len); - return _mm_crc32_u32(farmhashsu::Hash32(s + 24, len - 24) + seed, h); + uint32_t h = farmhashmk::Hash32Len13to24(s, 24, seed ^ len); + return _mm_crc32_u32(farmhashsu::Hash32(s + 24, len - 24) + seed, h); } #endif @@ -1095,451 +1134,465 @@ static uint32_t farmhashsu::Hash32WithSeed(const uint8_t *s, size_t len, uint32_ //------------------------------------------------------------ #if defined(HAVE_X86_64_CRC32C) namespace farmhashsa { - template < bool bswap > - static uint32_t Hash32(const uint8_t *s, size_t len); - template < bool bswap > - static uint32_t Hash32WithSeed(const uint8_t *s, size_t len, uint32_t seed); -} - -template < bool bswap > -static uint32_t farmhashsa::Hash32(const uint8_t *s, size_t len) { - const uint32_t seed = 81; - if (len <= 24) { - return len <= 12 ? - (len <= 4 ? 
- farmhashmk::Hash32Len0to4(s, len) : - farmhashmk::Hash32Len5to12(s, len)) : - farmhashmk::Hash32Len13to24(s, len); - } - - if (len < 40) { - uint32_t a = len, b = seed * c2, c = a + b; - a += Fetch32(s + len - 4); - b += Fetch32(s + len - 20); - c += Fetch32(s + len - 16); - uint32_t d = a; - a = ROTR32(a, 21); - a = Mur(a, Mur(b, Mur(c, d))); - a += Fetch32(s + len - 12); - b += Fetch32(s + len - 8); - d += a; - a += d; - b = Mur(b, d) * c2; - a = _mm_crc32_u32(a, b + c); - return farmhashmk::Hash32Len13to24(s, (len + 1) / 2, a) + b; - } - -#undef Mulc1 -#define Mulc1(x) Mul((x), cc1) - -#undef Mulc2 -#define Mulc2(x) Mul((x), cc2) - -#undef Murk -#define Murk(a, h) \ - Add32(k, \ - Mul5( \ - Rol19( \ - Xor( \ - Mulc2( \ - Rol17( \ - Mulc1(a))), \ + template + static uint32_t Hash32( const uint8_t * s, size_t len ); + + template + static uint32_t Hash32WithSeed( const uint8_t * s, size_t len, uint32_t seed ); +} + +template +static uint32_t farmhashsa::Hash32( const uint8_t * s, size_t len ) { + const uint32_t seed = 81; + + if (len <= 24) { + return len <= 12 ? + (len <= 4 ? 
+ farmhashmk::Hash32Len0to4(s, len) : + farmhashmk::Hash32Len5to12(s, len)) : + farmhashmk::Hash32Len13to24(s, len); + } + + if (len < 40) { + uint32_t a = len, b = seed * c2, c = a + b; + a += Fetch32(s + len - 4); + b += Fetch32(s + len - 20); + c += Fetch32(s + len - 16); + uint32_t d = a; + a = ROTR32(a, 21); + a = Mur(a, Mur(b, Mur(c, d))); + a += Fetch32(s + len - 12); + b += Fetch32(s + len - 8); + d += a; + a += d; + b = Mur(b, d) * c2; + a = _mm_crc32_u32(a, b + c); + return farmhashmk::Hash32Len13to24(s, (len + 1) / 2, a) + b; + } + + #undef Mulc1 + #define Mulc1(x) Mul((x), cc1) + + #undef Mulc2 + #define Mulc2(x) Mul((x), cc2) + + #undef Murk +#define Murk(a, h) \ + Add32(k, \ + Mul5( \ + Rol19( \ + Xor( \ + Mulc2( \ + Rol17( \ + Mulc1(a))), \ (h))))) - const __m128i cc1 = _mm_set1_epi32(c1); - const __m128i cc2 = _mm_set1_epi32(c2); - __m128i h = _mm_set1_epi32(seed); - __m128i g = _mm_set1_epi32(c1 * seed); - __m128i f = g; - __m128i k = _mm_set1_epi32(0xe6546b64); - if (len < 80) { - __m128i a = Fetch128(s); - __m128i b = Fetch128(s + 16); - __m128i c = Fetch128(s + (len - 15) / 2); - __m128i d = Fetch128(s + len - 32); - __m128i e = Fetch128(s + len - 16); - h = Add32(h, a); - g = Add32(g, b); - g = Shuffle0321(g); - f = Add32(f, c); - __m128i be = Add32(b, Mulc1(e)); - h = Add32(h, f); - f = Add32(f, h); - h = Add32(Murk(d, h), e); - k = Xor(k, _mm_shuffle_epi8(g, f)); - g = Add32(Xor(c, g), a); - f = Add32(Xor(be, f), d); - k = Add32(k, be); - k = Add32(k, _mm_shuffle_epi8(f, h)); - f = Add32(f, g); - g = Add32(g, f); - g = Add32(_mm_set1_epi32(len), Mulc1(g)); - } else { - // len >= 80 - // The following is loosely modelled after farmhashmk::Hash32. 
- size_t iters = (len - 1) / 80; - len -= iters * 80; - -#undef Chunk -#define Chunk() do { \ - __m128i a = Fetch128(s); \ - __m128i b = Fetch128(s + 16); \ - __m128i c = Fetch128(s + 32); \ - __m128i d = Fetch128(s + 48); \ - __m128i e = Fetch128(s + 64); \ - h = Add32(h, a); \ - g = Add32(g, b); \ - g = Shuffle0321(g); \ - f = Add32(f, c); \ - __m128i be = Add32(b, Mulc1(e)); \ - h = Add32(h, f); \ - f = Add32(f, h); \ - h = Add32(Murk(d, h), e); \ - k = Xor(k, _mm_shuffle_epi8(g, f)); \ - g = Add32(Xor(c, g), a); \ - f = Add32(Xor(be, f), d); \ - k = Add32(k, be); \ - k = Add32(k, _mm_shuffle_epi8(f, h)); \ - f = Add32(f, g); \ - g = Add32(g, f); \ - f = Mulc1(f); \ + const __m128i cc1 = _mm_set1_epi32(c1); + const __m128i cc2 = _mm_set1_epi32( c2 ); + __m128i h = _mm_set1_epi32( seed ); + __m128i g = _mm_set1_epi32( c1 * seed); + __m128i f = g; + __m128i k = _mm_set1_epi32(0xe6546b64 ); + if (len < 80) { + __m128i a = Fetch128(s ); + __m128i b = Fetch128(s + 16); + __m128i c = Fetch128(s + (len - 15) / 2); + __m128i d = Fetch128(s + len - 32); + __m128i e = Fetch128(s + len - 16); + h = Add32(h, a); + g = Add32(g, b); + g = Shuffle0321(g); + f = Add32(f, c); + __m128i be = Add32(b, Mulc1(e)); + h = Add32(h, f); + f = Add32(f, h); + h = Add32(Murk(d, h), e); + k = Xor(k, _mm_shuffle_epi8(g, f)); + g = Add32(Xor(c, g) , a); + f = Add32(Xor(be, f), d); + k = Add32(k, be ); + k = Add32(k, _mm_shuffle_epi8(f, h)); + f = Add32(f, g); + g = Add32(g, f); + g = Add32(_mm_set1_epi32(len), Mulc1(g)); + } else { + // len >= 80 + // The following is loosely modelled after farmhashmk::Hash32. 
+ size_t iters = (len - 1) / 80; + len -= iters * 80; + + #undef Chunk +#define Chunk() do { \ + __m128i a = Fetch128(s); \ + __m128i b = Fetch128(s + 16); \ + __m128i c = Fetch128(s + 32); \ + __m128i d = Fetch128(s + 48); \ + __m128i e = Fetch128(s + 64); \ + h = Add32(h, a); \ + g = Add32(g, b); \ + g = Shuffle0321(g); \ + f = Add32(f, c); \ + __m128i be = Add32(b, Mulc1(e)); \ + h = Add32(h, f); \ + f = Add32(f, h); \ + h = Add32(Murk(d, h), e); \ + k = Xor(k, _mm_shuffle_epi8(g, f)); \ + g = Add32(Xor(c, g), a); \ + f = Add32(Xor(be, f), d); \ + k = Add32(k, be); \ + k = Add32(k, _mm_shuffle_epi8(f, h)); \ + f = Add32(f, g); \ + g = Add32(g, f); \ + f = Mulc1(f); \ } while (0) - while (iters-- != 0) { - Chunk(); - s += 80; + while (iters-- != 0) { + Chunk(); + s += 80; + } + + if (len != 0) { + h = Add32(h, _mm_set1_epi32(len)); + s = s + len - 80; + Chunk(); + } } - if (len != 0) { - h = Add32(h, _mm_set1_epi32(len)); - s = s + len - 80; - Chunk(); + g = Shuffle0321(g); + k = Xor(k, g); + f = Mulc1(f); + k = Mulc2(k); + g = Mulc1(g); + h = Mulc2(h); + k = Add32(k, _mm_shuffle_epi8(g, f)); + h = Add32(h, f); + f = Add32(f, h); + g = Add32(g, k); + k = Add32(k, g); + k = Xor(k, _mm_shuffle_epi8(f, h)); + __m128i buf[4]; + buf[0] = f; + buf[1] = g; + buf[2] = k; + buf[3] = h; + s = reinterpret_cast(buf); + uint32_t x = Fetch32(s ); + uint32_t y = Fetch32(s + 4); + uint32_t z = Fetch32(s + 8); + x = _mm_crc32_u32(x , Fetch32(s + 12)); + y = _mm_crc32_u32(y , Fetch32(s + 16)); + z = _mm_crc32_u32(z * c1, Fetch32(s + 20)); + x = _mm_crc32_u32(x , Fetch32(s + 24)); + y = _mm_crc32_u32(y * c1, Fetch32(s + 28)); + uint32_t o = y; + z = _mm_crc32_u32(z , Fetch32(s + 32)); + x = _mm_crc32_u32(x * c1, Fetch32(s + 36)); + y = _mm_crc32_u32(y , Fetch32(s + 40)); + z = _mm_crc32_u32(z * c1, Fetch32(s + 44)); + x = _mm_crc32_u32(x , Fetch32(s + 48)); + y = _mm_crc32_u32(y * c1, Fetch32(s + 52)); + z = _mm_crc32_u32(z , Fetch32(s + 56)); + x = _mm_crc32_u32(x , Fetch32(s + 
60)); + return (o - x + y - z) * c1; +} + + #undef Chunk + #undef Murk + #undef Mulc2 + #undef Mulc1 + +template +static uint32_t farmhashsa::Hash32WithSeed( const uint8_t * s, size_t len, uint32_t seed ) { + if (len <= 24) { + if (len >= 13) { return farmhashmk::Hash32Len13to24(s, len, seed * c1); } else if (len >= 5) { + return farmhashmk::Hash32Len5to12(s, len, seed); + } else { + return farmhashmk::Hash32Len0to4(s, len, seed); + } } - } - - g = Shuffle0321(g); - k = Xor(k, g); - f = Mulc1(f); - k = Mulc2(k); - g = Mulc1(g); - h = Mulc2(h); - k = Add32(k, _mm_shuffle_epi8(g, f)); - h = Add32(h, f); - f = Add32(f, h); - g = Add32(g, k); - k = Add32(k, g); - k = Xor(k, _mm_shuffle_epi8(f, h)); - __m128i buf[4]; - buf[0] = f; - buf[1] = g; - buf[2] = k; - buf[3] = h; - s = reinterpret_cast(buf); - uint32_t x = Fetch32(s); - uint32_t y = Fetch32(s+4); - uint32_t z = Fetch32(s+8); - x = _mm_crc32_u32(x, Fetch32(s+12)); - y = _mm_crc32_u32(y, Fetch32(s+16)); - z = _mm_crc32_u32(z * c1, Fetch32(s+20)); - x = _mm_crc32_u32(x, Fetch32(s+24)); - y = _mm_crc32_u32(y * c1, Fetch32(s+28)); - uint32_t o = y; - z = _mm_crc32_u32(z, Fetch32(s+32)); - x = _mm_crc32_u32(x * c1, Fetch32(s+36)); - y = _mm_crc32_u32(y, Fetch32(s+40)); - z = _mm_crc32_u32(z * c1, Fetch32(s+44)); - x = _mm_crc32_u32(x, Fetch32(s+48)); - y = _mm_crc32_u32(y * c1, Fetch32(s+52)); - z = _mm_crc32_u32(z, Fetch32(s+56)); - x = _mm_crc32_u32(x, Fetch32(s+60)); - return (o - x + y - z) * c1; -} - -#undef Chunk -#undef Murk -#undef Mulc2 -#undef Mulc1 - -template < bool bswap > -static uint32_t farmhashsa::Hash32WithSeed(const uint8_t *s, size_t len, uint32_t seed) { - if (len <= 24) { - if (len >= 13) return farmhashmk::Hash32Len13to24(s, len, seed * c1); - else if (len >= 5) return farmhashmk::Hash32Len5to12(s, len, seed); - else return farmhashmk::Hash32Len0to4(s, len, seed); - } - uint32_t h = farmhashmk::Hash32Len13to24(s, 24, seed ^ len); - return _mm_crc32_u32(farmhashsa::Hash32(s + 24, len - 24) + 
seed, h); + uint32_t h = farmhashmk::Hash32Len13to24(s, 24, seed ^ len); + return _mm_crc32_u32(farmhashsa::Hash32(s + 24, len - 24) + seed, h); } #endif //------------------------------------------------------------ namespace farmhashcc { - static inline uint32_t Hash32Len0to4(const uint8_t *s, size_t len); - template < bool bswap > - static inline uint32_t Hash32Len5to12(const uint8_t *s, size_t len); - template < bool bswap > - static inline uint32_t Hash32Len13to24(const uint8_t *s, size_t len); - - template < bool bswap > - static uint32_t Hash32(const uint8_t *s, size_t len); - template < bool bswap > - static uint32_t Hash32WithSeed(const uint8_t *s, size_t len, uint32_t seed); - - template < bool bswap > - static inline uint64_t HashLen0to16(const uint8_t *s, size_t len); - template < bool bswap > - static inline uint128_t CityMurmur(const uint8_t *s, size_t len, uint128_t seed); - - template < bool bswap > - static uint128_t Hash128WithSeed(const uint8_t *s, size_t len, uint128_t seed); -} - -template < bool bswap > -static inline uint32_t farmhashcc::Hash32Len13to24(const uint8_t *s, size_t len) { - uint32_t a = Fetch32(s - 4 + (len >> 1)); - uint32_t b = Fetch32(s + 4); - uint32_t c = Fetch32(s + len - 8); - uint32_t d = Fetch32(s + (len >> 1)); - uint32_t e = Fetch32(s); - uint32_t f = Fetch32(s + len - 4); - uint32_t h = len; - - return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h))))))); -} - -static inline uint32_t farmhashcc::Hash32Len0to4(const uint8_t *s, size_t len) { - uint32_t b = 0; - uint32_t c = 9; - for (size_t i = 0; i < len; i++) { - int8_t v = s[i]; - b = b * c1 + v; - c ^= b; - } - return fmix(Mur(b, Mur(len, c))); -} - -template < bool bswap > -static inline uint32_t farmhashcc::Hash32Len5to12(const uint8_t *s, size_t len) { - uint32_t a = len, b = len * 5, c = 9, d = b; - a += Fetch32(s); - b += Fetch32(s + len - 4); - c += Fetch32(s + ((len >> 1) & 4)); - return fmix(Mur(c, Mur(b, Mur(a, d)))); -} - -template < bool bswap > 
-static uint32_t farmhashcc::Hash32(const uint8_t *s, size_t len) { - if (len <= 24) { - return len <= 12 ? - (len <= 4 ? farmhashcc::Hash32Len0to4(s, len) : farmhashcc::Hash32Len5to12(s, len)) : - farmhashcc::Hash32Len13to24(s, len); - } - - // len > 24 - uint32_t h = len, g = c1 * len, f = g; - uint32_t a0 = ROTR32(Fetch32(s + len - 4) * c1, 17) * c2; - uint32_t a1 = ROTR32(Fetch32(s + len - 8) * c1, 17) * c2; - uint32_t a2 = ROTR32(Fetch32(s + len - 16) * c1, 17) * c2; - uint32_t a3 = ROTR32(Fetch32(s + len - 12) * c1, 17) * c2; - uint32_t a4 = ROTR32(Fetch32(s + len - 20) * c1, 17) * c2; - h ^= a0; - h = ROTR32(h, 19); - h = h * 5 + 0xe6546b64; - h ^= a2; - h = ROTR32(h, 19); - h = h * 5 + 0xe6546b64; - g ^= a1; - g = ROTR32(g, 19); - g = g * 5 + 0xe6546b64; - g ^= a3; - g = ROTR32(g, 19); - g = g * 5 + 0xe6546b64; - f += a4; - f = ROTR32(f, 19); - f = f * 5 + 0xe6546b64; - size_t iters = (len - 1) / 20; - do { - uint32_t a0 = ROTR32(Fetch32(s) * c1, 17) * c2; - uint32_t a1 = Fetch32(s + 4); - uint32_t a2 = ROTR32(Fetch32(s + 8) * c1, 17) * c2; - uint32_t a3 = ROTR32(Fetch32(s + 12) * c1, 17) * c2; - uint32_t a4 = Fetch32(s + 16); + static inline uint32_t Hash32Len0to4( const uint8_t * s, size_t len ); + + template + static inline uint32_t Hash32Len5to12( const uint8_t * s, size_t len ); + + template + static inline uint32_t Hash32Len13to24( const uint8_t * s, size_t len ); + + template + static uint32_t Hash32( const uint8_t * s, size_t len ); + + template + static uint32_t Hash32WithSeed( const uint8_t * s, size_t len, uint32_t seed ); + + template + static inline uint64_t HashLen0to16( const uint8_t * s, size_t len ); + + template + static inline uint128_t CityMurmur( const uint8_t * s, size_t len, uint128_t seed ); + + template + static uint128_t Hash128WithSeed( const uint8_t * s, size_t len, uint128_t seed ); +} // namespace farmhashcc + +template +static inline uint32_t farmhashcc::Hash32Len13to24( const uint8_t * s, size_t len ) { + uint32_t a = 
Fetch32(s - 4 + (len >> 1)); + uint32_t b = Fetch32(s + 4); + uint32_t c = Fetch32(s + len - 8); + uint32_t d = Fetch32(s + (len >> 1)); + uint32_t e = Fetch32(s); + uint32_t f = Fetch32(s + len - 4); + uint32_t h = len; + + return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h))))))); +} + +static inline uint32_t farmhashcc::Hash32Len0to4( const uint8_t * s, size_t len ) { + uint32_t b = 0; + uint32_t c = 9; + + for (size_t i = 0; i < len; i++) { + int8_t v = s[i]; + b = b * c1 + v; + c ^= b; + } + return fmix(Mur(b, Mur(len, c))); +} + +template +static inline uint32_t farmhashcc::Hash32Len5to12( const uint8_t * s, size_t len ) { + uint32_t a = len, b = len * 5, c = 9, d = b; + + a += Fetch32(s); + b += Fetch32(s + len - 4); + c += Fetch32(s + ((len >> 1) & 4)); + return fmix(Mur(c, Mur(b, Mur(a, d)))); +} + +template +static uint32_t farmhashcc::Hash32( const uint8_t * s, size_t len ) { + if (len <= 24) { + return len <= 12 ? + (len <= 4 ? farmhashcc::Hash32Len0to4(s, len) : farmhashcc::Hash32Len5to12(s, len)) : + farmhashcc::Hash32Len13to24(s, len); + } + + // len > 24 + uint32_t h = len, g = c1 * len, f = g; + uint32_t a0 = ROTR32(Fetch32(s + len - 4) * c1, 17) * c2; + uint32_t a1 = ROTR32(Fetch32(s + len - 8) * c1, 17) * c2; + uint32_t a2 = ROTR32(Fetch32(s + len - 16) * c1, 17) * c2; + uint32_t a3 = ROTR32(Fetch32(s + len - 12) * c1, 17) * c2; + uint32_t a4 = ROTR32(Fetch32(s + len - 20) * c1, 17) * c2; h ^= a0; - h = ROTR32(h, 18); + h = ROTR32(h, 19); + h = h * 5 + 0xe6546b64; + h ^= a2; + h = ROTR32(h, 19); + h = h * 5 + 0xe6546b64; + g ^= a1; + g = ROTR32(g, 19); + g = g * 5 + 0xe6546b64; + g ^= a3; + g = ROTR32(g, 19); + g = g * 5 + 0xe6546b64; + f += a4; + f = ROTR32(f, 19); + f = f * 5 + 0xe6546b64; + size_t iters = (len - 1) / 20; + do { + uint32_t a0 = ROTR32(Fetch32(s) * c1, 17) * c2; + uint32_t a1 = Fetch32(s + 4); + uint32_t a2 = ROTR32(Fetch32(s + 8) * c1, 17) * c2; + uint32_t a3 = ROTR32(Fetch32(s + 12) * c1, 17) * c2; + uint32_t a4 = 
Fetch32(s + 16); + h ^= a0; + h = ROTR32(h, 18); + h = h * 5 + 0xe6546b64; + f += a1; + f = ROTR32(f, 19); + f = f * c1; + g += a2; + g = ROTR32(g, 18); + g = g * 5 + 0xe6546b64; + h ^= a3 + a1; + h = ROTR32(h, 19); + h = h * 5 + 0xe6546b64; + g ^= a4; + g = BSWAP(g) * 5; + h += a4 * 5; + h = BSWAP(h); + f += a0; + PERMUTE3(f, h, g); + s += 20; + } while (--iters != 0); + g = ROTR32(g , 11) * c1; + g = ROTR32(g , 17) * c1; + f = ROTR32(f , 11) * c1; + f = ROTR32(f , 17) * c1; + h = ROTR32(h + g, 19); h = h * 5 + 0xe6546b64; - f += a1; - f = ROTR32(f, 19); - f = f * c1; - g += a2; - g = ROTR32(g, 18); - g = g * 5 + 0xe6546b64; - h ^= a3 + a1; - h = ROTR32(h, 19); + h = ROTR32(h , 17) * c1; + h = ROTR32(h + f, 19); h = h * 5 + 0xe6546b64; - g ^= a4; - g = BSWAP(g) * 5; - h += a4 * 5; - h = BSWAP(h); - f += a0; - PERMUTE3(f, h, g); - s += 20; - } while (--iters != 0); - g = ROTR32(g, 11) * c1; - g = ROTR32(g, 17) * c1; - f = ROTR32(f, 11) * c1; - f = ROTR32(f, 17) * c1; - h = ROTR32(h + g, 19); - h = h * 5 + 0xe6546b64; - h = ROTR32(h, 17) * c1; - h = ROTR32(h + f, 19); - h = h * 5 + 0xe6546b64; - h = ROTR32(h, 17) * c1; - return h; -} - -template < bool bswap > -static uint32_t farmhashcc::Hash32WithSeed(const uint8_t *s, size_t len, uint32_t seed) { - if (len <= 24) { - if (len >= 13) return farmhashmk::Hash32Len13to24(s, len, seed * c1); - else if (len >= 5) return farmhashmk::Hash32Len5to12(s, len, seed); - else return farmhashmk::Hash32Len0to4(s, len, seed); - } - uint32_t h = farmhashmk::Hash32Len13to24(s, 24, seed ^ len); - return Mur(farmhashcc::Hash32(s + 24, len - 24) + seed, h); -} - -template < bool bswap > -static inline uint64_t farmhashcc::HashLen0to16(const uint8_t *s, size_t len) { - if (len >= 8) { - uint64_t mul = k2 + len * 2; - uint64_t a = Fetch64(s) + k2; - uint64_t b = Fetch64(s + len - 8); - uint64_t c = ROTR64(b, 37) * mul + a; - uint64_t d = (ROTR64(a, 25) + b) * mul; - return HashLen16(c, d, mul); - } - if (len >= 4) { - uint64_t mul = k2 + 
len * 2; - uint64_t a = Fetch32(s); - return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); - } - if (len > 0) { - uint8_t a = s[0]; - uint8_t b = s[len >> 1]; - uint8_t c = s[len - 1]; - uint32_t y = static_cast(a) + (static_cast(b) << 8); - uint32_t z = len + (static_cast(c) << 2); - return ShiftMix(y * k2 ^ z * k0) * k2; - } - return k2; -} - -template < bool bswap > -static inline uint128_t farmhashcc::CityMurmur(const uint8_t *s, size_t len, uint128_t seed) { - uint64_t a = Uint128Low64(seed); - uint64_t b = Uint128High64(seed); - uint64_t c = 0; - uint64_t d = 0; - signed long l = len - 16; - if (l <= 0) { // len <= 16 - a = ShiftMix(a * k1) * k1; - c = b * k1 + farmhashcc::HashLen0to16(s, len); - d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c)); - } else { // len > 16 - c = HashLen16(Fetch64(s + len - 8) + k1, a); - d = HashLen16(b + len, c + Fetch64(s + len - 16)); - a += d; + h = ROTR32(h , 17) * c1; + return h; +} + +template +static uint32_t farmhashcc::Hash32WithSeed( const uint8_t * s, size_t len, uint32_t seed ) { + if (len <= 24) { + if (len >= 13) { return farmhashmk::Hash32Len13to24(s, len, seed * c1); } else if (len >= 5) { + return farmhashmk::Hash32Len5to12(s, len, seed); + } else { + return farmhashmk::Hash32Len0to4(s, len, seed); + } + } + uint32_t h = farmhashmk::Hash32Len13to24(s, 24, seed ^ len); + return Mur(farmhashcc::Hash32(s + 24, len - 24) + seed, h); +} + +template +static inline uint64_t farmhashcc::HashLen0to16( const uint8_t * s, size_t len ) { + if (len >= 8) { + uint64_t mul = k2 + len * 2; + uint64_t a = Fetch64(s) + k2; + uint64_t b = Fetch64(s + len - 8); + uint64_t c = ROTR64(b, 37) * mul + a; + uint64_t d = (ROTR64(a, 25) + b) * mul; + return HashLen16(c, d, mul); + } + if (len >= 4) { + uint64_t mul = k2 + len * 2; + uint64_t a = Fetch32(s); + return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); + } + if (len > 0) { + uint8_t a = s[0]; + uint8_t b = s[len >> 1]; + uint8_t c = s[len - 1]; + uint32_t y = 
static_cast(a) + (static_cast(b) << 8); + uint32_t z = len + (static_cast(c) << 2); + return ShiftMix(y * k2 ^ z * k0) * k2; + } + return k2; +} + +template +static inline uint128_t farmhashcc::CityMurmur( const uint8_t * s, size_t len, uint128_t seed ) { + uint64_t a = Uint128Low64(seed); + uint64_t b = Uint128High64(seed); + uint64_t c = 0; + uint64_t d = 0; + signed long l = len - 16; + + if (l <= 0) { // len <= 16 + a = ShiftMix(a * k1) * k1; + c = b * k1 + farmhashcc::HashLen0to16(s, len); + d = ShiftMix(a + (len >= 8 ? Fetch64(s) : c)); + } else { // len > 16 + c = HashLen16(Fetch64(s + len - 8) + k1, a ); + d = HashLen16(b + len, c + Fetch64(s + len - 16)); + a += d; + do { + a ^= ShiftMix(Fetch64(s) * k1) * k1; + a *= k1; + b ^= a; + c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; + c *= k1; + d ^= c; + s += 16; + l -= 16; + } while (l > 0); + } + a = HashLen16(a, c); + b = HashLen16(d, b); + return Uint128(a ^ b, HashLen16(b, a)); +} + +template +static uint128_t farmhashcc::Hash128WithSeed( const uint8_t * s, size_t len, uint128_t seed ) { + if (len < 128) { + return farmhashcc::CityMurmur(s, len, seed); + } + + // We expect len >= 128 to be the common case. Keep 56 bytes of state: + // v, w, x, y, and z. + pair v, w; + uint64_t x = Uint128Low64(seed); + uint64_t y = Uint128High64(seed); + uint64_t z = len * k1; + v.first = ROTR64(y ^ k1 , 49) * k1 + Fetch64(s); + v.second = ROTR64(v.first, 42) * k1 + Fetch64(s + 8); + w.first = ROTR64(y + z , 35) * k1 + x; + w.second = ROTR64(x + Fetch64(s + 88), 53) * k1; + + // This is the same inner loop as CityHash64(), manually unrolled. 
do { - a ^= ShiftMix(Fetch64(s) * k1) * k1; - a *= k1; - b ^= a; - c ^= ShiftMix(Fetch64(s + 8) * k1) * k1; - c *= k1; - d ^= c; - s += 16; - l -= 16; - } while (l > 0); - } - a = HashLen16(a, c); - b = HashLen16(d, b); - return Uint128(a ^ b, HashLen16(b, a)); -} - -template < bool bswap > -static uint128_t farmhashcc::Hash128WithSeed(const uint8_t *s, size_t len, uint128_t seed) { - if (len < 128) { - return farmhashcc::CityMurmur(s, len, seed); - } - - // We expect len >= 128 to be the common case. Keep 56 bytes of state: - // v, w, x, y, and z. - pair v, w; - uint64_t x = Uint128Low64(seed); - uint64_t y = Uint128High64(seed); - uint64_t z = len * k1; - v.first = ROTR64(y ^ k1, 49) * k1 + Fetch64(s); - v.second = ROTR64(v.first, 42) * k1 + Fetch64(s + 8); - w.first = ROTR64(y + z, 35) * k1 + x; - w.second = ROTR64(x + Fetch64(s + 88), 53) * k1; - - // This is the same inner loop as CityHash64(), manually unrolled. - do { - x = ROTR64(x + y + v.first + Fetch64(s + 8), 37) * k1; - y = ROTR64(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y += v.first + Fetch64(s + 40); - z = ROTR64(z + w.first, 33) * k1; - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); - std::swap(z, x); - s += 64; - x = ROTR64(x + y + v.first + Fetch64(s + 8), 37) * k1; - y = ROTR64(y + v.second + Fetch64(s + 48), 42) * k1; - x ^= w.second; - y += v.first + Fetch64(s + 40); - z = ROTR64(z + w.first, 33) * k1; - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16)); - std::swap(z, x); - s += 64; - len -= 128; - } while (likely(len >= 128)); - x += ROTR64(v.first + z, 49) * k0; - y = y * k0 + ROTR64(w.second, 37); - z = z * k0 + ROTR64(w.first, 27); - w.first *= 9; - v.first *= k0; - // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. 
- for (size_t tail_done = 0; tail_done < len; ) { - tail_done += 32; - y = ROTR64(x + y, 42) * k0 + v.second; - w.first += Fetch64(s + len - tail_done + 16); - x = x * k0 + w.first; - z += w.second + Fetch64(s + len - tail_done); - w.second += v.first; - v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); + x = ROTR64(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = ROTR64(y + v.second + Fetch64 (s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = ROTR64(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s , v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second , y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + x = ROTR64(x + y + v.first + Fetch64(s + 8), 37) * k1; + y = ROTR64(y + v.second + Fetch64 (s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch64(s + 40); + z = ROTR64(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s , v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second , y + Fetch64(s + 16)); + std::swap(z, x); + s += 64; + len -= 128; + } while (likely(len >= 128)); + x += ROTR64(v.first + z, 49) * k0; + y = y * k0 + ROTR64(w.second, 37); + z = z * k0 + ROTR64(w.first , 27); + w.first *= 9; v.first *= k0; - } - // At this point our 56 bytes of state should contain more than - // enough information for a strong 128-bit hash. We use two - // different 56-byte-to-8-byte hashes to get a 16-byte final result. - x = HashLen16(x, v.first); - y = HashLen16(y + z, w.first); - return Uint128(HashLen16(x + v.second, w.second) + y, - HashLen16(x + w.second, y + v.second)); + // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. 
+ for (size_t tail_done = 0; tail_done < len;) { + tail_done += 32; + y = ROTR64(x + y, 42) * k0 + v.second; + w.first += Fetch64 (s + len - tail_done + 16); + x = x * k0 + w.first; + z += w.second + Fetch64 (s + len - tail_done); + w.second += v.first; + v = WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); + v.first *= k0; + } + // At this point our 56 bytes of state should contain more than + // enough information for a strong 128-bit hash. We use two + // different 56-byte-to-8-byte hashes to get a 16-byte final result. + x = HashLen16(x , v.first); + y = HashLen16(y + z, w.first); + return Uint128(HashLen16(x + v.second, w.second) + y, HashLen16(x + w.second, y + v.second)); } //------------------------------------------------------------ -template < bool bswap > -static void FarmHashNA(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void FarmHashNA( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h = farmhashna::Hash64WithSeed((const uint8_t *)in, len, seed); + PUT_U64(h, (uint8_t *)out, 0); } -template < bool bswap > -static void FarmHashUO(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void FarmHashUO( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h = farmhashuo::Hash64WithSeed((const uint8_t *)in, len, seed); + PUT_U64(h, (uint8_t *)out, 0); } @@ -1547,309 +1600,323 @@ static void FarmHashUO(const void * in, const size_t len, const seed_t seed, voi // version, the XO version won't be tested explicitly. 
#if defined(HAVE_SSE_4_1) -template < bool bswap > -static void FarmHashTE(const void * in, const size_t len, const seed_t seed, void * out) { + +template +static void FarmHashTE( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h = farmhashte::Hash64WithSeed((const uint8_t *)in, len, seed); + PUT_U64(h, (uint8_t *)out, 0); } -template < bool bswap > -static void FarmHashNT(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void FarmHashNT( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = farmhashnt::Hash32WithSeed((const uint8_t *)in, len, seed); + PUT_U32(h, (uint8_t *)out, 0); } + #endif -template < bool bswap > -static void FarmHashMK(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void FarmHashMK( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = farmhashmk::Hash32WithSeed((const uint8_t *)in, len, seed); + PUT_U32(h, (uint8_t *)out, 0); } #if defined(HAVE_X86_64_CRC32C) && defined(HAVE_X86_64_AES) -template < bool bswap > -static void FarmHashSU(const void * in, const size_t len, const seed_t seed, void * out) { + +template +static void FarmHashSU( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = farmhashsu::Hash32WithSeed((const uint8_t *)in, len, seed); + PUT_U32(h, (uint8_t *)out, 0); } + #endif #if defined(HAVE_X86_64_CRC32C) -template < bool bswap > -static void FarmHashSA(const void * in, const size_t len, const seed_t seed, void * out) { + +template +static void FarmHashSA( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = farmhashsa::Hash32WithSeed((const uint8_t *)in, len, seed); + PUT_U32(h, (uint8_t *)out, 0); } + #endif -template < bool bswap > -static void FarmHashCC_32(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void FarmHashCC_32( const void * in, const size_t len, const 
seed_t seed, void * out ) { uint32_t h = farmhashcc::Hash32WithSeed((const uint8_t *)in, len, seed); + PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap, uint32_t seedmode > -static void FarmHashCC_128(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void FarmHashCC_128( const void * in, const size_t len, const seed_t seed, void * out ) { uint128_t seed128; - switch(seedmode) { + + switch (seedmode) { case 1: seed128 = Uint128((uint64_t)seed, 0); break; case 2: seed128 = Uint128(0, (uint64_t)seed); break; case 3: seed128 = Uint128((uint64_t)seed, (uint64_t)seed); break; default: exit(1); } uint128_t h = farmhashcc::Hash128WithSeed((const uint8_t *)in, len, seed128); - PUT_U64(Uint128Low64(h), (uint8_t *)out, 0); + PUT_U64(Uint128Low64(h) , (uint8_t *)out, 0); PUT_U64(Uint128High64(h), (uint8_t *)out, 8); } -template < bool bswap, uint32_t seedmode > -static void FarmHashCityMurmur_128(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void FarmHashCityMurmur_128( const void * in, const size_t len, const seed_t seed, void * out ) { uint128_t seed128; - switch(seedmode) { + + switch (seedmode) { case 1: seed128 = Uint128((uint64_t)seed, 0); break; case 2: seed128 = Uint128(0, (uint64_t)seed); break; case 3: seed128 = Uint128((uint64_t)seed, (uint64_t)seed); break; default: exit(1); } uint128_t h = farmhashcc::CityMurmur((const uint8_t *)in, len, seed128); - PUT_U64(Uint128Low64(h), (uint8_t *)out, 0); + PUT_U64(Uint128Low64(h) , (uint8_t *)out, 0); PUT_U64(Uint128High64(h), (uint8_t *)out, 8); } REGISTER_FAMILY(farmhash, - $.src_url = "https://github.com/google/farmhash", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/google/farmhash", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(FarmHash_64__NA, - $.desc = "FarmHash Hash64WithSeed (NA version)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - 
FLAG_IMPL_LICENSE_MIT , - $.bits = 64, - $.sort_order = 10, - $.verification_LE = 0xEBC4A679, - $.verification_BE = 0xB24C5C09, - $.hashfn_native = FarmHashNA, - $.hashfn_bswap = FarmHashNA -); + $.desc = "FarmHash Hash64WithSeed (NA version)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.sort_order = 10, + $.verification_LE = 0xEBC4A679, + $.verification_BE = 0xB24C5C09, + $.hashfn_native = FarmHashNA, + $.hashfn_bswap = FarmHashNA + ); REGISTER_HASH(FarmHash_64__UO, - $.desc = "FarmHash Hash64WithSeed (UO version)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 64, - $.sort_order = 20, - $.verification_LE = 0x5438EF2C, - $.verification_BE = 0x72B8113E, - $.hashfn_native = FarmHashUO, - $.hashfn_bswap = FarmHashUO -); + $.desc = "FarmHash Hash64WithSeed (UO version)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.sort_order = 20, + $.verification_LE = 0x5438EF2C, + $.verification_BE = 0x72B8113E, + $.hashfn_native = FarmHashUO, + $.hashfn_bswap = FarmHashUO + ); #if defined(HAVE_SSE_4_1) REGISTER_HASH(FarmHash_64__TE, - $.desc = "FarmHash Hash64WithSeed (TE version)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 64, - $.sort_order = 30, - $.verification_LE = 0xF1BF42C3, - $.verification_BE = 0x7188736E, - $.hashfn_native = FarmHashTE, - $.hashfn_bswap = FarmHashTE -); + $.desc = "FarmHash Hash64WithSeed (TE version)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.sort_order = 30, + $.verification_LE = 0xF1BF42C3, + $.verification_BE = 0x7188736E, + $.hashfn_native = FarmHashTE, + $.hashfn_bswap = FarmHashTE + ); REGISTER_HASH(FarmHash_32__NT, - 
$.desc = "FarmHash Hash32WithSeed (NT version)", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 32, - $.sort_order = 40, - $.verification_LE = 0x47AB39AF, - $.verification_BE = 0x6AE8BA9B, - $.hashfn_native = FarmHashNT, - $.hashfn_bswap = FarmHashNT -); + $.desc = "FarmHash Hash32WithSeed (NT version)", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.sort_order = 40, + $.verification_LE = 0x47AB39AF, + $.verification_BE = 0x6AE8BA9B, + $.hashfn_native = FarmHashNT, + $.hashfn_bswap = FarmHashNT + ); #endif REGISTER_HASH(FarmHash_32__MK, - $.desc = "FarmHash Hash32WithSeed (MK version)", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 32, - $.sort_order = 50, - $.verification_LE = 0x0DC9AF39, - $.verification_BE = 0x6B67BB90, - $.hashfn_native = FarmHashMK, - $.hashfn_bswap = FarmHashMK -); + $.desc = "FarmHash Hash32WithSeed (MK version)", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.sort_order = 50, + $.verification_LE = 0x0DC9AF39, + $.verification_BE = 0x6B67BB90, + $.hashfn_native = FarmHashMK, + $.hashfn_bswap = FarmHashMK + ); #if defined(HAVE_X86_64_CRC32C) && defined(HAVE_X86_64_AES) REGISTER_HASH(FarmHash_32__SU, - $.desc = "FarmHash Hash32WithSeed (SU version)", - $.hash_flags = - FLAG_HASH_SMALL_SEED | - FLAG_HASH_AES_BASED | - FLAG_HASH_CRC_BASED , - $.impl_flags = - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 32, - $.sort_order = 60, - $.verification_LE = 0xE7A53C98, - $.verification_BE = 0x9CC06B52, - $.hashfn_native = FarmHashSU, - $.hashfn_bswap = FarmHashSU -); + $.desc = "FarmHash Hash32WithSeed (SU version)", + $.hash_flags 
= + FLAG_HASH_SMALL_SEED | + FLAG_HASH_AES_BASED | + FLAG_HASH_CRC_BASED, + $.impl_flags = + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.sort_order = 60, + $.verification_LE = 0xE7A53C98, + $.verification_BE = 0x9CC06B52, + $.hashfn_native = FarmHashSU, + $.hashfn_bswap = FarmHashSU + ); #endif #if defined(HAVE_X86_64_CRC32C) REGISTER_HASH(FarmHash_32__SA, - $.desc = "FarmHash Hash32WithSeed (SA version)", - $.hash_flags = - FLAG_HASH_SMALL_SEED | - FLAG_HASH_CRC_BASED , - $.impl_flags = - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 32, - $.sort_order = 70, - $.verification_LE = 0x553B1655, - $.verification_BE = 0x19A1CCEA, - $.hashfn_native = FarmHashSA, - $.hashfn_bswap = FarmHashSA -); + $.desc = "FarmHash Hash32WithSeed (SA version)", + $.hash_flags = + FLAG_HASH_SMALL_SEED | + FLAG_HASH_CRC_BASED, + $.impl_flags = + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.sort_order = 70, + $.verification_LE = 0x553B1655, + $.verification_BE = 0x19A1CCEA, + $.hashfn_native = FarmHashSA, + $.hashfn_bswap = FarmHashSA + ); #endif REGISTER_HASH(FarmHash_32__CC, - $.desc = "FarmHash Hash32WithSeed (CC version)", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 32, - $.sort_order = 80, - $.verification_LE = 0x61DEEE7E, - $.verification_BE = 0xAE9514F0, - $.hashfn_native = FarmHashCC_32, - $.hashfn_bswap = FarmHashCC_32 -); + $.desc = "FarmHash Hash32WithSeed (CC version)", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.sort_order = 80, + $.verification_LE = 0x61DEEE7E, + $.verification_BE = 0xAE9514F0, + $.hashfn_native = FarmHashCC_32, + $.hashfn_bswap = FarmHashCC_32 + ); REGISTER_HASH(FarmHash_128__CC__seed1, - $.desc = "FarmHash Hash128WithSeed (CC version, seeded low 64 
bit)", - $.hash_flags = - FLAG_HASH_XL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.sort_order = 90, - $.verification_LE = 0x305C0D9A, - $.verification_BE = 0xDC1669A2, - $.hashfn_native = FarmHashCC_128, - $.hashfn_bswap = FarmHashCC_128 -); + $.desc = "FarmHash Hash128WithSeed (CC version, seeded low 64 bit)", + $.hash_flags = + FLAG_HASH_XL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.sort_order = 90, + $.verification_LE = 0x305C0D9A, + $.verification_BE = 0xDC1669A2, + $.hashfn_native = FarmHashCC_128, + $.hashfn_bswap = FarmHashCC_128 + ); REGISTER_HASH(FarmHash_128__CC__seed2, - $.desc = "FarmHash Hash128WithSeed (CC version, seeded high 64 bit)", - $.hash_flags = - FLAG_HASH_XL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.sort_order = 100, - $.verification_LE = 0x0DB4D383, - $.verification_BE = 0xFA39DBEA, - $.hashfn_native = FarmHashCC_128, - $.hashfn_bswap = FarmHashCC_128 -); + $.desc = "FarmHash Hash128WithSeed (CC version, seeded high 64 bit)", + $.hash_flags = + FLAG_HASH_XL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.sort_order = 100, + $.verification_LE = 0x0DB4D383, + $.verification_BE = 0xFA39DBEA, + $.hashfn_native = FarmHashCC_128, + $.hashfn_bswap = FarmHashCC_128 + ); REGISTER_HASH(FarmHash_128__CC__seed3, - $.desc = "FarmHash Hash128WithSeed (CC version, seeded low+high 64 bit)", - $.hash_flags = - FLAG_HASH_XL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.sort_order = 110, - $.verification_LE = 0xA93EBF71, - $.verification_BE = 0x38CD0ED1, - $.hashfn_native = FarmHashCC_128, - $.hashfn_bswap = FarmHashCC_128 -); + $.desc = "FarmHash Hash128WithSeed (CC version, seeded low+high 64 
bit)", + $.hash_flags = + FLAG_HASH_XL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.sort_order = 110, + $.verification_LE = 0xA93EBF71, + $.verification_BE = 0x38CD0ED1, + $.hashfn_native = FarmHashCC_128, + $.hashfn_bswap = FarmHashCC_128 + ); REGISTER_HASH(FarmHash_128__CM__seed1, - $.desc = "FarmHash CityMurmur (CM version, seeded low 64 bit)", - $.hash_flags = - FLAG_HASH_XL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.sort_order = 120, - $.verification_LE = 0x6593FD6D, - $.verification_BE = 0xF84ED47F, - $.hashfn_native = FarmHashCityMurmur_128, - $.hashfn_bswap = FarmHashCityMurmur_128 -); + $.desc = "FarmHash CityMurmur (CM version, seeded low 64 bit)", + $.hash_flags = + FLAG_HASH_XL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.sort_order = 120, + $.verification_LE = 0x6593FD6D, + $.verification_BE = 0xF84ED47F, + $.hashfn_native = FarmHashCityMurmur_128, + $.hashfn_bswap = FarmHashCityMurmur_128 + ); REGISTER_HASH(FarmHash_128__CM__seed2, - $.desc = "FarmHash CityMurmur (CM version, seeded high 64 bit)", - $.hash_flags = - FLAG_HASH_XL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.sort_order = 130, - $.verification_LE = 0xF1483884, - $.verification_BE = 0x5185F2C4, - $.hashfn_native = FarmHashCityMurmur_128, - $.hashfn_bswap = FarmHashCityMurmur_128 -); + $.desc = "FarmHash CityMurmur (CM version, seeded high 64 bit)", + $.hash_flags = + FLAG_HASH_XL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.sort_order = 130, + $.verification_LE = 0xF1483884, + $.verification_BE = 0x5185F2C4, + $.hashfn_native = FarmHashCityMurmur_128, + $.hashfn_bswap = FarmHashCityMurmur_128 + ); 
REGISTER_HASH(FarmHash_128__CM__seed3, - $.desc = "FarmHash CityMurmur (CM version, seeded low+high 64 bit)", - $.hash_flags = - FLAG_HASH_XL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 128, - $.sort_order = 140, - $.verification_LE = 0x6D028510, - $.verification_BE = 0xFC258701, - $.hashfn_native = FarmHashCityMurmur_128, - $.hashfn_bswap = FarmHashCityMurmur_128 -); + $.desc = "FarmHash CityMurmur (CM version, seeded low+high 64 bit)", + $.hash_flags = + FLAG_HASH_XL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.sort_order = 140, + $.verification_LE = 0x6D028510, + $.verification_BE = 0xFC258701, + $.hashfn_native = FarmHashCityMurmur_128, + $.hashfn_bswap = FarmHashCityMurmur_128 + ); diff --git a/hashes/farsh.cpp b/hashes/farsh.cpp index d5d888d1..522b5c81 100644 --- a/hashes/farsh.cpp +++ b/hashes/farsh.cpp @@ -28,130 +28,138 @@ #include "Hashlib.h" #if defined(HAVE_AVX2) || defined(HAVE_SSE_2) -#include "Intrinsics.h" + #include "Intrinsics.h" #endif -#define FARSH_MAX_HASHES 32 /* number of 32-bit hashes supported by the built-in key */ -#define FARSH_BASE_KEY_SIZE 1024 /* size of user-supplied key required to compute 32-bit hash with index 0 */ -#define FARSH_EXTRA_KEY_SIZE 16 /* extra bytes required to compute 32-bit hash with every next index */ +#define FARSH_MAX_HASHES 32 /* number of 32-bit hashes supported by the built-in key */ +#define FARSH_BASE_KEY_SIZE 1024 /* size of user-supplied key required to compute 32-bit hash with index 0 */ +#define FARSH_EXTRA_KEY_SIZE 16 /* extra bytes required to compute 32-bit hash with every next index */ #define STRIPE FARSH_BASE_KEY_SIZE -#define STRIPE_ELEMENTS (STRIPE/sizeof(uint32_t)) /* should be power of 2 due to use of 'x % STRIPE_ELEMENTS' below */ -#define EXTRA_ELEMENTS (((FARSH_MAX_HASHES-1) * FARSH_EXTRA_KEY_SIZE) / sizeof(uint32_t)) +#define STRIPE_ELEMENTS (STRIPE / 
sizeof(uint32_t)) /* + * should be power of 2 due to use of 'x % STRIPE_ELEMENTS' below + * + */ +#define EXTRA_ELEMENTS (((FARSH_MAX_HASHES - 1) * FARSH_EXTRA_KEY_SIZE) / sizeof(uint32_t)) /* STRIPE bytes of key material plus extra keys for hashes up to 1024 bits long */ -alignas(32) static const uint32_t FARSH_KEYS [STRIPE_ELEMENTS + EXTRA_ELEMENTS] = { - 0xb8fe6c39,0x23a44bbe,0x7c01812c,0xf721ad1c,0xded46de9,0x839097db,0x7240a4a4,0xb7b3671f, - 0xcb79e64e,0xccc0e578,0x825ad07d,0xccff7221,0xb8084674,0xf743248e,0xe03590e6,0x813a264c, - 0x3c2852bb,0x91c300cb,0x88d0658b,0x1b532ea3,0x71644897,0xa20df94e,0x3819ef46,0xa9deacd8, - 0xa8fa763f,0xe39c343f,0xf9dcbbc7,0xc70b4f1d,0x8a51e04b,0xcdb45931,0xc89f7ec9,0xd9787364, - 0x4f6a0752,0xa79b079c,0x8fc49499,0x8ec9b7a9,0x33c92249,0x4eb6404f,0xfb2afb4e,0xa4814255, - 0x2f0e1b98,0xace93b24,0x188850cd,0x6c5c74a7,0x66fa4404,0xeac5ac83,0x34d3ebc3,0xc581a0ff, - 0xfa1363eb,0x170ddd51,0xb7f0da49,0xd3165526,0x29d4689e,0x2b16be58,0x7d47a1fc,0x8ff8b8d1, - 0x7ad031ce,0x45cb3a8f,0x95160428,0xafd7fbca,0xbb4b407e,0x995274a4,0xeb9a2d93,0x3be78908, - 0xed475f6c,0x919cd8f2,0xd3861e5a,0x6e31390c,0xfe6a3a49,0xdcad0914,0x06508beb,0xa88399f3, - 0xb058112f,0xe8b0fa79,0x29b4da06,0xedc253fb,0xc3e96dad,0x6e372b83,0x4f78b153,0xfffa6e86, - 0x21beeeec,0x01caea02,0x1267e50d,0x11e6092f,0xe819d298,0x832f80dd,0x0c4e2477,0xbc7886eb, - 0x01506637,0x8ba89668,0x6d11e7a0,0xfc12fd15,0x86a54c19,0x593ce3dd,0xd2b13fe5,0x8e772b53, - 0xae4a60cc,0x647a3b1b,0x547786e0,0x3ec4378e,0x8d7acf89,0xca36f947,0x0e89d5ef,0xaada6a3c, - 0x6da4a109,0x9ac6e11c,0x686691ef,0xa357bd2b,0xd16f1b9a,0x38c70303,0x7d4622b3,0x2968fa8f, - 0x8ca5bcb9,0xfcd61005,0x228b5e96,0x2c9dcc19,0x57cf243c,0x3c53f9c1,0x0cc7952c,0x686de4f0, - 0x93a747b5,0x4e87a510,0x975e91ae,0x4c10b98e,0x8a7f068c,0x346b19ab,0x353ca625,0xf20a50e0, - 0xce9921f6,0xdf66e014,0x0a11ef4b,0x8bc84ddf,0x84d25d22,0xc823936d,0x94741ec3,0x88278a60, - 0xb8649331,0x7a707a10,0x7292cad6,0xa7c644c2,0xbd156bfa,0x646c9578,0xb7f4dfd5,0x9f8277a7, - 
0x7013924e,0xad674cc3,0x2cae9d05,0x912a9a22,0xf67c53fa,0x8d7e22a9,0x59ae372b,0x850199f3, - 0x63a2102c,0xd6ff1261,0x56738ee1,0xaa95145b,0xfdd12832,0x5b684deb,0x0784de94,0xaa62390e, - 0xbb7ccf19,0x0fefd572,0x565b41ca,0x2206d202,0x2d608479,0x4c0fcd3d,0xd36d3be3,0x155a9a65, - 0x10f9e732,0xac9b0f1e,0x1f72a03b,0xea9440ae,0x5b674b4f,0x31a827d1,0xecca954f,0x3d2cd61e, - 0x768d3da4,0x93745ac1,0x1d5d58cb,0x4b86f3b6,0x2aba923a,0x0e65814c,0x8ae063d9,0xcd6969b0, - 0x36641585,0x742af59d,0x613a1316,0x338ea471,0x47861af3,0x30479dc3,0x1270a481,0x08771069, - 0xe3c4f0d2,0x0229874c,0x5a8a3bc1,0xe30d9733,0xd05be5a2,0xe2af31ba,0x222049f9,0x9f923b6a, - 0x033f64ec,0xe528b62b,0x8201efbd,0x2107d877,0xd8312ef1,0xa5679f99,0x1730b51b,0x752616d2, - 0x05305909,0x0dca440b,0x2093cdd9,0x6409ab50,0xba5c8ecc,0x8d4708ea,0x429f0917,0xb762fab0, - 0x5161ea75,0x45eba0eb,0xb6f34b41,0x52047123,0xe4181523,0x8d74e90a,0x54fa401c,0xddda0cc7, - 0x63df182a,0xc6403ef6,0x348ec6e8,0xb9ff57f5,0xf652b8bd,0x0f86b0f3,0xfb3a088a,0x4dc71533, - 0x7b3617d2,0xa34e87eb,0xba2a9bdd,0xe3381306,0x14bad6bb,0xc96dc7c2,0x333b54b6,0x9be47cfa, - 0x1dcf9299,0xe7ea5f99,0xb38feacd,0xc3cfe2f7,0x5b87e822,0x39c5ab56,0x18f4a18f,0x2d484d9c, - 0x4163d519,0x79769e98,0xf58a67f0,0x40590c02,0x319671c0,0x266b133a,0xaf81b287,0x6a31f737, +alignas(32) static const uint32_t FARSH_KEYS[STRIPE_ELEMENTS + EXTRA_ELEMENTS] = { + 0xb8fe6c39, 0x23a44bbe, 0x7c01812c, 0xf721ad1c, 0xded46de9, 0x839097db, 0x7240a4a4, 0xb7b3671f, + 0xcb79e64e, 0xccc0e578, 0x825ad07d, 0xccff7221, 0xb8084674, 0xf743248e, 0xe03590e6, 0x813a264c, + 0x3c2852bb, 0x91c300cb, 0x88d0658b, 0x1b532ea3, 0x71644897, 0xa20df94e, 0x3819ef46, 0xa9deacd8, + 0xa8fa763f, 0xe39c343f, 0xf9dcbbc7, 0xc70b4f1d, 0x8a51e04b, 0xcdb45931, 0xc89f7ec9, 0xd9787364, + 0x4f6a0752, 0xa79b079c, 0x8fc49499, 0x8ec9b7a9, 0x33c92249, 0x4eb6404f, 0xfb2afb4e, 0xa4814255, + 0x2f0e1b98, 0xace93b24, 0x188850cd, 0x6c5c74a7, 0x66fa4404, 0xeac5ac83, 0x34d3ebc3, 0xc581a0ff, + 0xfa1363eb, 0x170ddd51, 0xb7f0da49, 0xd3165526, 
0x29d4689e, 0x2b16be58, 0x7d47a1fc, 0x8ff8b8d1, + 0x7ad031ce, 0x45cb3a8f, 0x95160428, 0xafd7fbca, 0xbb4b407e, 0x995274a4, 0xeb9a2d93, 0x3be78908, + 0xed475f6c, 0x919cd8f2, 0xd3861e5a, 0x6e31390c, 0xfe6a3a49, 0xdcad0914, 0x06508beb, 0xa88399f3, + 0xb058112f, 0xe8b0fa79, 0x29b4da06, 0xedc253fb, 0xc3e96dad, 0x6e372b83, 0x4f78b153, 0xfffa6e86, + 0x21beeeec, 0x01caea02, 0x1267e50d, 0x11e6092f, 0xe819d298, 0x832f80dd, 0x0c4e2477, 0xbc7886eb, + 0x01506637, 0x8ba89668, 0x6d11e7a0, 0xfc12fd15, 0x86a54c19, 0x593ce3dd, 0xd2b13fe5, 0x8e772b53, + 0xae4a60cc, 0x647a3b1b, 0x547786e0, 0x3ec4378e, 0x8d7acf89, 0xca36f947, 0x0e89d5ef, 0xaada6a3c, + 0x6da4a109, 0x9ac6e11c, 0x686691ef, 0xa357bd2b, 0xd16f1b9a, 0x38c70303, 0x7d4622b3, 0x2968fa8f, + 0x8ca5bcb9, 0xfcd61005, 0x228b5e96, 0x2c9dcc19, 0x57cf243c, 0x3c53f9c1, 0x0cc7952c, 0x686de4f0, + 0x93a747b5, 0x4e87a510, 0x975e91ae, 0x4c10b98e, 0x8a7f068c, 0x346b19ab, 0x353ca625, 0xf20a50e0, + 0xce9921f6, 0xdf66e014, 0x0a11ef4b, 0x8bc84ddf, 0x84d25d22, 0xc823936d, 0x94741ec3, 0x88278a60, + 0xb8649331, 0x7a707a10, 0x7292cad6, 0xa7c644c2, 0xbd156bfa, 0x646c9578, 0xb7f4dfd5, 0x9f8277a7, + 0x7013924e, 0xad674cc3, 0x2cae9d05, 0x912a9a22, 0xf67c53fa, 0x8d7e22a9, 0x59ae372b, 0x850199f3, + 0x63a2102c, 0xd6ff1261, 0x56738ee1, 0xaa95145b, 0xfdd12832, 0x5b684deb, 0x0784de94, 0xaa62390e, + 0xbb7ccf19, 0x0fefd572, 0x565b41ca, 0x2206d202, 0x2d608479, 0x4c0fcd3d, 0xd36d3be3, 0x155a9a65, + 0x10f9e732, 0xac9b0f1e, 0x1f72a03b, 0xea9440ae, 0x5b674b4f, 0x31a827d1, 0xecca954f, 0x3d2cd61e, + 0x768d3da4, 0x93745ac1, 0x1d5d58cb, 0x4b86f3b6, 0x2aba923a, 0x0e65814c, 0x8ae063d9, 0xcd6969b0, + 0x36641585, 0x742af59d, 0x613a1316, 0x338ea471, 0x47861af3, 0x30479dc3, 0x1270a481, 0x08771069, + 0xe3c4f0d2, 0x0229874c, 0x5a8a3bc1, 0xe30d9733, 0xd05be5a2, 0xe2af31ba, 0x222049f9, 0x9f923b6a, + 0x033f64ec, 0xe528b62b, 0x8201efbd, 0x2107d877, 0xd8312ef1, 0xa5679f99, 0x1730b51b, 0x752616d2, + 0x05305909, 0x0dca440b, 0x2093cdd9, 0x6409ab50, 0xba5c8ecc, 0x8d4708ea, 0x429f0917, 
0xb762fab0, + 0x5161ea75, 0x45eba0eb, 0xb6f34b41, 0x52047123, 0xe4181523, 0x8d74e90a, 0x54fa401c, 0xddda0cc7, + 0x63df182a, 0xc6403ef6, 0x348ec6e8, 0xb9ff57f5, 0xf652b8bd, 0x0f86b0f3, 0xfb3a088a, 0x4dc71533, + 0x7b3617d2, 0xa34e87eb, 0xba2a9bdd, 0xe3381306, 0x14bad6bb, 0xc96dc7c2, 0x333b54b6, 0x9be47cfa, + 0x1dcf9299, 0xe7ea5f99, 0xb38feacd, 0xc3cfe2f7, 0x5b87e822, 0x39c5ab56, 0x18f4a18f, 0x2d484d9c, + 0x4163d519, 0x79769e98, 0xf58a67f0, 0x40590c02, 0x319671c0, 0x266b133a, 0xaf81b287, 0x6a31f737, - 0xe3bc0197,0x55079913,0x9f72c696,0x363e00c8,0x53153947,0xebfd127f,0x00f60519,0x46a6b62a, - 0x93b83380,0x3fe29324,0xdfc67091,0x0f62386d,0xdc375e79,0x8fea3f3e,0xdf8463d0,0x3702fa7b, - 0x3954435e,0x87caa648,0xa9158bee,0x08f30c25,0x66b82936,0xe7fc3feb,0x183c5450,0xd7ef4345, - 0x798c7963,0xc02cf557,0x098553d1,0xfa4312aa,0xe29ef883,0x7caf128d,0x74b3a07d,0xc8efdf5b, - 0x8db23782,0x2c409f4a,0xdae469da,0x4d3e1b3f,0x2e7b9a58,0xc83e3753,0xcefd96a6,0x44ddb068, - 0x5faed141,0xdee7d0f1,0xc223dbb4,0x7bfbe104,0x114d6e1d,0x52039cd5,0x307c0a9c,0xa6289c12, - 0x20ee8b3e,0x03724b0b,0xba68ae4a,0x93c5f2a1,0x9af27bb2,0x480f0eba,0xc14c6bbe,0xe7331f87, - 0xf0104df4,0x22c05363,0xb7e6d08a,0x6f15c449,0x4b9ee2cd,0x6b2c78ae,0x25ed2673,0xb6256596, - 0x99ad4803,0x654f8f10,0xe89eca64,0xd9a506df,0x530dc5fa,0xfe75be5c,0xa543833d,0xf739fd45, - 0x1605b488,0xe50f614a,0xe930df83,0x4540195d,0xf2da0f32,0x6b04f79c,0xe3c73c99,0xb3a5265c, - 0x5a1be07d,0xbda13d2a,0xeddc281c,0xe9d9a39a,0xde9beff1,0x573c1747,0x40be5b3e,0x3756e968, - 0x968077b6,0x6525a28f,0x747d0735,0x8a0ec11d,0x49c03af5,0xf3def45b,0xc3c9214d,0x9ea2e76d, - 0xfad3a715,0xcaa7ad89,0xde828e4c,0xa5769bd5,0x467cdb5a,0xd5f2cacb,0x68ebd182,0x8d40341a, - 0x21556887,0x000a5f6f,0x5ad8a473,0xafe7e886,0x98997d39,0x945ad218,0x46be0c93,0x93a5bd3a, - 0x3ffa4a8c,0xd834d936,0x2f022a2a,0x20791c6b,0x5db51516,0x8defeed2,0x9dee28a5,0x5188eba7, - 0xab4f8c67,0x48ceac96,0x2a11e16f,0xc1593b6d + 0xe3bc0197, 0x55079913, 0x9f72c696, 0x363e00c8, 0x53153947, 0xebfd127f, 0x00f60519, 
0x46a6b62a, + 0x93b83380, 0x3fe29324, 0xdfc67091, 0x0f62386d, 0xdc375e79, 0x8fea3f3e, 0xdf8463d0, 0x3702fa7b, + 0x3954435e, 0x87caa648, 0xa9158bee, 0x08f30c25, 0x66b82936, 0xe7fc3feb, 0x183c5450, 0xd7ef4345, + 0x798c7963, 0xc02cf557, 0x098553d1, 0xfa4312aa, 0xe29ef883, 0x7caf128d, 0x74b3a07d, 0xc8efdf5b, + 0x8db23782, 0x2c409f4a, 0xdae469da, 0x4d3e1b3f, 0x2e7b9a58, 0xc83e3753, 0xcefd96a6, 0x44ddb068, + 0x5faed141, 0xdee7d0f1, 0xc223dbb4, 0x7bfbe104, 0x114d6e1d, 0x52039cd5, 0x307c0a9c, 0xa6289c12, + 0x20ee8b3e, 0x03724b0b, 0xba68ae4a, 0x93c5f2a1, 0x9af27bb2, 0x480f0eba, 0xc14c6bbe, 0xe7331f87, + 0xf0104df4, 0x22c05363, 0xb7e6d08a, 0x6f15c449, 0x4b9ee2cd, 0x6b2c78ae, 0x25ed2673, 0xb6256596, + 0x99ad4803, 0x654f8f10, 0xe89eca64, 0xd9a506df, 0x530dc5fa, 0xfe75be5c, 0xa543833d, 0xf739fd45, + 0x1605b488, 0xe50f614a, 0xe930df83, 0x4540195d, 0xf2da0f32, 0x6b04f79c, 0xe3c73c99, 0xb3a5265c, + 0x5a1be07d, 0xbda13d2a, 0xeddc281c, 0xe9d9a39a, 0xde9beff1, 0x573c1747, 0x40be5b3e, 0x3756e968, + 0x968077b6, 0x6525a28f, 0x747d0735, 0x8a0ec11d, 0x49c03af5, 0xf3def45b, 0xc3c9214d, 0x9ea2e76d, + 0xfad3a715, 0xcaa7ad89, 0xde828e4c, 0xa5769bd5, 0x467cdb5a, 0xd5f2cacb, 0x68ebd182, 0x8d40341a, + 0x21556887, 0x000a5f6f, 0x5ad8a473, 0xafe7e886, 0x98997d39, 0x945ad218, 0x46be0c93, 0x93a5bd3a, + 0x3ffa4a8c, 0xd834d936, 0x2f022a2a, 0x20791c6b, 0x5db51516, 0x8defeed2, 0x9dee28a5, 0x5188eba7, + 0xab4f8c67, 0x48ceac96, 0x2a11e16f, 0xc1593b6d }; /* Internal: hash exactly STRIPE bytes */ -template < bool bswap > -static uint64_t farsh_full_block (const uint8_t *data, const uint32_t *key) { +template +static uint64_t farsh_full_block( const uint8_t * data, const uint32_t * key ) { #if defined(HAVE_AVX2) - __m256i sum = _mm256_setzero_si256(); __m128i sum128; int i; - const __m256i *xdata = (const __m256i *) data; - const __m256i *xkey = (const __m256i *) key; + __m256i sum = _mm256_setzero_si256(); __m128i sum128; int i; + const __m256i * xdata = (const __m256i *)data; + const __m256i * xkey = (const 
__m256i *)key; - for (i=0; i < STRIPE/sizeof(__m256i); i++) { - __m256i d = _mm256_loadu_si256 (xdata+i); + for (i = 0; i < STRIPE / sizeof(__m256i); i++) { + __m256i d = _mm256_loadu_si256(xdata + i); if (bswap) { d = mm256_bswap32(d); } - __m256i k = _mm256_loadu_si256 (xkey+i); - __m256i dk = _mm256_add_epi32(d,k); // uint32 dk[8] = {d0+k0, d1+k1 .. d7+k7} - __m256i res = _mm256_mul_epu32 (dk, _mm256_shuffle_epi32 (dk,0x31)); // uint64 res[4] = {dk0*dk1, dk2*dk3, dk4*dk5, dk6*dk7} - sum = _mm256_add_epi64(sum,res); + __m256i k = _mm256_loadu_si256(xkey + i ); + __m256i dk = _mm256_add_epi32(d, k); // uint32 dk[8] = {d0+k0, d1+k1 .. d7+k7} + __m256i res = _mm256_mul_epu32(dk, _mm256_shuffle_epi32(dk, 0x31)); // uint64 res[4] = {dk0*dk1, dk2*dk3, + // dk4*dk5, dk6*dk7} + sum = _mm256_add_epi64(sum, res); } - sum = _mm256_add_epi64 (sum, _mm256_shuffle_epi32(sum,3*4+2)); // return sum of four 64-bit values in the sum - sum128 = _mm_add_epi64 (_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum,1)); + sum = _mm256_add_epi64(sum, _mm256_shuffle_epi32(sum, 3 * 4 + 2)); // return sum of four 64-bit values in + // the sum + sum128 = _mm_add_epi64(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); return _mm_cvtsi128_si64(sum128); #elif defined(HAVE_SSE_2) - __m128i sum = _mm_setzero_si128(); int i; - const __m128i *xdata = (const __m128i *) data; - const __m128i *xkey = (const __m128i *) key; + __m128i sum = _mm_setzero_si128(); int i; + const __m128i * xdata = (const __m128i *)data; + const __m128i * xkey = (const __m128i *)key; - for (i=0; i < STRIPE/sizeof(__m128i); i++) { - __m128i d = _mm_loadu_si128 (xdata+i); + for (i = 0; i < STRIPE / sizeof(__m128i); i++) { + __m128i d = _mm_loadu_si128(xdata + i); if (bswap) { d = mm_bswap32(d); } - __m128i k = _mm_load_si128 (xkey+i); - __m128i dk = _mm_add_epi32(d,k); // uint32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} - __m128i res = _mm_mul_epu32 (dk, _mm_shuffle_epi32 (dk,0x31)); // uint64 res[2] = 
{dk0*dk1,dk2*dk3} - sum = _mm_add_epi64(sum,res); + __m128i k = _mm_load_si128(xkey + i); + __m128i dk = _mm_add_epi32(d, k); // uint32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} + __m128i res = _mm_mul_epu32(dk, _mm_shuffle_epi32(dk, 0x31)); // uint64 res[2] = {dk0*dk1,dk2*dk3} + sum = _mm_add_epi64(sum, res); } - sum = _mm_add_epi64 (sum, _mm_shuffle_epi32(sum,3*4+2)); // return sum of two 64-bit values in the sum + sum = _mm_add_epi64(sum, _mm_shuffle_epi32(sum, 3 * 4 + 2)); // return sum of two 64-bit values in the sum return _mm_cvtsi128_si64(sum); #else uint64_t sum = 0; int i; - for (i=0; i < STRIPE_ELEMENTS; i+=2) { - sum += (GET_U32(data, i*4) + key[i]) * - (uint64_t)(GET_U32(data, (i+1)*4) + key[i+1]); + for (i = 0; i < STRIPE_ELEMENTS; i += 2) { + sum += (GET_U32(data, i * 4) + key[i]) * + (uint64_t)(GET_U32(data, (i + 1) * 4) + key[i + 1]); } return sum; #endif } /* Internal: hash less than STRIPE bytes, with careful handling of partial uint32_t pair at the end of buffer */ -template < bool bswap > -static uint64_t farsh_partial_block(const uint8_t *data, size_t bytes, const uint32_t *key) { +template +static uint64_t farsh_partial_block( const uint8_t * data, size_t bytes, const uint32_t * key ) { uint64_t sum = 0; int i; - size_t elements = (bytes/sizeof(uint32_t)) & (~1); + size_t elements = (bytes / sizeof(uint32_t)) & (~1); - uint32_t extra_data[2] = {0}; - size_t extra_bytes = bytes - elements*sizeof(uint32_t); - memcpy (extra_data, data+4*elements, extra_bytes); + uint32_t extra_data[2] = { 0 }; + size_t extra_bytes = bytes - elements * sizeof(uint32_t); - for (i=0; i < elements; i+=2) - sum += (GET_U32(data, i*4) + key[i]) * - (uint64_t)(GET_U32(data, (i+1)*4) + key[i+1]); - if (extra_bytes) - sum += (COND_BSWAP(extra_data[0],bswap) + key[i]) * - (uint64_t)(COND_BSWAP(extra_data[1],bswap) + key[i+1]); + memcpy(extra_data, data + 4 * elements, extra_bytes); + + for (i = 0; i < elements; i += 2) { + sum += (GET_U32(data, i * 4) + key[i]) * + 
(uint64_t)(GET_U32(data, (i + 1) * 4) + key[i + 1]); + } + if (extra_bytes) { + sum += (COND_BSWAP(extra_data[0], bswap) + key[i]) * + (uint64_t)(COND_BSWAP(extra_data[1], bswap) + key[i + 1]); + } return sum; } @@ -160,21 +168,21 @@ static uint64_t farsh_partial_block(const uint8_t *data, size_t bytes, const uin static const uint64_t PRIME64_1 = UINT64_C(11400714785074694791); static const uint64_t PRIME64_2 = UINT64_C(14029467366897019727); -static const uint64_t PRIME64_3 = UINT64_C( 1609587929392839161); -static const uint64_t PRIME64_4 = UINT64_C( 9650029242287828579); +static const uint64_t PRIME64_3 = UINT64_C(1609587929392839161); +static const uint64_t PRIME64_4 = UINT64_C(9650029242287828579); /* Internal: combine hash of the current block with overall hashsum */ -static uint64_t farsh_combine (uint64_t sum, uint64_t h) { - h *= PRIME64_2; - h += h >> 31; - h *= PRIME64_1; +static uint64_t farsh_combine( uint64_t sum, uint64_t h ) { + h *= PRIME64_2; + h += h >> 31; + h *= PRIME64_1; sum ^= h; - sum = (sum+(sum>>27)) * PRIME64_1 + PRIME64_4; + sum = (sum + (sum >> 27)) * PRIME64_1 + PRIME64_4; return sum; } /* Internal: compute the final hashsum value */ -static uint32_t farsh_final (uint64_t sum) { +static uint32_t farsh_final( uint64_t sum ) { sum ^= sum >> 33; sum *= PRIME64_2; sum ^= sum >> 29; @@ -185,111 +193,117 @@ static uint32_t farsh_final (uint64_t sum) { /* End of hash mixing code kidnapped from the xxHash64 */ /* ////////////////////////////////////////////////////////////////////////// */ - /* Public API functions documented in farsh.h */ -template < bool bswap > -static uint32_t farsh_keyed (const void *data, size_t bytes, const void *key, uint64_t seed) { - uint64_t sum = seed; - const uint8_t * ptr = (const uint8_t *) data; - const uint32_t * key_ptr = (const uint32_t *) key; +template +static uint32_t farsh_keyed( const void * data, size_t bytes, const void * key, uint64_t seed ) { + uint64_t sum = seed; + const uint8_t * ptr = (const 
uint8_t * )data; + const uint32_t * key_ptr = (const uint32_t *)key; + while (bytes >= STRIPE) { - size_t chunk = STRIPE; - uint64_t h = farsh_full_block(ptr, key_ptr); - sum = farsh_combine (sum, h); + size_t chunk = STRIPE; + uint64_t h = farsh_full_block(ptr, key_ptr); + sum = farsh_combine(sum, h); ptr += chunk; bytes -= chunk; } if (bytes) { - size_t chunk = bytes; - uint64_t h = farsh_partial_block(ptr, chunk, key_ptr); - sum = farsh_combine (sum, h); + size_t chunk = bytes; + uint64_t h = farsh_partial_block(ptr, chunk, key_ptr); + sum = farsh_combine(sum, h); ptr += chunk; bytes -= chunk; } - return farsh_final(sum) ^ key_ptr[bytes%STRIPE_ELEMENTS]; /* ensure that zeroes at the end of data will affect the hash value */ + return farsh_final(sum) ^ key_ptr[bytes % STRIPE_ELEMENTS]; /* + * ensure that zeroes at the end of data will affect the + * hash value + */ } -template < bool bswap > -static void farsh_keyed_n (const void *data, size_t bytes, const void *key, int n, uint64_t seed, void *hash) { - uint32_t * hash_ptr = (uint32_t*)hash; - for (int i = 0; i < n; i++) - hash_ptr[i] = COND_BSWAP(farsh_keyed(data, bytes, (const uint8_t*)key + i*FARSH_EXTRA_KEY_SIZE, seed), bswap); +template +static void farsh_keyed_n( const void * data, size_t bytes, const void * key, int n, uint64_t seed, void * hash ) { + uint32_t * hash_ptr = (uint32_t *)hash; + + for (int i = 0; i < n; i++) { + hash_ptr[i] = COND_BSWAP(farsh_keyed(data, bytes, (const uint8_t *)key + i * FARSH_EXTRA_KEY_SIZE, + seed), bswap); + } } -template < bool bswap > -static void farsh_n (const void *data, size_t bytes, int k, int n, uint64_t seed, void *hash) { +template +static void farsh_n( const void * data, size_t bytes, int k, int n, uint64_t seed, void * hash ) { /* FARSH_KEYS contains only material for the hashes 0..FARSH_MAX_HASHES-1 */ - if (k+n > FARSH_MAX_HASHES) return; + if (k + n > FARSH_MAX_HASHES) { return; } - farsh_keyed_n(data, bytes, (const uint8_t*)FARSH_KEYS + 
k*FARSH_EXTRA_KEY_SIZE, n, seed, hash); + farsh_keyed_n(data, bytes, (const uint8_t *)FARSH_KEYS + k * FARSH_EXTRA_KEY_SIZE, n, seed, hash); } -template < bool bswap, uint32_t hashcount > -static void farsh(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void farsh( const void * in, const size_t len, const seed_t seed, void * out ) { farsh_n(in, len, 0, hashcount, (uint64_t)seed, out); } REGISTER_FAMILY(farsh, - $.src_url = "https://github.com/Bulat-Ziganshin/FARSH", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/Bulat-Ziganshin/FARSH", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(FARSH_32, - $.desc = "FARSH 32-bit (1 hash output)", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0xBCDE332C, - $.verification_BE = 0x1AD2B744, - $.hashfn_native = farsh, - $.hashfn_bswap = farsh -); + $.desc = "FARSH 32-bit (1 hash output)", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0xBCDE332C, + $.verification_BE = 0x1AD2B744, + $.hashfn_native = farsh, + $.hashfn_bswap = farsh + ); REGISTER_HASH(FARSH_64, - $.desc = "FARSH 64-bit (2 hash outputs)", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xDE2FDAEE, - $.verification_BE = 0xEFE7812E, - $.hashfn_native = farsh, - $.hashfn_bswap = farsh -); + $.desc = "FARSH 64-bit (2 hash outputs)", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xDE2FDAEE, + $.verification_BE = 0xEFE7812E, + $.hashfn_native = farsh, + $.hashfn_bswap = 
farsh + ); REGISTER_HASH(FARSH_128, - $.desc = "FARSH 128-bit (4 hash outputs)", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_SLOW | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x82B6CBEC, - $.verification_BE = 0x51150D39, - $.hashfn_native = farsh, - $.hashfn_bswap = farsh -); + $.desc = "FARSH 128-bit (4 hash outputs)", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_SLOW | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x82B6CBEC, + $.verification_BE = 0x51150D39, + $.hashfn_native = farsh, + $.hashfn_bswap = farsh + ); REGISTER_HASH(FARSH_256, - $.desc = "FARSH 256-bit (8 hash outputs)", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_LICENSE_MIT, - $.bits = 256, - $.verification_LE = 0xFEBEA0BC, - $.verification_BE = 0x75FAC191, - $.hashfn_native = farsh, - $.hashfn_bswap = farsh -); + $.desc = "FARSH 256-bit (8 hash outputs)", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_LICENSE_MIT, + $.bits = 256, + $.verification_LE = 0xFEBEA0BC, + $.verification_BE = 0x75FAC191, + $.hashfn_native = farsh, + $.hashfn_bswap = farsh + ); diff --git a/hashes/fasthash.cpp b/hashes/fasthash.cpp index 6c8a09fa..1e9da60d 100644 --- a/hashes/fasthash.cpp +++ b/hashes/fasthash.cpp @@ -22,39 +22,39 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. -*/ + */ #include "Platform.h" #include "Hashlib.h" //------------------------------------------------------------ // Compression function for Merkle-Damgard construction. // This function is generated using the framework provided. 
-static inline uint64_t mix(uint64_t h) { +static inline uint64_t mix( uint64_t h ) { h ^= h >> 23; h *= UINT64_C(0x2127599bf4325c37); h ^= h >> 47; return h; } -static inline uint32_t fold(uint64_t h) { +static inline uint32_t fold( uint64_t h ) { // the following trick converts the 64-bit hashcode to Fermat // residue, which shall retain information from both the higher // and lower parts of hashcode. return h - (h >> 32); } -template < bool bswap > -static uint64_t fasthash_impl(const uint8_t * pos, size_t len, uint64_t seed) { - const uint64_t m = UINT64_C(0x880355f21e6d1965); +template +static uint64_t fasthash_impl( const uint8_t * pos, size_t len, uint64_t seed ) { + const uint64_t m = UINT64_C(0x880355f21e6d1965); const uint8_t * end = pos + (len & ~7); uint64_t h = seed ^ (len * m); uint64_t v; while (pos != end) { - v = GET_U64(pos, 0); - h ^= mix(v); - h *= m; + v = GET_U64(pos, 0); + h ^= mix(v); + h *= m; pos += 8; } @@ -66,58 +66,60 @@ static uint64_t fasthash_impl(const uint8_t * pos, size_t len, uint64_t seed) { case 5: v ^= (uint64_t)pos[4] << 32; case 4: v ^= (uint64_t)pos[3] << 24; case 3: v ^= (uint64_t)pos[2] << 16; - case 2: v ^= (uint64_t)pos[1] << 8; + case 2: v ^= (uint64_t)pos[1] << 8; case 1: v ^= (uint64_t)pos[0]; - h ^= mix(v); - h *= m; + h ^= mix(v); + h *= m; } return mix(h); } //------------------------------------------------------------ -template < bool bswap > -static void fasthash64(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void fasthash64( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h = fasthash_impl((const uint8_t *)in, len, (uint64_t)seed); + PUT_U64(h, (uint8_t *)out, 0); } -template < bool bswap > -static void fasthash32(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void fasthash32( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h = fasthash_impl((const uint8_t *)in, len, 
(uint64_t)seed); + PUT_U32(fold(h), (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(fasthash, - $.src_url = "https://github.com/ztanml/fast-hash", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://github.com/ztanml/fast-hash", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); REGISTER_HASH(fasthash_32, - $.desc = "fast-hash, 32-bit version", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0xE9481AFC, - $.verification_BE = 0x48BCE1ED, - $.hashfn_native = fasthash32, - $.hashfn_bswap = fasthash32 -); + $.desc = "fast-hash, 32-bit version", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0xE9481AFC, + $.verification_BE = 0x48BCE1ED, + $.hashfn_native = fasthash32, + $.hashfn_bswap = fasthash32 + ); REGISTER_HASH(fasthash_64, - $.desc = "fast-hash, 64-bit version", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xA16231A7, - $.verification_BE = 0x82AD8DDB, - $.hashfn_native = fasthash64, - $.hashfn_bswap = fasthash64 -); + $.desc = "fast-hash, 64-bit version", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xA16231A7, + $.verification_BE = 0x82AD8DDB, + $.hashfn_native = fasthash64, + $.hashfn_bswap = fasthash64 + ); diff --git a/hashes/fletcher.cpp b/hashes/fletcher.cpp index 204ea6f2..bd223d18 100644 --- a/hashes/fletcher.cpp +++ b/hashes/fletcher.cpp @@ -31,12 +31,13 @@ // Hash based on 1 lane of ZFS's fletcher2 checksum. ZFS is always // guaranteed blocks of multiples-of-128 bytes for checksums, so it // does two of these on alternate sets of words. 
-template < bool fullhash, bool bswap > -static void fletcher2(const uint8_t * key, size_t len, uint64_t seed, uint8_t * out) { +template +static void fletcher2( const uint8_t * key, size_t len, uint64_t seed, uint8_t * out ) { const uint8_t * const endc = key + len; const uint8_t * const endw = key + (len & ~7); // Legacy homegrown seeding for SMHasher3 uint64_t A = seed, B = 0; + for (; key < endw; key += 8) { A += GET_U64(key, 0); B += A; @@ -56,12 +57,13 @@ static void fletcher2(const uint8_t * key, size_t len, uint64_t seed, uint8_t * // Hash based on 1 lane of ZFS's fletcher4 checksum. ZFS is always // guaranteed blocks of multiples-of-128 bytes for checksums, so it // does two of these on alternate sets of words. -template < bool fullhash, bool bswap > -static void fletcher4(const uint8_t * key, size_t len, uint64_t seed, uint8_t * out) { +template +static void fletcher4( const uint8_t * key, size_t len, uint64_t seed, uint8_t * out ) { const uint8_t * const endc = key + len; const uint8_t * const endw = key + (len & ~3); // Legacy homegrown seeding for SMHasher3 uint64_t A = seed, B = 0, C = 0, D = 0; + for (; key < endw; key += 4) { A += GET_U32(key, 0); B += A; @@ -90,10 +92,10 @@ static void fletcher4(const uint8_t * key, size_t len, uint64_t seed, uint8_t * // overflow operations. This is important to the mathematical // operation of the checksum, and it was excluded from the ZFS // implementations. 
-template < bool bswap > -static uint32_t fletcher32(const uint8_t * key, size_t len, uint64_t seed) { +template +static uint32_t fletcher32( const uint8_t * key, size_t len, uint64_t seed ) { // Legacy homegrown seeding for SMHasher3 - uint32_t c0 = (uint32_t)(seed + len), c1 = (uint32_t)((seed >> 32) + len); + uint32_t c0 = (uint32_t)(seed + len), c1 = (uint32_t)((seed >> 32) + len); while (len > 1) { // 360 16-bit blocks can be processed without the possibility @@ -105,23 +107,23 @@ static uint32_t fletcher32(const uint8_t * key, size_t len, uint64_t seed) { c1 += c0; } len -= blklen; - c0 = c0 % 65535; - c1 = c1 % 65535; - }; + c0 = c0 % 65535; + c1 = c1 % 65535; + } if (len) { c0 += *key; c1 += c0; - c0 = c0 % 65535; - c1 = c1 % 65535; + c0 = c0 % 65535; + c1 = c1 % 65535; } - return (c1 << 16 | c0); + return c1 << 16 | c0; } -template < bool bswap > -static uint64_t fletcher64(const uint8_t * key, size_t len, uint64_t seed) { +template +static uint64_t fletcher64( const uint8_t * key, size_t len, uint64_t seed ) { // Legacy homegrown seeding for SMHasher3 - uint64_t c0 = seed + len, c1 = seed + len; + uint64_t c0 = seed + len, c1 = seed + len; while (len > 3) { // 92681 32-bit blocks can be processed without the possibility @@ -133,144 +135,146 @@ static uint64_t fletcher64(const uint8_t * key, size_t len, uint64_t seed) { c1 += c0; } len -= blklen; - c0 = c0 % 4294967295; - c1 = c1 % 4294967295; - }; + c0 = c0 % 4294967295; + c1 = c1 % 4294967295; + } if (len > 0) { do { c0 += *key++; c1 += c0; len--; } while (len > 0); - c0 = c0 % 4294967295; - c1 = c1 % 4294967295; + c0 = c0 % 4294967295; + c1 = c1 % 4294967295; } - return (c1 << 32 | c0); + return c1 << 32 | c0; } //------------------------------------------------------------ -template < bool bswap > -static void fletcher2_64(const void * in, const size_t len, const seed_t seed, void * out) { - fletcher2((const uint8_t *)in, len, (uint64_t)seed, (uint8_t *)out); +template +static void fletcher2_64( const 
void * in, const size_t len, const seed_t seed, void * out ) { + fletcher2((const uint8_t *)in, len, (uint64_t)seed, (uint8_t *)out); } -template < bool bswap > -static void fletcher2_128(const void * in, const size_t len, const seed_t seed, void * out) { - fletcher2((const uint8_t *)in, len, (uint64_t)seed, (uint8_t *)out); +template +static void fletcher2_128( const void * in, const size_t len, const seed_t seed, void * out ) { + fletcher2((const uint8_t *)in, len, (uint64_t)seed, (uint8_t *)out); } -template < bool bswap > -static void fletcher4_64(const void * in, const size_t len, const seed_t seed, void * out) { - fletcher4((const uint8_t *)in, len, (uint64_t)seed, (uint8_t *)out); +template +static void fletcher4_64( const void * in, const size_t len, const seed_t seed, void * out ) { + fletcher4((const uint8_t *)in, len, (uint64_t)seed, (uint8_t *)out); } -template < bool bswap > -static void fletcher4_256(const void * in, const size_t len, const seed_t seed, void * out) { - fletcher4((const uint8_t *)in, len, (uint64_t)seed, (uint8_t *)out); +template +static void fletcher4_256( const void * in, const size_t len, const seed_t seed, void * out ) { + fletcher4((const uint8_t *)in, len, (uint64_t)seed, (uint8_t *)out); } -template < bool bswap > -static void fletcher32(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void fletcher32( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = fletcher32((const uint8_t *)in, len, (uint64_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void fletcher64(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void fletcher64( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h = fletcher64((const uint8_t *)in, len, (uint64_t)seed); + PUT_U64(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(fletcher, - $.src_url = 
"https://github.com/rurban/smhasher/blob/master/Hashes.cpp", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/rurban/smhasher/blob/master/Hashes.cpp", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(fletcher2__64, - $.desc = "fletcher2 from ZFS (one lane, best 64 bits)", - $.sort_order = 10, - $.hash_flags = - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x890767C0, - $.verification_BE = 0x8FC6FD34, - $.hashfn_native = fletcher2_64, - $.hashfn_bswap = fletcher2_64 -); + $.desc = "fletcher2 from ZFS (one lane, best 64 bits)", + $.sort_order = 10, + $.hash_flags = + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x890767C0, + $.verification_BE = 0x8FC6FD34, + $.hashfn_native = fletcher2_64, + $.hashfn_bswap = fletcher2_64 + ); REGISTER_HASH(fletcher2, - $.desc = "fletcher2 from ZFS (one lane, all 128 bits)", - $.hash_flags = - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x70FD3480, - $.verification_BE = 0xFC346DA5, - $.hashfn_native = fletcher2_128, - $.hashfn_bswap = fletcher2_128 -); + $.desc = "fletcher2 from ZFS (one lane, all 128 bits)", + $.hash_flags = + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x70FD3480, + $.verification_BE = 0xFC346DA5, + $.hashfn_native = fletcher2_128, + $.hashfn_bswap = fletcher2_128 + ); REGISTER_HASH(fletcher4__64, - $.desc = "fletcher4 from ZFS (one lane, best 64 bits)", - $.sort_order = 20, - $.hash_flags = - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x47660EB7, - $.verification_BE = 0xA502FD23, - $.hashfn_native = fletcher4_64, - $.hashfn_bswap = fletcher4_64 -); + $.desc = "fletcher4 
from ZFS (one lane, best 64 bits)", + $.sort_order = 20, + $.hash_flags = + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x47660EB7, + $.verification_BE = 0xA502FD23, + $.hashfn_native = fletcher4_64, + $.hashfn_bswap = fletcher4_64 + ); REGISTER_HASH(fletcher4, - $.desc = "fletcher4 from ZFS (one lane, all 256 bits)", - $.hash_flags = - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_LICENSE_MIT, - $.bits = 256, - $.verification_LE = 0x1F1358EF, - $.verification_BE = 0x94EECE23, - $.hashfn_native = fletcher4_256, - $.hashfn_bswap = fletcher4_256 -); + $.desc = "fletcher4 from ZFS (one lane, all 256 bits)", + $.hash_flags = + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_LICENSE_MIT, + $.bits = 256, + $.verification_LE = 0x1F1358EF, + $.verification_BE = 0x94EECE23, + $.hashfn_native = fletcher4_256, + $.hashfn_bswap = fletcher4_256 + ); REGISTER_HASH(Fletcher_32, - $.desc = "Fletcher's checksum, 32-bit, IV == len", - $.hash_flags = - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_MODULUS | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x4FE14644, - $.verification_BE = 0x05853CCE, - $.hashfn_native = fletcher32, - $.hashfn_bswap = fletcher32 -); + $.desc = "Fletcher's checksum, 32-bit, IV == len", + $.hash_flags = + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_MODULUS | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x4FE14644, + $.verification_BE = 0x05853CCE, + $.hashfn_native = fletcher32, + $.hashfn_bswap = fletcher32 + ); REGISTER_HASH(Fletcher_64, - $.desc = "Fletcher's checksum, 64-bit, IV == len", - $.sort_order = 0, - $.hash_flags = - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_MODULUS | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x2E16C3AA, - $.verification_BE = 0x1E644927, - $.hashfn_native = fletcher64, - $.hashfn_bswap = fletcher64 -); + $.desc = "Fletcher's 
checksum, 64-bit, IV == len", + $.sort_order = 0, + $.hash_flags = + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_MODULUS | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x2E16C3AA, + $.verification_BE = 0x1E644927, + $.hashfn_native = fletcher64, + $.hashfn_bswap = fletcher64 + ); diff --git a/hashes/floppsyhash.cpp b/hashes/floppsyhash.cpp index 212c2258..d2ba7bac 100644 --- a/hashes/floppsyhash.cpp +++ b/hashes/floppsyhash.cpp @@ -35,32 +35,32 @@ static_assert(std::numeric_limits::is_iec559, "IEEE 754 floating point r //------------------------------------------------------------ // Q function : Continued Egyptian Fraction update function -template < bool old> -static FORCE_INLINE void q(double * state, double key_val, - double numerator, double denominator) { +template +static FORCE_INLINE void q( double * state, double key_val, double numerator, double denominator ) { double frac = numerator / denominator; + state[0] += frac; - state[0] = 1.0 / state[0]; + state[0] = 1.0 / state[0]; if (!old) { key_val += M_PI; } state[1] += key_val; - state[1] = numerator / state[1]; + state[1] = numerator / state[1]; } // round function : process the message -template < bool old> -static FORCE_INLINE void round(const uint8_t * msg, size_t len, double * state) { +template +static FORCE_INLINE void round( const uint8_t * msg, size_t len, double * state ) { double numerator = 1.0; // Loop - for (size_t i = 0; i < len; i++ ) { + for (size_t i = 0; i < len; i++) { double val = (double)msg[i]; double tmp; if (old) { - tmp = (double)(msg[i] + i + 1); + tmp = (double)(msg[i] + i + 1); } else { - tmp = val * M_E; - tmp += (double)(i + 1); + tmp = val * M_E; + tmp += (double)(i + 1); } double denominator = tmp / state[1]; @@ -71,33 +71,33 @@ static FORCE_INLINE void round(const uint8_t * msg, size_t len, double * state) if (old) { double tmp; - tmp = M_PI + state[1]; + tmp = M_PI + state[1]; state[0] *= tmp; - tmp = M_E + state[0]; + tmp = M_E + state[0]; state[1] 
*= tmp; } } // setup function : setup the state -static FORCE_INLINE void setup(double * state, double init = 0) { +static FORCE_INLINE void setup( double * state, double init = 0 ) { if (init == 0) { state[0] = (double)3.0; - state[1] = (double)1.0/7.0; + state[1] = (double)1.0 / 7.0; } else { double tmp = 1.0 / init; - tmp += init; - state[0] = pow(tmp, 1.0/3.0); - state[1] = pow(tmp, 1.0/7.0); + tmp += init; + state[0] = pow(tmp, 1.0 / 3.0); + state[1] = pow(tmp, 1.0 / 7.0); } } //------------------------------------------------------------ -//static_assert(sizeof(double) == 8); -template < bool old, bool bswap > -static void floppsyhash(const void * in, const size_t len, const seed_t seed, void * out) { +// static_assert(sizeof(double) == 8); +template +static void floppsyhash( const void * in, const size_t len, const seed_t seed, void * out ) { const uint8_t * data = (const uint8_t *)in; - double state[2]; - uint8_t seedbuf[4]; + double state[2]; + uint8_t seedbuf[4]; PUT_U32((uint32_t)seed, seedbuf, 0); @@ -126,40 +126,40 @@ static void floppsyhash(const void * in, const size_t len, const seed_t seed, vo //------------------------------------------------------------ REGISTER_FAMILY(floppsy, - $.src_url = "https://github.com/dosyago/floppsy", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://github.com/dosyago/floppsy", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); REGISTER_HASH(floppsyhash, - $.desc = "Floppsyhash v1.1.10 (floating-point hash using continued Egyptian fractions)", - $.hash_flags = - FLAG_HASH_SMALL_SEED | - FLAG_HASH_FLOATING_POINT , - $.impl_flags = - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_DIVIDE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x5F9F6226, - $.verification_BE = 0x4D4F96F0, - $.hashfn_native = floppsyhash, - $.hashfn_bswap = floppsyhash -); + $.desc = "Floppsyhash v1.1.10 (floating-point hash using continued Egyptian fractions)", + $.hash_flags = + 
FLAG_HASH_SMALL_SEED | + FLAG_HASH_FLOATING_POINT, + $.impl_flags = + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_DIVIDE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x5F9F6226, + $.verification_BE = 0x4D4F96F0, + $.hashfn_native = floppsyhash, + $.hashfn_bswap = floppsyhash + ); REGISTER_HASH(floppsyhash__old, - $.desc = "Floppsyhash (old version, fka \"tifuhash\")", - $.hash_flags = - FLAG_HASH_SMALL_SEED | - FLAG_HASH_FLOATING_POINT , - $.impl_flags = - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_DIVIDE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x644236D4, - $.verification_BE = 0x7A3D2F7E, - $.hashfn_native = floppsyhash, - $.hashfn_bswap = floppsyhash -); + $.desc = "Floppsyhash (old version, fka \"tifuhash\")", + $.hash_flags = + FLAG_HASH_SMALL_SEED | + FLAG_HASH_FLOATING_POINT, + $.impl_flags = + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_DIVIDE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x644236D4, + $.verification_BE = 0x7A3D2F7E, + $.hashfn_native = floppsyhash, + $.hashfn_bswap = floppsyhash + ); diff --git a/hashes/fnv.cpp b/hashes/fnv.cpp index 558530c3..6cf28509 100644 --- a/hashes/fnv.cpp +++ b/hashes/fnv.cpp @@ -27,24 +27,24 @@ #include "Platform.h" #include "Hashlib.h" -template < typename hashT, bool bswap > -static void fibonacci(const void * in, const size_t len, const seed_t seed, void * out) { - hashT h = (hashT)seed; - const hashT * dw = (const hashT *)in; - const hashT * const endw = &dw[len/sizeof(hashT)]; - const uint64_t C = UINT64_C(11400714819323198485); +template +static void fibonacci( const void * in, const size_t len, const seed_t seed, void * out ) { + hashT h = (hashT)seed; + const hashT * dw = (const hashT *)in; + const hashT * const endw = &dw[len / sizeof(hashT)]; + const uint64_t C = UINT64_C(11400714819323198485); hashT w; - //word stepper + // word stepper while (dw < endw) { memcpy(&w, dw++, sizeof(w)); - w = COND_BSWAP(w, 
bswap); + w = COND_BSWAP(w, bswap); h += w * C; } - //byte stepper - if (len & (sizeof(hashT)-1)) { + // byte stepper + if (len & (sizeof(hashT) - 1)) { uint8_t * dc = (uint8_t *)dw; - const uint8_t *const endc = &((const uint8_t*)in)[len]; + const uint8_t * const endc = &((const uint8_t *)in)[len]; while (dc < endc) { h += *dc++ * C; } @@ -56,13 +56,13 @@ static void fibonacci(const void * in, const size_t len, const seed_t seed, void // All seeding below this is homegrown for SMHasher3 -template < typename hashT, bool bswap > -static void FNV1a(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void FNV1a( const void * in, const size_t len, const seed_t seed, void * out ) { const uint8_t * data = (const uint8_t *)in; - const hashT C1 = (sizeof(hashT)==4)? UINT32_C(2166136261) : - UINT64_C(0xcbf29ce484222325); - const hashT C2 = (sizeof(hashT)==4)? UINT32_C(16777619) : - UINT64_C(0x100000001b3); + const hashT C1 = (sizeof(hashT) == 4) ? UINT32_C(2166136261) : + UINT64_C(0xcbf29ce484222325); + const hashT C2 = (sizeof(hashT) == 4) ? UINT32_C( 16777619) : + UINT64_C(0x100000001b3); hashT h = (hashT)seed; h ^= C1; @@ -75,27 +75,27 @@ static void FNV1a(const void * in, const size_t len, const seed_t seed, void * o memcpy(out, &h, sizeof(h)); } -template < typename hashT, bool bswap > -static void FNV2(const void * in, const size_t len, const seed_t seed, void * out) { - const hashT * dw = (const hashT *)in; - const hashT * const endw = &dw[len/sizeof(hashT)]; - const uint64_t C1 = (sizeof(hashT)==4)? UINT32_C(2166136261) : - UINT64_C(0xcbf29ce484222325); - const uint64_t C2 = (sizeof(hashT)==4)? UINT32_C(16777619) : - UINT64_C(0x100000001b3); +template +static void FNV2( const void * in, const size_t len, const seed_t seed, void * out ) { + const hashT * dw = (const hashT *)in; + const hashT * const endw = &dw[len / sizeof(hashT)]; + const uint64_t C1 = (sizeof(hashT) == 4) ? 
UINT32_C(2166136261) : + UINT64_C(0xcbf29ce484222325); + const uint64_t C2 = (sizeof(hashT) == 4) ? UINT32_C( 16777619) : + UINT64_C(0x100000001b3); hashT h = C1 ^ (hashT)seed; hashT w; - //word stepper + // word stepper while (dw < endw) { memcpy(&w, dw++, sizeof(w)); h ^= COND_BSWAP(w, bswap); h *= C2; } - //byte stepper - if (len & (sizeof(hashT)-1)) { + // byte stepper + if (len & (sizeof(hashT) - 1)) { uint8_t * dc = (uint8_t *)dw; - const uint8_t *const endc = &((const uint8_t*)in)[len]; + const uint8_t * const endc = &((const uint8_t *)in)[len]; while (dc < endc) { h ^= *dc++; h *= C2; @@ -106,45 +106,45 @@ static void FNV2(const void * in, const size_t len, const seed_t seed, void * ou memcpy(out, &h, sizeof(h)); } -template < bool bswap > -static void FNV_YoshimitsuTRIAD(const void * in, const size_t olen, const seed_t seed, void * out) { - const uint8_t *p = (const uint8_t *)in; - const uint32_t PRIME = 709607; - uint32_t hash32A = UINT32_C(2166136261) ^ seed; - uint32_t hash32B = UINT32_C(2166136261) + olen; - uint32_t hash32C = UINT32_C(2166136261); - size_t len = olen; +template +static void FNV_YoshimitsuTRIAD( const void * in, const size_t olen, const seed_t seed, void * out ) { + const uint8_t * p = (const uint8_t *)in; + const uint32_t PRIME = 709607; + uint32_t hash32A = UINT32_C(2166136261) ^ seed; + uint32_t hash32B = UINT32_C(2166136261) + olen; + uint32_t hash32C = UINT32_C(2166136261); + size_t len = olen; for (; len >= 3 * 2 * sizeof(uint32_t); len -= 3 * 2 * sizeof(uint32_t), p += 3 * 2 * sizeof(uint32_t)) { - hash32A = (hash32A ^ (ROTL32(GET_U32(p, 0), 5) ^ GET_U32(p, 4))) * PRIME; - hash32B = (hash32B ^ (ROTL32(GET_U32(p, 8), 5) ^ GET_U32(p, 12))) * PRIME; - hash32C = (hash32C ^ (ROTL32(GET_U32(p, 16), 5) ^ GET_U32(p, 20))) * PRIME; + hash32A = (hash32A ^ (ROTL32(GET_U32(p, 0), 5) ^ GET_U32(p, 4))) * PRIME; + hash32B = (hash32B ^ (ROTL32(GET_U32(p, 8), 5) ^ GET_U32(p, 12))) * PRIME; + hash32C = (hash32C ^ (ROTL32(GET_U32(p, 16), 5) ^ 
GET_U32(p, 20))) * PRIME; } if (p != (const uint8_t *)in) { hash32A = (hash32A ^ ROTL32(hash32C, 5)) * PRIME; } - //Cases 0. .31 + // Cases 0. .31 if (len & (4 * sizeof(uint32_t))) { - hash32A = (hash32A ^ (ROTL32(GET_U32(p, 0), 5) ^ GET_U32(p, 4))) * PRIME; - hash32B = (hash32B ^ (ROTL32(GET_U32(p, 8), 5) ^ GET_U32(p, 12))) * PRIME; - p += 8 * sizeof(uint16_t); + hash32A = (hash32A ^ (ROTL32(GET_U32(p, 0), 5) ^ GET_U32(p, 4))) * PRIME; + hash32B = (hash32B ^ (ROTL32(GET_U32(p, 8), 5) ^ GET_U32(p, 12))) * PRIME; + p += 8 * sizeof(uint16_t); } - //Cases 0. .15 + // Cases 0. .15 if (len & (2 * sizeof(uint32_t))) { hash32A = (hash32A ^ GET_U32(p, 0)) * PRIME; hash32B = (hash32B ^ GET_U32(p, 4)) * PRIME; - p += 4 * sizeof(uint16_t); + p += 4 * sizeof(uint16_t); } - //Cases:0. .7 + // Cases:0. .7 if (len & sizeof(uint32_t)) { hash32A = (hash32A ^ GET_U16(p, 0)) * PRIME; hash32B = (hash32B ^ GET_U16(p, 2)) * PRIME; - p += 2 * sizeof(uint16_t); + p += 2 * sizeof(uint16_t); } - //Cases:0. .3 + // Cases:0. 
.3 if (len & sizeof(uint16_t)) { hash32A = (hash32A ^ GET_U16(p, 0)) * PRIME; - p += sizeof(uint16_t); + p += sizeof(uint16_t); } if (len & 1) { hash32A = (hash32A ^ *p) * PRIME; @@ -157,28 +157,28 @@ static void FNV_YoshimitsuTRIAD(const void * in, const size_t olen, const seed_t memcpy(out, &hash32A, 4); } -template < bool keeplsb > -static FORCE_INLINE uint64_t _PADr_KAZE(uint64_t x, int n) { - if (n >= 64) return 0; +template +static FORCE_INLINE uint64_t _PADr_KAZE( uint64_t x, int n ) { + if (n >= 64) { return 0; } if (keeplsb) { return (x << n) >> n; } else { - return (x >> n); + return x >> n; } } -template < bool bswap > -static void FNV_Totenschiff(const void * in, const size_t olen, const seed_t seed, void * out) { - const uint8_t * p = (uint8_t *)in; - const uint32_t PRIME = 591798841; - uint32_t hash32; - uint64_t hash64 = (uint64_t)seed ^ UINT64_C(14695981039346656037); - uint64_t PADDEDby8; - size_t len = olen; +template +static void FNV_Totenschiff( const void * in, const size_t olen, const seed_t seed, void * out ) { + const uint8_t * p = (uint8_t *)in; + const uint32_t PRIME = 591798841; + uint32_t hash32; + uint64_t hash64 = (uint64_t)seed ^ UINT64_C(14695981039346656037); + uint64_t PADDEDby8; + size_t len = olen; for (; len > 8; len -= 8, p += 8) { PADDEDby8 = GET_U64(p, 0); - hash64 = (hash64 ^ PADDEDby8) * PRIME; + hash64 = (hash64 ^ PADDEDby8) * PRIME; } // Here len is 1..8. 
when (8-8) the QWORD remains intact @@ -205,29 +205,29 @@ static void FNV_Totenschiff(const void * in, const size_t olen, const seed_t see // // Many thanks go to Yurii 'Hordi' Hordiienko, he lessened with 3 // instructions the original 'Pippip', thus: -template < bool bswap > -static void FNV_Pippip_Yurii(const void * in, const size_t len, const seed_t seed, void * out) { - const uint8_t * str = (uint8_t *)in; - const uint32_t PRIME = 591798841; - uint32_t hash32; - uint64_t hash64 = (uint64_t)seed ^ UINT64_C(14695981039346656037); - size_t Cycles, NDhead; +template +static void FNV_Pippip_Yurii( const void * in, const size_t len, const seed_t seed, void * out ) { + const uint8_t * str = (uint8_t *)in; + const uint32_t PRIME = 591798841; + uint32_t hash32; + uint64_t hash64 = (uint64_t)seed ^ UINT64_C(14695981039346656037); + size_t Cycles, NDhead; if (len > 8) { Cycles = ((len - 1) >> 4) + 1; NDhead = len - (Cycles << 3); #pragma nounroll for (; Cycles--; str += 8) { - hash64 = (hash64 ^ (GET_U64(str, 0))) * PRIME; + hash64 = (hash64 ^ (GET_U64(str, 0) )) * PRIME; hash64 = (hash64 ^ (GET_U64(str, NDhead))) * PRIME; } } else { if (isLE() ^ bswap) { - hash64 = (hash64 ^ _PADr_KAZE(GET_U64(str, 0), (8 - len) << 3)) * - PRIME; + hash64 = (hash64 ^ _PADr_KAZE(GET_U64(str, 0), (8 - len) << 3)) * + PRIME; } else { hash64 = (hash64 ^ _PADr_KAZE(GET_U64(str, 0), (8 - len) << 3)) * - PRIME; + PRIME; } } hash32 = (uint32_t)(hash64 ^ (hash64 >> 32)); @@ -239,153 +239,153 @@ static void FNV_Pippip_Yurii(const void * in, const size_t len, const seed_t see // Also https://www.codeproject.com/articles/716530/fastest-hash-function-for-table-lookups-in-c REGISTER_FAMILY(fnv, - $.src_url = "http://www.sanmayce.com/Fastest_Hash/index.html", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "http://www.sanmayce.com/Fastest_Hash/index.html", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); REGISTER_HASH(fibonacci_32, - $.desc = "32-bit wordwise Fibonacci hash 
(Knuth)", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x09952480, - $.verification_BE = 0x006F7705, - $.hashfn_native = fibonacci, - $.hashfn_bswap = fibonacci, - $.badseeds = {0, UINT64_C(0xffffffff00000000)} /* !! all keys ending with 0x0000_0000 */ -); + $.desc = "32-bit wordwise Fibonacci hash (Knuth)", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x09952480, + $.verification_BE = 0x006F7705, + $.hashfn_native = fibonacci, + $.hashfn_bswap = fibonacci, + $.badseeds = { 0, UINT64_C (0xffffffff00000000) } /* !! all keys ending with 0x0000_0000 */ + ); REGISTER_HASH(fibonacci_64, - $.desc = "64-bit wordwise Fibonacci hash (Knuth)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xFE3BD380, - $.verification_BE = 0x3E67D58C, - $.hashfn_native = fibonacci, - $.hashfn_bswap = fibonacci, - $.badseeds = {0, UINT64_C(0xffffffff00000000)} /* !! all keys ending with 0x0000_0000 */ -); + $.desc = "64-bit wordwise Fibonacci hash (Knuth)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xFE3BD380, + $.verification_BE = 0x3E67D58C, + $.hashfn_native = fibonacci, + $.hashfn_bswap = fibonacci, + $.badseeds = { 0, UINT64_C (0xffffffff00000000) } /* !! 
all keys ending with 0x0000_0000 */ + ); REGISTER_HASH(FNV_1a_32, - $.desc = "32-bit bytewise FNV-1a (Fowler-Noll-Vo)", - $.hash_flags = - FLAG_HASH_NO_SEED | - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0xE3CBBE91, - $.verification_BE = 0x656F95A0, - $.hashfn_native = FNV1a, - $.hashfn_bswap = FNV1a, - $.badseeds = {0x811c9dc5} -); + $.desc = "32-bit bytewise FNV-1a (Fowler-Noll-Vo)", + $.hash_flags = + FLAG_HASH_NO_SEED | + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0xE3CBBE91, + $.verification_BE = 0x656F95A0, + $.hashfn_native = FNV1a, + $.hashfn_bswap = FNV1a, + $.badseeds = { 0x811c9dc5 } + ); REGISTER_HASH(FNV_1a_64, - $.desc = "64-bit bytewise FNV-1a (Fowler-Noll-Vo)", - $.hash_flags = - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x103455FC, - $.verification_BE = 0x4B032B63, - $.hashfn_native = FNV1a, - $.hashfn_bswap = FNV1a, - $.badseeds = {0x811c9dc5, 0xcbf29ce4, 0x84222325, UINT64_C(0xcbf29ce484222325)} -); + $.desc = "64-bit bytewise FNV-1a (Fowler-Noll-Vo)", + $.hash_flags = + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x103455FC, + $.verification_BE = 0x4B032B63, + $.hashfn_native = FNV1a, + $.hashfn_bswap = FNV1a, + $.badseeds = { 0x811c9dc5, 0xcbf29ce4, 0x84222325, UINT64_C (0xcbf29ce484222325) } + ); REGISTER_HASH(FNV_1a_32__wordwise, - $.desc = "32-bit wordwise hash based on FNV-1a", - $.hash_flags = - FLAG_HASH_NO_SEED | - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x739801C5, - $.verification_BE = 0xC5999647, - $.hashfn_native = FNV2, - $.hashfn_bswap = FNV2 -); + $.desc = "32-bit wordwise hash based on FNV-1a", + 
$.hash_flags = + FLAG_HASH_NO_SEED | + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x739801C5, + $.verification_BE = 0xC5999647, + $.hashfn_native = FNV2, + $.hashfn_bswap = FNV2 + ); REGISTER_HASH(FNV_1a_64__wordwise, - $.desc = "64-bit wordwise hash based on FNV1-a", - $.hash_flags = - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x1967C625, - $.verification_BE = 0x06F5053E, - $.hashfn_native = FNV2, - $.hashfn_bswap = FNV2 -); + $.desc = "64-bit wordwise hash based on FNV1-a", + $.hash_flags = + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x1967C625, + $.verification_BE = 0x06F5053E, + $.hashfn_native = FNV2, + $.hashfn_bswap = FNV2 + ); REGISTER_HASH(FNV_YoshimitsuTRIAD, - $.desc = "FNV-YoshimitsuTRIAD 32-bit (sanmayce)", - $.hash_flags = - FLAG_HASH_NO_SEED | - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0xD8AFFD71, - $.verification_BE = 0x85C2EC2F, - $.hashfn_native = FNV_YoshimitsuTRIAD, - $.hashfn_bswap = FNV_YoshimitsuTRIAD, - $.badseeds = {0x811c9dc5, 0x23d4a49d} -); + $.desc = "FNV-YoshimitsuTRIAD 32-bit (sanmayce)", + $.hash_flags = + FLAG_HASH_NO_SEED | + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0xD8AFFD71, + $.verification_BE = 0x85C2EC2F, + $.hashfn_native = FNV_YoshimitsuTRIAD, + $.hashfn_bswap = FNV_YoshimitsuTRIAD, + $.badseeds = { 0x811c9dc5, 0x23d4a49d } + ); REGISTER_HASH(FNV_Totenschiff, - $.desc = "FNV-Totenschiff 32-bit (sanmayce)", - $.hash_flags = - FLAG_HASH_NO_SEED | - FLAG_HASH_SMALL_SEED, - $.impl_flags = - 
FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_READ_PAST_EOB| - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x95D95ACF, - $.verification_BE = 0xC16E2C8F, - $.hashfn_native = FNV_Totenschiff, - $.hashfn_bswap = FNV_Totenschiff, - $.badseeds = {0x811c9dc5} -); + $.desc = "FNV-Totenschiff 32-bit (sanmayce)", + $.hash_flags = + FLAG_HASH_NO_SEED | + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x95D95ACF, + $.verification_BE = 0xC16E2C8F, + $.hashfn_native = FNV_Totenschiff, + $.hashfn_bswap = FNV_Totenschiff, + $.badseeds = { 0x811c9dc5 } + ); REGISTER_HASH(FNV_PippipYurii, - $.desc = "FNV-Pippip-Yurii 32-bit (sanmayce)", - $.hash_flags = - FLAG_HASH_NO_SEED | - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_READ_PAST_EOB| - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0xE79AE3E4, - $.verification_BE = 0x90C8C706, - $.hashfn_native = FNV_Pippip_Yurii, - $.hashfn_bswap = FNV_Pippip_Yurii, - $.badseeds = {0x811c9dc5} -); + $.desc = "FNV-Pippip-Yurii 32-bit (sanmayce)", + $.hash_flags = + FLAG_HASH_NO_SEED | + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0xE79AE3E4, + $.verification_BE = 0x90C8C706, + $.hashfn_native = FNV_Pippip_Yurii, + $.hashfn_bswap = FNV_Pippip_Yurii, + $.badseeds = { 0x811c9dc5 } + ); diff --git a/hashes/halftimehash.cpp b/hashes/halftimehash.cpp index 6d5aa658..08534b8a 100644 --- a/hashes/halftimehash.cpp +++ b/hashes/halftimehash.cpp @@ -37,701 +37,762 @@ //------------------------------------------------------------ namespace halftime_hash { + namespace advanced { + namespace { +//------------------------------------------------------------ + inline uint64_t Xor( uint64_t a, uint64_t b 
) { return a ^ b; } -namespace advanced { + inline uint64_t Plus( uint64_t a, uint64_t b ) { return a + b; } -namespace { + inline uint64_t Minus( uint64_t a, uint64_t b ) { return a - b; } -//------------------------------------------------------------ - inline uint64_t Xor(uint64_t a, uint64_t b) { return a ^ b; } - inline uint64_t Plus(uint64_t a, uint64_t b) { return a + b; } - inline uint64_t Minus(uint64_t a, uint64_t b) { return a - b; } - inline uint64_t LeftShift(uint64_t a, int s) { return a << s; } - inline uint64_t RightShift32(uint64_t a) { return a >> 32; } - inline uint64_t Sum(uint64_t a) { return a; } - inline uint64_t Negate(uint64_t a) { return -a; } - - inline uint64_t Plus32(uint64_t a, uint64_t b) { - uint64_t result; - uint32_t temp[2] = {(uint32_t)a + (uint32_t)b, - (uint32_t)(a >> 32) + (uint32_t)(b >> 32)}; - result = temp[0] + (((uint64_t)temp[1]) << 32); - return result; - } + inline uint64_t LeftShift( uint64_t a, int s ) { return a << s; } - inline uint64_t Times(uint64_t a, uint64_t b) { - constexpr uint64_t mask = (((uint64_t)1) << 32) - 1; - return (a & mask) * (b & mask); - } + inline uint64_t RightShift32( uint64_t a ) { return a >> 32; } - template < bool bswap > - struct BlockWrapperScalar { - using Block = uint64_t; + inline uint64_t Sum( uint64_t a ) { return a; } - static uint64_t LoadBlock(const void* x) { - auto y = reinterpret_cast(x); - return GET_U64(y, 0); - } + inline uint64_t Negate( uint64_t a ) { return -a; } - static uint64_t LoadBlockNative(const void* x) { - auto y = reinterpret_cast(x); - return GET_U64(y, 0); - } + inline uint64_t Plus32( uint64_t a, uint64_t b ) { + uint64_t result; + uint32_t temp[2] = { + (uint32_t)a + (uint32_t)b, + (uint32_t)(a >> 32) + (uint32_t)(b >> 32) + }; + + result = temp[0] + (((uint64_t)temp[1]) << 32); + return result; + } + + inline uint64_t Times( uint64_t a, uint64_t b ) { + constexpr uint64_t mask = (((uint64_t)1) << 32) - 1; + + return (a & mask) * (b & mask); + } + + 
template + struct BlockWrapperScalar { + using Block = uint64_t; + + static uint64_t LoadBlock( const void * x ) { + auto y = reinterpret_cast(x); + + return GET_U64(y, 0); + } - static uint64_t LoadOne(uint64_t entropy) { return entropy; } - }; + static uint64_t LoadBlockNative( const void * x ) { + auto y = reinterpret_cast(x); + + return GET_U64(y, 0); + } + + static uint64_t LoadOne( uint64_t entropy ) { return entropy; } + }; #if defined(HAVE_ARM_NEON) - using u128 = uint64x2_t; - - inline u128 LeftShift(u128 a, int i) { return vshlq_s64(a, vdupq_n_s64(i)); } - inline u128 Plus(u128 a, u128 b) { return vaddq_s64(a, b); } - inline u128 Minus(u128 a, u128 b) { return vsubq_s64(a, b); } - inline u128 Plus32(u128 a, u128 b) { return vaddq_s32(a, b); } - inline u128 RightShift32(u128 a) { return vshrq_n_u64(a, 32); } - - inline u128 Times(u128 a, u128 b) { - uint32x2_t a_lo = vmovn_u64(a); - uint32x2_t b_lo = vmovn_u64(b); - return vmull_u32(a_lo, b_lo); - } + using u128 = uint64x2_t; - inline u128 Xor(u128 a, u128 b) { return veorq_s32(a, b); } + inline u128 LeftShift( u128 a, int i ) { return vshlq_s64(a, vdupq_n_s64(i)); } - static inline u128 Negate(u128 a) { - const auto zero = vdupq_n_s64(0); - return Minus(zero, a); - } + inline u128 Plus( u128 a, u128 b ) { return vaddq_s64(a, b); } - inline uint64_t Sum(u128 a) { return vgetq_lane_s64(a, 0) + vgetq_lane_s64(a, 1); } + inline u128 Minus( u128 a, u128 b ) { return vsubq_s64(a, b); } - template < bool bswap > - struct BlockWrapper128 { - using Block = u128; + inline u128 Plus32( u128 a, u128 b ) { return vaddq_s32(a, b); } - static u128 LoadBlock(const void* x) { - auto y = reinterpret_cast(x); - if (bswap) { - return vrev64q_u8(vld1q_s32(y)); + inline u128 RightShift32( u128 a ) { return vshrq_n_u64(a, 32); } + + inline u128 Times( u128 a, u128 b ) { + uint32x2_t a_lo = vmovn_u64(a); + uint32x2_t b_lo = vmovn_u64(b); + + return vmull_u32(a_lo, b_lo); } - return vld1q_s32(y); - } - static u128 
LoadBlockNative(const void* x) { - auto y = reinterpret_cast(x); - return vld1q_s32(y); - } + inline u128 Xor( u128 a, u128 b ) { return veorq_s32(a, b); } + + static inline u128 Negate( u128 a ) { + const auto zero = vdupq_n_s64(0); + + return Minus(zero, a); + } + + inline uint64_t Sum( u128 a ) { return vgetq_lane_s64(a, 0) + vgetq_lane_s64(a, 1); } + + template + struct BlockWrapper128 { + using Block = u128; + + static u128 LoadBlock( const void * x ) { + auto y = reinterpret_cast(x); - static u128 LoadOne(uint64_t entropy) { return vdupq_n_s64(entropy); } - }; + if (bswap) { + return vrev64q_u8(vld1q_s32(y)); + } + return vld1q_s32(y); + } + + static u128 LoadBlockNative( const void * x ) { + auto y = reinterpret_cast(x); + + return vld1q_s32(y); + } + + static u128 LoadOne( uint64_t entropy ) { return vdupq_n_s64(entropy); } + }; #elif defined(HAVE_SSE_2) - using u128 = __m128i; - - inline u128 LeftShift(u128 a, int i) { return _mm_slli_epi64(a, i); } - inline u128 Plus(u128 a, u128 b) { return _mm_add_epi64(a, b); } - inline u128 Minus(u128 a, u128 b) { return _mm_sub_epi64(a, b); } - inline u128 Plus32(u128 a, u128 b) { return _mm_add_epi32(a, b); } - inline u128 RightShift32(u128 a) { return _mm_srli_epi64(a, 32); } - inline u128 Times(u128 a, u128 b) { return _mm_mul_epu32(a, b); } - inline u128 Xor(u128 a, u128 b) { return _mm_xor_si128(a, b); } - - static inline u128 Negate(u128 a) { - const auto zero = _mm_set1_epi64x(0); - return Minus(zero, a); - } + using u128 = __m128i; + + inline u128 LeftShift( u128 a, int i ) { return _mm_slli_epi64(a, i); } + + inline u128 Plus( u128 a, u128 b ) { return _mm_add_epi64(a, b); } + + inline u128 Minus( u128 a, u128 b ) { return _mm_sub_epi64(a, b); } - inline uint64_t Sum(u128 a) { return (uint64_t)a[0] + (uint64_t)a[1]; } + inline u128 Plus32( u128 a, u128 b ) { return _mm_add_epi32(a, b); } - template < bool bswap > - struct BlockWrapper128 { - using Block = u128; + inline u128 RightShift32( u128 a ) { return 
_mm_srli_epi64(a, 32); } - static u128 LoadBlock(const void* x) { - auto y = reinterpret_cast(x); - if (bswap) { - return mm_bswap64(_mm_loadu_si128(y)); + inline u128 Times( u128 a, u128 b ) { return _mm_mul_epu32(a, b); } + + inline u128 Xor( u128 a, u128 b ) { return _mm_xor_si128(a, b); } + + static inline u128 Negate( u128 a ) { + const auto zero = _mm_set1_epi64x(0); + + return Minus(zero, a); } - return _mm_loadu_si128(y); - } - static u128 LoadBlockNative(const void* x) { - auto y = reinterpret_cast(x); - return _mm_loadu_si128(y); - } + inline uint64_t Sum( u128 a ) { return (uint64_t)a[0] + (uint64_t)a[1]; } + + template + struct BlockWrapper128 { + using Block = u128; + + static u128 LoadBlock( const void * x ) { + auto y = reinterpret_cast(x); - static u128 LoadOne(uint64_t entropy) { return _mm_set1_epi64x(entropy); } - }; + if (bswap) { + return mm_bswap64(_mm_loadu_si128(y)); + } + return _mm_loadu_si128(y); + } + + static u128 LoadBlockNative( const void * x ) { + auto y = reinterpret_cast(x); + + return _mm_loadu_si128(y); + } + + static u128 LoadOne( uint64_t entropy ) { return _mm_set1_epi64x(entropy); } + }; #endif #if defined(HAVE_AVX2) - using u256 = __m256i; - - inline u256 Plus(u256 a, u256 b) { return _mm256_add_epi64(a, b); } - inline u256 Plus32(u256 a, u256 b) { return _mm256_add_epi32(a, b); } - inline u256 Times(u256 a, u256 b) { return _mm256_mul_epu32(a, b); } - inline u256 Xor(u256 a, u256 b) { return _mm256_xor_si256(a, b); } - inline u256 LeftShift(u256 a, int i) { return _mm256_slli_epi64(a, i); } - inline u256 RightShift32(u256 a) { return _mm256_srli_epi64(a, 32); } - inline u256 Minus(u256 a, u256 b) { return _mm256_sub_epi64(a, b); } - - static inline u256 Negate(u256 a) { - const auto zero = _mm256_set1_epi64x(0); - return Minus(zero, a); - } + using u256 = __m256i; - inline uint64_t Sum(u256 a) { - auto c = _mm256_extracti128_si256(a, 0); - auto d = _mm256_extracti128_si256(a, 1); - c = _mm_add_epi64(c, d); - 
static_assert(sizeof(c[0]) == sizeof(uint64_t), "u256 too granular"); - static_assert(sizeof(c) == 2 * sizeof(uint64_t), "u256 too granular"); - return (uint64_t)c[0] + (uint64_t)c[1]; - } + inline u256 Plus( u256 a, u256 b ) { return _mm256_add_epi64(a, b); } + + inline u256 Plus32( u256 a, u256 b ) { return _mm256_add_epi32(a, b); } + + inline u256 Times( u256 a, u256 b ) { return _mm256_mul_epu32(a, b); } - template < bool bswap > - struct BlockWrapper256 { - using Block = u256; + inline u256 Xor( u256 a, u256 b ) { return _mm256_xor_si256(a, b); } - static u256 LoadBlock(const void* x) { - auto y = reinterpret_cast(x); - if (bswap) { - return mm256_bswap64(_mm256_loadu_si256(y)); + inline u256 LeftShift( u256 a, int i ) { return _mm256_slli_epi64(a, i); } + + inline u256 RightShift32( u256 a ) { return _mm256_srli_epi64(a, 32); } + + inline u256 Minus( u256 a, u256 b ) { return _mm256_sub_epi64(a, b); } + + static inline u256 Negate( u256 a ) { + const auto zero = _mm256_set1_epi64x(0); + + return Minus(zero, a); } - return _mm256_loadu_si256(y); - } - static u256 LoadBlockNative(const void* x) { - auto y = reinterpret_cast(x); - return _mm256_loadu_si256(y); - } + inline uint64_t Sum( u256 a ) { + auto c = _mm256_extracti128_si256(a, 0); + auto d = _mm256_extracti128_si256(a, 1); - static u256 LoadOne(uint64_t entropy) { return _mm256_set1_epi64x(entropy); } - }; + c = _mm_add_epi64(c, d); + static_assert(sizeof(c[0]) == sizeof(uint64_t) , "u256 too granular"); + static_assert(sizeof(c) == 2 * sizeof(uint64_t), "u256 too granular"); + return (uint64_t)c[0] + (uint64_t)c[1]; + } + + template + struct BlockWrapper256 { + using Block = u256; + + static u256 LoadBlock( const void * x ) { + auto y = reinterpret_cast(x); + + if (bswap) { + return mm256_bswap64(_mm256_loadu_si256(y)); + } + return _mm256_loadu_si256(y); + } + + static u256 LoadBlockNative( const void * x ) { + auto y = reinterpret_cast(x); + + return _mm256_loadu_si256(y); + } + + static u256 
LoadOne( uint64_t entropy ) { return _mm256_set1_epi64x(entropy); } + }; #endif #if defined(HAVE_AVX512_F) - using u512 = __m512i; - - inline u512 Plus(u512 a, u512 b) { return _mm512_add_epi64(a, b); } - inline u512 Plus32(u512 a, u512 b) { return _mm512_add_epi32(a, b); } - inline u512 Times(u512 a, u512 b) { return _mm512_mul_epu32(a, b); } - inline u512 Xor(u512 a, u512 b) { return _mm512_xor_epi32(a, b); } - inline uint64_t Sum(u512 a) { return _mm512_reduce_add_epi64(a); } - inline u512 RightShift32(u512 a) { return _mm512_srli_epi64(a, 32); } - // inline u512 RightShift32(u512 a, int i) { return _mm512_shuffle_epi32(a, - // _MM_PERM_ACAC); } - inline u512 LeftShift(u512 a, int i) { return _mm512_slli_epi64(a, i); } - inline u512 Minus(u512 a, u512 b) { return _mm512_sub_epi64(a, b); } - inline u512 Negate(u512 a) { return Minus(_mm512_set1_epi64(0), a); } - - template < bool bswap > - struct BlockWrapper512 { - using Block = u512; - - static Block LoadBlock(const void* x) { - if (bswap) { - return mm512_bswap64(_mm512_loadu_si512(x)); - } - return _mm512_loadu_si512(x); - } + using u512 = __m512i; - static Block LoadBlockNative(const void* x) { - return _mm512_loadu_si512(x); - } + inline u512 Plus( u512 a, u512 b ) { return _mm512_add_epi64(a, b); } - static Block LoadOne(uint64_t entropy) { - return _mm512_set1_epi64(entropy); - } - }; + inline u512 Plus32( u512 a, u512 b ) { return _mm512_add_epi32(a, b); } + + inline u512 Times( u512 a, u512 b ) { return _mm512_mul_epu32(a, b); } + + inline u512 Xor( u512 a, u512 b ) { return _mm512_xor_epi32(a, b); } + + inline uint64_t Sum( u512 a ) { return _mm512_reduce_add_epi64(a); } + + inline u512 RightShift32( u512 a ) { return _mm512_srli_epi64(a, 32); } + + // inline u512 RightShift32(u512 a, int i) { return _mm512_shuffle_epi32(a, + // _MM_PERM_ACAC); } + inline u512 LeftShift( u512 a, int i ) { return _mm512_slli_epi64(a, i); } + + inline u512 Minus( u512 a, u512 b ) { return _mm512_sub_epi64(a, b); } + + 
inline u512 Negate( u512 a ) { return Minus(_mm512_set1_epi64(0), a); } + + template + struct BlockWrapper512 { + using Block = u512; + + static Block LoadBlock( const void * x ) { + if (bswap) { + return mm512_bswap64(_mm512_loadu_si512(x)); + } + return _mm512_loadu_si512(x); + } + + static Block LoadBlockNative( const void * x ) { + return _mm512_loadu_si512(x); + } + + static Block LoadOne( uint64_t entropy ) { + return _mm512_set1_epi64(entropy); + } + }; #endif - template - T MultiplyAdd(const T & summand, const T & factor1, const T & factor2) { - return Plus(summand, Times(factor1, factor2)); - } + template + T MultiplyAdd( const T & summand, const T & factor1, const T & factor2 ) { + return Plus(summand, Times(factor1, factor2)); + } #if defined(HAVE_ARM_NEON) - template <> - u128 MultiplyAdd(const u128 & summand, const u128 & factor1, const u128 & factor2) { - return vmlal_u32(summand, vmovn_u64(factor1), vmovn_u64(factor2)); - } -#endif + template <> + u128 MultiplyAdd( const u128 & summand, const u128 & factor1, const u128 & factor2 ) { + return vmlal_u32(summand, vmovn_u64(factor1), vmovn_u64(factor2)); + } + +#endif //------------------------------------------------------------ -template -inline void Encode3(Block raw_io[9 * 3]) { - auto io = reinterpret_cast(raw_io); - constexpr unsigned x = 0, y = 1, z = 2; - - const Block* iter = io[0]; - io[7][x] = io[8][x] = iter[x]; - io[7][y] = io[8][y] = iter[y]; - io[7][z] = io[8][z] = iter[z]; - iter += 1; - - auto DistributeRaw = [io, iter](unsigned slot, unsigned label, - std::initializer_list rest) { - for (unsigned i : rest) { - io[slot][i] = Xor(io[slot][i], iter[label]); - } - }; - - auto Distribute3 = [&iter, DistributeRaw, x, y, z](unsigned idx, - std::initializer_list a, - std::initializer_list b, - std::initializer_list c) { - DistributeRaw(idx, x, a); - DistributeRaw(idx, y, b); - DistributeRaw(idx, z, c); - iter += 1; - }; - - while (iter != io[9]) { - Distribute3(7, {x}, {y}, {z}); - } - - iter = 
io[1]; - Distribute3(8, {z}, {x, z}, {y}); - Distribute3(8, {x, z}, {x, y, z}, {y, z}); - Distribute3(8, {y}, {y, z}, {x, z}); - Distribute3(8, {x, y}, {z}, {x}); - Distribute3(8, {y, z}, {x, y}, {x, y, z}); - Distribute3(8, {x, y, z}, {x}, {x, y}); -} + template + inline void Encode3( Block raw_io[9 * 3] ) { + auto io = reinterpret_cast(raw_io); + constexpr unsigned x = 0, y = 1, z = 2; + + const Block * iter = io[0]; + + io[7][x] = io[8][x] = iter[x]; + io[7][y] = io[8][y] = iter[y]; + io[7][z] = io[8][z] = iter[z]; + iter += 1; + + auto DistributeRaw = [io, iter]( unsigned slot, unsigned label, + std::initializer_list rest ) { + for (unsigned i: rest) { + io[slot][i] = Xor(io[slot][i], iter[label]); + } + }; + + auto Distribute3 = [&iter, DistributeRaw, x, y, z]( unsigned idx, + std::initializer_list a , + std::initializer_list b , + std::initializer_list c ) { + DistributeRaw(idx, x, a); + DistributeRaw(idx, y, b); + DistributeRaw(idx, z, c); + iter += 1; + }; + + while (iter != io[9]) { + Distribute3(7, { x }, { y }, { z }); + } + + iter = io[1]; + Distribute3(8, { z } , { x, z } , { y } ); + Distribute3(8, { x, z } , { x, y, z }, { y, z }); + Distribute3(8, { y } , { y, z } , { x, z }); + Distribute3(8, { x, y } , { z } , { x } ); + Distribute3(8, { y, z } , { x, y } , { x, y, z }); + Distribute3(8, { x, y, z }, { x } , { x, y }); + } -template -inline void Encode2(Block raw_io[7 * 3]) { - auto io = reinterpret_cast(raw_io); - for (int i = 0; i < 3; ++i) { - io[6][i] = io[0][i]; - for (int j = 1; j < 6; ++j) { - io[6][i] = Xor(io[6][i], io[j][i]); - } - } -} + template + inline void Encode2( Block raw_io[7 * 3] ) { + auto io = reinterpret_cast(raw_io); + + for (int i = 0; i < 3; ++i) { + io[6][i] = io[0][i]; + for (int j = 1; j < 6; ++j) { + io[6][i] = Xor(io[6][i], io[j][i]); + } + } + } // https://docs.switzernet.com/people/emin-gabrielyan/051102-erasure-10-7-resilient/ -template -inline void Encode4(Block raw_io[10 * 3]) { - auto io = 
reinterpret_cast(raw_io); - - constexpr unsigned x = 0, y = 1, z = 2; - - const Block* iter = io[0]; - io[7][x] = io[8][x] = io[9][x] = iter[x]; - io[7][y] = io[8][y] = io[9][y] = iter[y]; - io[7][z] = io[8][z] = io[9][z] = iter[z]; - iter += 1; - - auto DistributeRaw = [io, iter](unsigned slot, unsigned label, - std::initializer_list rest) { - for (unsigned i : rest) { - io[slot][i] = Xor(io[slot][i], iter[label]); - } - }; - - auto Distribute3 = [&iter, DistributeRaw, x, y, z](unsigned idx, - std::initializer_list a, - std::initializer_list b, - std::initializer_list c) { - DistributeRaw(idx, x, a); - DistributeRaw(idx, y, b); - DistributeRaw(idx, z, c); - iter += 1; - }; - - while (iter != io[10]) { - Distribute3(7, {x}, {y}, {z}); - } - - iter = io[1]; - Distribute3(8, {z}, {x, z}, {y}); // 73 - Distribute3(8, {x, z}, {x, y, z}, {y, z}); // 140 - Distribute3(8, {y}, {y, z}, {x, z}); // 167 - Distribute3(8, {x, y}, {z}, {x}); // 198 - Distribute3(8, {y, z}, {x, y}, {x, y, z}); // 292 - Distribute3(8, {x, y, z}, {x}, {x, y}); // 323 - - iter = io[1]; - Distribute3(9, {x, z}, {x, y, z}, {y, z}); // 140 - Distribute3(9, {x, y}, {z}, {x}); // 198 - Distribute3(9, {z}, {x, z}, {y}); // 73 - Distribute3(9, {y, z}, {x, y}, {x, y, z}); // 292 - Distribute3(9, {x, y, z}, {x}, {x, y}); // 323 - Distribute3(9, {y}, {y, z}, {x, z}); // 167 -} + template + inline void Encode4( Block raw_io[10 * 3] ) { + auto io = reinterpret_cast(raw_io); + + constexpr unsigned x = 0, y = 1, z = 2; + + const Block * iter = io[0]; + + io[7][x] = io[8][x] = io[9][x] = iter[x]; + io[7][y] = io[8][y] = io[9][y] = iter[y]; + io[7][z] = io[8][z] = io[9][z] = iter[z]; + iter += 1; + + auto DistributeRaw = [io, iter]( unsigned slot, unsigned label, + std::initializer_list rest ) { + for (unsigned i: rest) { + io[slot][i] = Xor(io[slot][i], iter[label]); + } + }; + + auto Distribute3 = [&iter, DistributeRaw, x, y, z]( unsigned idx, + std::initializer_list a , + std::initializer_list b , + 
std::initializer_list c ) { + DistributeRaw(idx, x, a); + DistributeRaw(idx, y, b); + DistributeRaw(idx, z, c); + iter += 1; + }; + + while (iter != io[10]) { + Distribute3(7, { x }, { y }, { z }); + } + + iter = io[1]; + Distribute3(8, { z } , { x, z } , { y } ); // 73 + Distribute3(8, { x, z } , { x, y, z }, { y, z }); // 140 + Distribute3(8, { y } , { y, z } , { x, z }); // 167 + Distribute3(8, { x, y } , { z } , { x } ); // 198 + Distribute3(8, { y, z } , { x, y } , { x, y, z }); // 292 + Distribute3(8, { x, y, z }, { x } , { x, y }); // 323 + + iter = io[1]; + Distribute3(9, { x, z } , { x, y, z }, { y, z }); // 140 + Distribute3(9, { x, y } , { z } , { x } ); // 198 + Distribute3(9, { z } , { x, z } , { y } ); // 73 + Distribute3(9, { y, z } , { x, y } , { x, y, z }); // 292 + Distribute3(9, { x, y, z }, { x } , { x, y }); // 323 + Distribute3(9, { y } , { y, z } , { x, z }); // 167 + } // https://docs.switzernet.com/people/emin-gabrielyan/051103-erasure-9-5-resilient/ -template -inline void Encode5(Block raw_io[9 * 3]) { - auto io = reinterpret_cast(raw_io); - - constexpr unsigned x = 0, y = 1, z = 2; - - const Block* iter = io[0]; - io[5][x] = io[6][x] = iter[x]; - io[5][y] = io[6][y] = iter[y]; - io[5][z] = io[6][z] = iter[z]; - - io[7][x] = io[8][x] = iter[y]; - io[7][y] = io[8][y] = iter[z]; - io[7][z] = io[8][z] = Xor(iter[x], iter[y]); - iter += 1; - - auto DistributeRaw = [io, iter](unsigned slot, unsigned label, - std::initializer_list rest) { - for (unsigned i : rest) { - io[slot][i] = Xor(io[slot][i], iter[label]); - } - }; - - auto Distribute3 = [&iter, DistributeRaw, x, y, z](unsigned idx, - std::initializer_list a, - std::initializer_list b, - std::initializer_list c) { - DistributeRaw(idx, x, a); - DistributeRaw(idx, y, b); - DistributeRaw(idx, z, c); - iter += 1; - }; - - while (iter != io[9]) { - Distribute3(5, {x}, {y}, {z}); - } - - iter = io[1]; - Distribute3(6, {z}, {x, z}, {y}); // 73 - Distribute3(6, {x, z}, {x, y, z}, {y, z}); // 140 - 
Distribute3(6, {y}, {y, z}, {x, z}); // 167 - Distribute3(6, {x, y}, {z}, {x}); // 198 - - iter = io[1]; - Distribute3(7, {x, y, z}, {x}, {x, y}); // 323 - Distribute3(7, {x, z}, {x, y, z}, {y, z}); // 140 - Distribute3(7, {x}, {y}, {z}); // 11 - Distribute3(7, {y}, {y, z}, {x, z}); // 167 - - iter = io[1]; - Distribute3(8, {x}, {y}, {z}); // 11 - Distribute3(8, {x, y}, {z}, {x}); // 198 - Distribute3(8, {y, z}, {x, y}, {x, y, z}); // 292 - Distribute3(8, {x, z}, {x, y, z}, {y, z}); // 140 -} + template + inline void Encode5( Block raw_io[9 * 3] ) { + auto io = reinterpret_cast(raw_io); + + constexpr unsigned x = 0, y = 1, z = 2; + + const Block * iter = io[0]; + + io[5][x] = io[6][x] = iter[x]; + io[5][y] = io[6][y] = iter[y]; + io[5][z] = io[6][z] = iter[z]; + + io[7][x] = io[8][x] = iter[y]; + io[7][y] = io[8][y] = iter[z]; + io[7][z] = io[8][z] = Xor(iter[x], iter[y]); + iter += 1; + + auto DistributeRaw = [io, iter]( unsigned slot, unsigned label, + std::initializer_list rest ) { + for (unsigned i: rest) { + io[slot][i] = Xor(io[slot][i], iter[label]); + } + }; + + auto Distribute3 = [&iter, DistributeRaw, x, y, z]( unsigned idx, + std::initializer_list a , + std::initializer_list b , + std::initializer_list c ) { + DistributeRaw(idx, x, a); + DistributeRaw(idx, y, b); + DistributeRaw(idx, z, c); + iter += 1; + }; + + while (iter != io[9]) { + Distribute3(5, { x }, { y }, { z }); + } + + iter = io[1]; + Distribute3(6, { z } , { x, z } , { y } ); // 73 + Distribute3(6, { x, z } , { x, y, z }, { y, z }); // 140 + Distribute3(6, { y } , { y, z } , { x, z }); // 167 + Distribute3(6, { x, y } , { z } , { x } ); // 198 + + iter = io[1]; + Distribute3(7, { x, y, z }, { x } , { x, y }); // 323 + Distribute3(7, { x, z } , { x, y, z }, { y, z }); // 140 + Distribute3(7, { x } , { y }, { z }); // 11 + Distribute3(7, { y } , { y, z } , { x, z }); // 167 + + iter = io[1]; + Distribute3(8, { x } , { y }, { z }); // 11 + Distribute3(8, { x, y } , { z } , { x } ); // 198 + 
Distribute3(8, { y, z } , { x, y } , { x, y, z }); // 292 + Distribute3(8, { x, z } , { x, y, z }, { y, z }); // 140 + } -template -inline void Combine2(const Block input[7], Block output[2]); + template + inline void Combine2( const Block input[7], Block output[2] ); -template -inline void Combine3(const Block input[9], Block output[3]); + template + inline void Combine3( const Block input[9], Block output[3] ); -template -inline void Combine4(const Block input[10], Block output[3]); + template + inline void Combine4( const Block input[10], Block output[3] ); -template -inline void Combine5(const Block input[9], Block output[3]); + template + inline void Combine5( const Block input[9], Block output[3] ); -constexpr inline uint64_t FloorLog(uint64_t a, uint64_t b) { - return (0 == a) ? 0 : ((b < a) ? 0 : (1 + (FloorLog(a, b / a)))); -} + constexpr inline uint64_t FloorLog( uint64_t a, uint64_t b ) { + return (0 == a) ? 0 : ((b < a) ? 0 : (1 + (FloorLog(a, b / a)))); + } -template -struct EhcBadger { - using Block = typename BlockWrapper::Block; - - static Block Mix(const Block & accum, const Block & input, const Block & entropy) { - Block output = Plus32(entropy, input); - Block twin = RightShift32(output); - output = MultiplyAdd(accum, output, twin); - return output; - } - - static Block MixOne(const Block & accum, const Block & input, uint64_t entropy) { - return Mix(accum, input, BlockWrapper::LoadOne(entropy)); - } - - static Block MixNone(const Block & input, uint64_t entropy_word) { - Block entropy = BlockWrapper::LoadOne(entropy_word); - Block output = Plus32(entropy, input); - Block twin = RightShift32(output); - output = Times(output, twin); - return output; - } - - static void EhcUpperLayer(const Block (&input)[fanout][out_width], - const uint64_t entropy[out_width * (fanout - 1)], - Block (&output)[out_width]) { - for (unsigned i = 0; i < out_width; ++i) { - output[i] = input[0][i]; - for (unsigned j = 1; j < fanout; ++j) { - output[i] = 
MixOne(output[i], input[j][i], entropy[(fanout - 1) * i + j - 1]); - } - } - } - - static void Encode(Block io[encoded_dimension][in_width]) { - static_assert(2 <= out_width && out_width <= 5, "uhoh"); - if (out_width == 3) return Encode3(&io[0][0]); - if (out_width == 2) return Encode2(&io[0][0]); - if (out_width == 4) return Encode4(&io[0][0]); - if (out_width == 5) return Encode5(&io[0][0]); - } - - static Block SimpleTimes(std::integral_constant, const Block & x) { return Negate(x); } - static Block SimpleTimes(std::integral_constant, const Block & x) { return x; } - static Block SimpleTimes(std::integral_constant, const Block & x) { - return LeftShift(x, 1); - } - static Block SimpleTimes(std::integral_constant, const Block & x) { - return Plus(x, LeftShift(x, 1)); - } - static Block SimpleTimes(std::integral_constant, const Block & x) { - return LeftShift(x, 2); - } - static Block SimpleTimes(std::integral_constant, const Block & x) { - return Plus(x, LeftShift(x, 2)); - } - static Block SimpleTimes(std::integral_constant, const Block & x) { - return Minus(LeftShift(x, 3), x); - } - static Block SimpleTimes(std::integral_constant, const Block & x) { - return LeftShift(x, 3); - } - static Block SimpleTimes(std::integral_constant, const Block & x) { - return Plus(x, LeftShift(x, 3)); - } - - template - static Block SimplerTimes(const Block & x) { - return SimpleTimes(std::integral_constant{}, x); - } - - template - static void Dot2(Block sinks[2], const Block & x) { - sinks[0] = Plus(sinks[0], SimplerTimes(x)); - sinks[1] = Plus(sinks[1], SimplerTimes(x)); - } - - template - static void Dot3(Block sinks[3], const Block & x) { - Dot2(sinks, x); - sinks[2] = Plus(sinks[2], SimplerTimes(x)); - } - - template - static void Dot4(Block sinks[4], const Block & x) { - Dot3(sinks, x); - sinks[3] = Plus(sinks[3], SimplerTimes(x)); - } - - template - static void Dot5(Block sinks[5], const Block & x) { - Dot4(sinks, x); - sinks[4] = Plus(sinks[4], SimplerTimes(x)); - } - - 
static void Combine(const Block input[encoded_dimension], Block (&output)[out_width]) { - if (out_width == 3) return Combine3(input, output); - if (out_width == 2) return Combine2(input, output); - if (out_width == 4) return Combine4(input, output); - if (out_width == 5) return Combine5(input, output); - } - - static void Load(const uint8_t input[dimension * in_width * sizeof(Block)], - Block output[dimension][in_width]) { - static_assert(dimension * in_width <= 28, ""); + template + struct EhcBadger { + using Block = typename BlockWrapper::Block; + + static Block Mix( const Block & accum, const Block & input, const Block & entropy ) { + Block output = Plus32(entropy, input); + Block twin = RightShift32(output); + + output = MultiplyAdd(accum, output, twin); + return output; + } + + static Block MixOne( const Block & accum, const Block & input, uint64_t entropy ) { + return Mix(accum, input, BlockWrapper::LoadOne(entropy)); + } + + static Block MixNone( const Block & input, uint64_t entropy_word ) { + Block entropy = BlockWrapper::LoadOne(entropy_word); + Block output = Plus32(entropy, input); + Block twin = RightShift32(output); + + output = Times(output, twin); + return output; + } + + static void EhcUpperLayer( const Block (& input)[fanout][out_width], + const uint64_t entropy[out_width * (fanout - 1)], Block (& output)[out_width] ) { + for (unsigned i = 0; i < out_width; ++i) { + output[i] = input[0][i]; + for (unsigned j = 1; j < fanout; ++j) { + output[i] = MixOne(output[i], input[j][i], entropy[(fanout - 1) * i + j - 1]); + } + } + } + + static void Encode( Block io[encoded_dimension][in_width] ) { + static_assert(2 <= out_width && out_width <= 5, "uhoh"); + if (out_width == 3) { return Encode3(&io[0][0]); } + if (out_width == 2) { return Encode2(&io[0][0]); } + if (out_width == 4) { return Encode4(&io[0][0]); } + if (out_width == 5) { return Encode5(&io[0][0]); } + } + + static Block SimpleTimes( std::integral_constant, const Block & x ) { return Negate(x); 
} + + static Block SimpleTimes( std::integral_constant, const Block & x ) { return x; } + + static Block SimpleTimes( std::integral_constant, const Block & x ) { + return LeftShift(x, 1); + } + + static Block SimpleTimes( std::integral_constant, const Block & x ) { + return Plus(x, LeftShift(x, 1)); + } + + static Block SimpleTimes( std::integral_constant, const Block & x ) { + return LeftShift(x, 2); + } + + static Block SimpleTimes( std::integral_constant, const Block & x ) { + return Plus(x, LeftShift(x, 2)); + } + + static Block SimpleTimes( std::integral_constant, const Block & x ) { + return Minus(LeftShift(x, 3), x); + } + + static Block SimpleTimes( std::integral_constant, const Block & x ) { + return LeftShift(x, 3); + } + + static Block SimpleTimes( std::integral_constant, const Block & x ) { + return Plus(x, LeftShift(x, 3)); + } + + template + static Block SimplerTimes( const Block & x ) { + return SimpleTimes(std::integral_constant{}, x); + } + + template + static void Dot2( Block sinks[2], const Block & x ) { + sinks[0] = Plus(sinks[0], SimplerTimes(x)); + sinks[1] = Plus(sinks[1], SimplerTimes(x)); + } + + template + static void Dot3( Block sinks[3], const Block & x ) { + Dot2(sinks, x); + sinks[2] = Plus(sinks[2], SimplerTimes(x)); + } + + template + static void Dot4( Block sinks[4], const Block & x ) { + Dot3(sinks, x); + sinks[3] = Plus(sinks[3], SimplerTimes(x)); + } + + template + static void Dot5( Block sinks[5], const Block & x ) { + Dot4(sinks, x); + sinks[4] = Plus(sinks[4], SimplerTimes(x)); + } + + static void Combine( const Block input[encoded_dimension], Block (& output)[out_width] ) { + if (out_width == 3) { return Combine3(input, output); } + if (out_width == 2) { return Combine2(input, output); } + if (out_width == 4) { return Combine4(input, output); } + if (out_width == 5) { return Combine5(input, output); } + } + + static void Load( const uint8_t input[dimension * in_width * sizeof(Block)], + Block output[dimension][in_width] ) { + 
static_assert(dimension * in_width <= 28, ""); #if !defined(__clang__) -#pragma GCC unroll 28 + #pragma GCC unroll 28 #else -#pragma unroll + #pragma unroll #endif - for (unsigned i = 0; i < dimension; ++i) { + for (unsigned i = 0; i < dimension; ++i) { #if !defined(__clang__) -#pragma GCC unroll 28 + #pragma GCC unroll 28 #else -#pragma unroll + #pragma unroll #endif - for (unsigned j = 0; j < in_width; ++j) { - output[i][j] = - BlockWrapper::LoadBlock(&input[(i * in_width + j) * sizeof(Block)]); - } - } - } - - static void Hash(const Block (&input)[encoded_dimension][in_width], - const uint64_t entropy[encoded_dimension][in_width], - Block output[encoded_dimension]) { - for (unsigned i = 0; i < encoded_dimension; ++i) { - output[i] = MixNone(input[i][0], entropy[i][0]); - // TODO: should loading take care of this? - } - for (unsigned j = 1; j < in_width; ++j) { - for (unsigned i = 0; i < encoded_dimension; ++i) { - output[i] = MixOne(output[i], input[i][j], entropy[i][j]); - // TODO: this might be optional; it might not matter which way we iterate over - // entropy - } - } - } - - static void EhcBaseLayer(const uint8_t input[dimension * in_width * sizeof(Block)], - const uint64_t raw_entropy[encoded_dimension][in_width], - Block (&output)[out_width]) { - Block scratch[encoded_dimension][in_width]; - Block tmpout[encoded_dimension]; - Load(input, scratch); - Encode(scratch); - Hash(scratch, raw_entropy, tmpout); - Combine(tmpout, output); - } - - static void DfsTreeHash(const uint8_t* data, size_t block_group_length, - Block stack[][fanout][out_width], int stack_lengths[], - const uint64_t* entropy) { - auto entropy_matrix = reinterpret_cast(entropy); - for (size_t k = 0; k < block_group_length; ++k) { - int i = 0; - while (stack_lengths[i] == fanout) ++i; - for (int j = i - 1; j >= 0; --j) { - EhcUpperLayer( - stack[j], - &entropy[encoded_dimension * in_width + (fanout - 1) * out_width * j], - stack[j + 1][stack_lengths[j + 1]]); - stack_lengths[j] = 0; - 
stack_lengths[j + 1] += 1; - } - - EhcBaseLayer(&data[k * dimension * in_width * sizeof(Block)], entropy_matrix, - stack[0][stack_lengths[0]]); - stack_lengths[0] += 1; - } - } - - // auto b = sizeof(Block) / sizeof(uint64_t); - static constexpr size_t GEBN_b() { return sizeof(Block) / sizeof(uint64_t); } - // auto h = FloorLog(fanout, n / (b * dimension * in_width)); - static constexpr size_t GEBN_h(size_t n) { return FloorLog(fanout, n / (GEBN_b() * dimension * in_width)); } - static constexpr size_t GetEntropyBytesNeeded(size_t n) { - return sizeof(uint64_t) * (encoded_dimension * in_width + (fanout - 1) * out_width * GEBN_h(n) + - GEBN_b() * fanout * out_width * GEBN_h(n) + GEBN_b() * dimension * in_width + out_width - 1); - } - - struct BlockGreedy { - private: - const uint64_t* seeds; - Block accum[out_width] = {}; - - public: - BlockGreedy(const uint64_t seeds[]) : seeds(seeds) {} - - void Insert(const Block (&x)[out_width]) { - for (unsigned i = 0; i < out_width; ++i) { - accum[i] = Mix(accum[i], x[i], BlockWrapper::LoadBlockNative(seeds)); - seeds += sizeof(Block) / sizeof(uint64_t); - } - } - - void Insert(const Block & x) { - for (unsigned i = 0; i < out_width; ++i) { - accum[i] = - Mix(accum[i], x, - BlockWrapper::LoadBlockNative(&seeds[i * sizeof(Block) / sizeof(uint64_t)])); - } - // Toeplitz - seeds += sizeof(Block) / sizeof(uint64_t); - } - - void Hash(uint64_t output[out_width]) const { - for (unsigned i = 0; i < out_width; ++i) { - output[i] = Sum(accum[i]); - } - } - }; - - static void DfsGreedyFinalizer(const Block stack[][fanout][out_width], - const int stack_lengths[], const uint8_t* uint8_t_input, - size_t uint8_t_length, const uint64_t* entropy, - uint64_t output[out_width]) { - BlockGreedy b(entropy); - for (int j = 0; stack_lengths[j] > 0; ++j) { - for (int k = 0; k < stack_lengths[j]; k += 1) { - b.Insert(stack[j][k]); - } - } - - size_t i = 0; - for (; i + sizeof(Block) <= uint8_t_length; i += sizeof(Block)) { - 
b.Insert(BlockWrapper::LoadBlock(&uint8_t_input[i])); - } - - if (1) { - uint8_t extra[sizeof(Block)]; - memcpy(extra, &uint8_t_input[i], uint8_t_length - i); - memset(extra + uint8_t_length - i, 0, sizeof(extra) - uint8_t_length + i); - b.Insert(BlockWrapper::LoadBlock(extra)); - } else if (1) { - Block extra = {}; - memcpy(&extra, &uint8_t_input[i], uint8_t_length - i); - b.Insert(extra); - } else { - Block extra; - uint8_t* extra_uint8_t = reinterpret_cast(&extra); - for (unsigned j = 0; j < sizeof(Block); ++j) { - if (j < uint8_t_length - i) { - extra_uint8_t[j] = uint8_t_input[i + j]; - } else { - extra_uint8_t[j] = 0; - } - } - b.Insert(extra); - } - b.Hash(output); - } -}; // EhcBadger + for (unsigned j = 0; j < in_width; ++j) { + output[i][j] = + BlockWrapper::LoadBlock(&input[(i * in_width + j) * sizeof(Block)]); + } + } + } + + static void Hash( const Block (& input)[encoded_dimension][in_width], + const uint64_t entropy[encoded_dimension][in_width], Block output[encoded_dimension] ) { + for (unsigned i = 0; i < encoded_dimension; ++i) { + output[i] = MixNone(input[i][0], entropy[i][0]); + // TODO: should loading take care of this? 
+ } + for (unsigned j = 1; j < in_width; ++j) { + for (unsigned i = 0; i < encoded_dimension; ++i) { + output[i] = MixOne(output[i], input[i][j], entropy[i][j]); + // TODO: this might be optional; it might not matter which way we iterate over + // entropy + } + } + } + + static void EhcBaseLayer( const uint8_t input[dimension * in_width * sizeof(Block)], + const uint64_t raw_entropy[encoded_dimension][in_width], Block (& output)[out_width] ) { + Block scratch[encoded_dimension][in_width]; + Block tmpout[encoded_dimension]; + + Load(input, scratch); + Encode(scratch); + Hash(scratch, raw_entropy, tmpout); + Combine(tmpout, output); + } + + static void DfsTreeHash( const uint8_t * data, size_t block_group_length, + Block stack[][fanout][out_width], int stack_lengths[], const uint64_t * entropy ) { + auto entropy_matrix = reinterpret_cast(entropy); + + for (size_t k = 0; k < block_group_length; ++k) { + int i = 0; + while (stack_lengths[i] == fanout) { ++i; } + for (int j = i - 1; j >= 0; --j) { + EhcUpperLayer(stack[j], + &entropy[encoded_dimension * in_width + (fanout - 1) * out_width * j], + stack[j + 1][stack_lengths[j + 1]]); + stack_lengths[j] = 0; + stack_lengths[j + 1] += 1; + } + + EhcBaseLayer(&data[k * dimension * in_width * sizeof(Block)], + entropy_matrix, stack[0][stack_lengths[0]]); + stack_lengths[0] += 1; + } + } + + // auto b = sizeof(Block) / sizeof(uint64_t); + static constexpr size_t GEBN_b() { return sizeof(Block) / sizeof(uint64_t); } + + // auto h = FloorLog(fanout, n / (b * dimension * in_width)); + static constexpr size_t GEBN_h( size_t n ) { + return FloorLog(fanout, n / (GEBN_b() * dimension * in_width)); + } + + static constexpr size_t GetEntropyBytesNeeded( size_t n ) { + return sizeof(uint64_t) * (encoded_dimension * in_width + (fanout - 1) * out_width * GEBN_h(n) + + GEBN_b() * fanout * out_width * GEBN_h(n) + GEBN_b() * dimension * in_width + out_width - 1); + } + + struct BlockGreedy { + private: + const uint64_t * seeds; + Block 
accum[out_width] = {}; + + public: + BlockGreedy( const uint64_t seeds[] ) : + seeds( seeds ) {} + + void Insert( const Block (& x)[out_width] ) { + for (unsigned i = 0; i < out_width; ++i) { + accum[i] = Mix(accum[i], x[i], BlockWrapper::LoadBlockNative(seeds)); + seeds += sizeof(Block) / sizeof(uint64_t); + } + } + + void Insert( const Block & x ) { + for (unsigned i = 0; i < out_width; ++i) { + accum[i] = + Mix(accum[i], x, BlockWrapper::LoadBlockNative( + &seeds[i * sizeof(Block) / sizeof(uint64_t)])); + } + // Toeplitz + seeds += sizeof(Block) / sizeof(uint64_t); + } + + void Hash( uint64_t output[out_width] ) const { + for (unsigned i = 0; i < out_width; ++i) { + output[i] = Sum(accum[i]); + } + } + }; + + static void DfsGreedyFinalizer( const Block stack[][fanout][out_width], const int stack_lengths[], + const uint8_t * uint8_t_input, size_t uint8_t_length, const uint64_t * entropy, + uint64_t output[out_width] ) { + BlockGreedy b( entropy ); + + for (int j = 0; stack_lengths[j] > 0; ++j) { + for (int k = 0; k < stack_lengths[j]; k += 1) { + b.Insert(stack[j][k]); + } + } + + size_t i = 0; + for (; i + sizeof(Block) <= uint8_t_length; i += sizeof(Block)) { + b.Insert(BlockWrapper::LoadBlock(&uint8_t_input[i])); + } + + if (1) { + uint8_t extra[sizeof(Block)]; + memcpy(extra, &uint8_t_input[i], uint8_t_length - i); + memset(extra + uint8_t_length - i, 0, sizeof(extra) - uint8_t_length + i); + b.Insert(BlockWrapper::LoadBlock(extra)); + } else if (1) { + Block extra = {}; + memcpy(&extra, &uint8_t_input[i], uint8_t_length - i); + b.Insert(extra); + } else { + Block extra; + uint8_t * extra_uint8_t = reinterpret_cast(&extra); + for (unsigned j = 0; j < sizeof(Block); ++j) { + if (j < uint8_t_length - i) { + extra_uint8_t[j] = uint8_t_input[i + j]; + } else { + extra_uint8_t[j] = 0; + } + } + b.Insert(extra); + } + b.Hash(output); + } + }; // EhcBadger // evenness: 2 weight: 10 // 0 0 1 4 1 1 2 2 1 // 1 1 0 0 1 4 1 2 2 // 1 4 1 1 0 0 2 1 2 -template -inline void 
Combine3(const Block input[9], Block output[3]) { - output[1] = input[0]; - output[2] = input[0]; + template + inline void Combine3( const Block input[9], Block output[3] ) { + output[1] = input[0]; + output[2] = input[0]; - output[1] = Plus(output[1], input[1]); - output[2] = Plus(output[2], LeftShift(input[1], 2)); + output[1] = Plus(output[1], input[1]); + output[2] = Plus(output[2], LeftShift(input[1], 2)); - output[0] = input[2]; - output[2] = Plus(output[2], input[2]); + output[0] = input[2]; + output[2] = Plus(output[2], input[2]); - output[0] = Plus(output[0], LeftShift(input[3], 2)); - output[2] = Plus(output[2], input[3]); + output[0] = Plus(output[0], LeftShift(input[3], 2)); + output[2] = Plus(output[2], input[3]); - output[0] = Plus(output[0], input[4]); - output[1] = Plus(output[1], input[4]); + output[0] = Plus(output[0], input[4]); + output[1] = Plus(output[1], input[4]); - output[0] = Plus(output[0], input[5]); - output[1] = Plus(output[1], LeftShift(input[5], 2)); + output[0] = Plus(output[0], input[5]); + output[1] = Plus(output[1], LeftShift(input[5], 2)); - Badger::template Dot3<2, 1, 2>(output, input[6]); - Badger::template Dot3<2, 2, 1>(output, input[7]); - Badger::template Dot3<1, 2, 2>(output, input[8]); -} + Badger::template Dot3<2, 1, 2>(output, input[6]); + Badger::template Dot3<2, 2, 1>(output, input[7]); + Badger::template Dot3<1, 2, 2>(output, input[8]); + } -template -inline void Combine2(const Block input[7], Block output[2]) { - output[0] = input[0]; - output[1] = input[1]; + template + inline void Combine2( const Block input[7], Block output[2] ) { + output[0] = input[0]; + output[1] = input[1]; - Badger::template Dot2<1, 1>(output, input[2]); - Badger::template Dot2<1, 2>(output, input[3]); - Badger::template Dot2<2, 1>(output, input[4]); - Badger::template Dot2<1, 4>(output, input[5]); - Badger::template Dot2<4, 1>(output, input[6]); -} + Badger::template Dot2<1, 1>(output, input[2]); + Badger::template Dot2<1, 2>(output, 
input[3]); + Badger::template Dot2<2, 1>(output, input[4]); + Badger::template Dot2<1, 4>(output, input[5]); + Badger::template Dot2<4, 1>(output, input[6]); + } // evenness: 4 weight: 16 // 8 8 0 2 1 8 2 1 2 4 @@ -745,31 +806,31 @@ inline void Combine2(const Block input[7], Block output[2]) { // 2 0 1 0 4 0 1 1 1 1 // 1 1 0 1 0 0 4 1 2 8 -template -inline void Combine4(const Block input[10], Block output[4]) { - output[2] = LeftShift(input[0], 1); - output[3] = input[0]; + template + inline void Combine4( const Block input[10], Block output[4] ) { + output[2] = LeftShift(input[0], 1); + output[3] = input[0]; - output[1] = input[1]; - output[3] = Plus(output[3], input[1]); + output[1] = input[1]; + output[3] = Plus(output [3], input[1 ] ); - output[1] = Plus(output[1], LeftShift(input[2], 1)); - output[2] = Plus(output[2], input[2]); + output[1] = Plus(output [1], LeftShift(input[2], 1)); + output[2] = Plus(output [2], input[2 ] ); - output[0] = input[3]; - output[3] = Plus(output[3], input[3]); + output[0] = input[3]; + output[3] = Plus(output [3], input[3 ] ); - output[0] = Plus(output[0], input[4]); - output[2] = Plus(output[2], LeftShift(input[4], 2)); + output[0] = Plus(output [0], input[4 ] ); + output[2] = Plus(output [2], LeftShift(input[4], 2)); - output[0] = Plus(output[0], LeftShift(input[5], 2)); - output[1] = Plus(output[1], input[5]); + output[0] = Plus(output [0], LeftShift(input[5], 2)); + output[1] = Plus(output [1], input[5 ] ); - Badger::template Dot4<2, 1, 1, 4>(output, input[6]); - Badger::template Dot4<4, 2, 1, 1>(output, input[7]); - Badger::template Dot4<1, 4, 1, 2>(output, input[8]); - Badger::template Dot4<1, 1, 1, 8>(output, input[9]); -} + Badger::template Dot4<2, 1, 1, 4>(output, input[6]); + Badger::template Dot4<4, 2, 1, 1>(output, input[7]); + Badger::template Dot4<1, 4, 1, 2>(output, input[8]); + Badger::template Dot4<1, 1, 1, 8>(output, input[9]); + } // TODO: // 0 0 0 0 1 x x x x @@ -785,353 +846,381 @@ inline void Combine4(const 
Block input[10], Block output[4]) { // 0 0 0 1 0 1 4 9 8 // 0 0 0 0 1 1 5 3 9 -template -inline void Combine5(const Block input[10], Block output[5]) { - output[0] = input[0]; - output[1] = input[1]; - output[2] = input[2]; - output[3] = input[3]; - output[4] = input[4]; - - output[0] = Plus(output[0], input[5]); - output[1] = Plus(output[1], input[5]); - output[2] = Plus(output[2], input[5]); - output[3] = Plus(output[3], input[5]); - output[4] = Plus(output[4], input[5]); - - Badger::template Dot5<1, 2, 3, 4, 5>(output, input[6]); - Badger::template Dot5<2, 1, 8, 9, 3>(output, input[7]); - Badger::template Dot5<4, 7, 5, 8, 9>(output, input[8]); -} + template + inline void Combine5( const Block input[10], Block output[5] ) { + output[0] = input[0]; + output[1] = input[1]; + output[2] = input[2]; + output[3] = input[3]; + output[4] = input[4]; + + output[0] = Plus(output[0], input[5]); + output[1] = Plus(output[1], input[5]); + output[2] = Plus(output[2], input[5]); + output[3] = Plus(output[3], input[5]); + output[4] = Plus(output[4], input[5]); + + Badger::template Dot5<1, 2, 3, 4, 5>(output, input[6]); + Badger::template Dot5<2, 1, 8, 9, 3>(output, input[7]); + Badger::template Dot5<4, 7, 5, 8, 9>(output, input[8]); + } -template -inline uint64_t TabulateBytes(uint64_t input, const uint64_t entropy[256 * width]) { - const uint64_t(&table)[width][256] = - *reinterpret_cast(entropy); - uint64_t result = 0; - for (unsigned i = 0; i < width; ++i) { - uint8_t index = input >> (i * CHAR_BIT); - result ^= table[i][index]; - } - return result; -} + template + inline uint64_t TabulateBytes( uint64_t input, const uint64_t entropy[256 * width] ) { + const uint64_t(&table)[width][256] = + *reinterpret_cast(entropy); + uint64_t result = 0; + for (unsigned i = 0; i < width; ++i) { + uint8_t index = input >> (i * CHAR_BIT); + result ^= table[i][index]; + } + return result; + } -template -static void Hash(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - 
uint64_t output[out_width]) { - constexpr unsigned kMaxStackSize = 9; - constexpr unsigned kFanout = 8; + template + static void Hash( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ) { + constexpr unsigned kMaxStackSize = 9; + constexpr unsigned kFanout = 8; - using Block = typename BlockWrapper::Block; + using Block = typename BlockWrapper::Block; - Block stack[kMaxStackSize][kFanout][out_width]; - int stack_lengths[kMaxStackSize] = {}; - size_t wide_length = length / sizeof(Block) / (dimension * in_width); + Block stack[kMaxStackSize][kFanout][out_width]; + int stack_lengths[kMaxStackSize] = {}; + size_t wide_length = length / sizeof(Block) / (dimension * in_width); - EhcBadger::DfsTreeHash(uint8_t_input, wide_length, stack, stack_lengths, entropy); - entropy += encoded_dimension * in_width + out_width * (kFanout - 1) * kMaxStackSize; + EhcBadger::DfsTreeHash(uint8_t_input, wide_length, stack, stack_lengths, entropy); + entropy += encoded_dimension * in_width + out_width * (kFanout - 1) * kMaxStackSize; - auto used_uint8_ts = wide_length * sizeof(Block) * (dimension * in_width); - uint8_t_input += used_uint8_ts; + auto used_uint8_ts = wide_length * sizeof(Block) * (dimension * in_width); + uint8_t_input += used_uint8_ts; - EhcBadger::DfsGreedyFinalizer(stack, stack_lengths, uint8_t_input, - length - used_uint8_ts, entropy, output); -} + EhcBadger::DfsGreedyFinalizer(stack, stack_lengths, uint8_t_input, + length - used_uint8_ts, entropy, output); + } -template -struct alignas(alignof(Block)) Repeat { - Block it[count]; -}; + template + struct alignas( alignof(Block)) Repeat { + Block it[count]; + }; + + template + struct RepeatWrapper { + using InnerBlock = typename InnerBlockWrapper::Block; + + using Block = Repeat; + + static Block LoadOne( uint64_t entropy ) { + Block result; + + for (unsigned i = 0; i < count; ++i) { + result.it[i] = InnerBlockWrapper::LoadOne(entropy); + } + return result; + } + + static 
Block LoadBlock( const void * x ) { + auto y = reinterpret_cast(x); + Block result; + + for (unsigned i = 0; i < count; ++i) { + result.it[i] = InnerBlockWrapper::LoadBlock(y + i * sizeof(InnerBlock)); + } + return result; + } + + static Block LoadBlockNative( const void * x ) { + auto y = reinterpret_cast(x); + Block result; + + for (unsigned i = 0; i < count; ++i) { + result.it[i] = InnerBlockWrapper::LoadBlockNative(y + i * sizeof(InnerBlock)); + } + return result; + } + }; + + template + inline Repeat Xor( const Repeat & a, const Repeat & b ) { + Repeat result; + + for (unsigned i = 0; i < count; ++i) { + result.it[i] = Xor(a.it[i], b.it[i]); + } + return result; + } -template -struct RepeatWrapper { - using InnerBlock = typename InnerBlockWrapper::Block; + template + inline Repeat Plus32( const Repeat & a, const Repeat & b ) { + Repeat result; - using Block = Repeat; + for (unsigned i = 0; i < count; ++i) { + result.it[i] = Plus32(a.it[i], b.it[i]); + } + return result; + } - static Block LoadOne(uint64_t entropy) { - Block result; - for (unsigned i = 0; i < count; ++i) { - result.it[i] = InnerBlockWrapper::LoadOne(entropy); - } - return result; - } - - static Block LoadBlock(const void* x) { - auto y = reinterpret_cast(x); - Block result; - for (unsigned i = 0; i < count; ++i) { - result.it[i] = InnerBlockWrapper::LoadBlock(y + i * sizeof(InnerBlock)); - } - return result; - } - - static Block LoadBlockNative(const void* x) { - auto y = reinterpret_cast(x); - Block result; - for (unsigned i = 0; i < count; ++i) { - result.it[i] = InnerBlockWrapper::LoadBlockNative(y + i * sizeof(InnerBlock)); - } - return result; - } -}; - -template -inline Repeat Xor(const Repeat & a, const Repeat & b) { - Repeat result; - for (unsigned i = 0; i < count; ++i) { - result.it[i] = Xor(a.it[i], b.it[i]); - } - return result; -} + template + inline Repeat Plus( const Repeat & a, const Repeat & b ) { + Repeat result; -template -inline Repeat Plus32(const Repeat & a, const Repeat & 
b) { - Repeat result; - for (unsigned i = 0; i < count; ++i) { - result.it[i] = Plus32(a.it[i], b.it[i]); - } - return result; -} + for (unsigned i = 0; i < count; ++i) { + result.it[i] = Plus(a.it[i], b.it[i]); + } + return result; + } -template -inline Repeat Plus(const Repeat & a, const Repeat & b) { - Repeat result; - for (unsigned i = 0; i < count; ++i) { - result.it[i] = Plus(a.it[i], b.it[i]); - } - return result; -} + template + inline Repeat Minus( const Repeat & a, const Repeat & b ) { + Repeat result; -template -inline Repeat Minus(const Repeat & a, const Repeat & b) { - Repeat result; - for (unsigned i = 0; i < count; ++i) { - result.it[i] = Minus(a.it[i], b.it[i]); - } - return result; -} + for (unsigned i = 0; i < count; ++i) { + result.it[i] = Minus(a.it[i], b.it[i]); + } + return result; + } -template -inline Repeat LeftShift(const Repeat & a, int s) { - Repeat result; - for (unsigned i = 0; i < count; ++i) { - result.it[i] = LeftShift(a.it[i], s); - } - return result; -} + template + inline Repeat LeftShift( const Repeat & a, int s ) { + Repeat result; -template -inline Repeat RightShift32(const Repeat & a) { - Repeat result; - for (unsigned i = 0; i < count; ++i) { - result.it[i] = RightShift32(a.it[i]); - } - return result; -} + for (unsigned i = 0; i < count; ++i) { + result.it[i] = LeftShift(a.it[i], s); + } + return result; + } -template -inline Repeat Times(const Repeat & a, const Repeat & b) { - Repeat result; - for (unsigned i = 0; i < count; ++i) { - result.it[i] = Times(a.it[i], b.it[i]); - } - return result; -} + template + inline Repeat RightShift32( const Repeat & a ) { + Repeat result; -template -inline uint64_t Sum(const Repeat & a) { - uint64_t result = 0; - for (unsigned i = 0; i < count; ++i) { - result += Sum(a.it[i]); - } - return result; -} + for (unsigned i = 0; i < count; ++i) { + result.it[i] = RightShift32(a.it[i]); + } + return result; + } -template -inline Repeat Negate(const Repeat & a) { - Repeat b; - for (unsigned i = 
0; i < count; ++i) { - b.it[i] = Negate(a.it[i]); - } - return b; -} + template + inline Repeat Times( const Repeat & a, const Repeat & b ) { + Repeat result; -} // namespace + for (unsigned i = 0; i < count; ++i) { + result.it[i] = Times(a.it[i], b.it[i]); + } + return result; + } + + template + inline uint64_t Sum( const Repeat & a ) { + uint64_t result = 0; + + for (unsigned i = 0; i < count; ++i) { + result += Sum(a.it[i]); + } + return result; + } + + template + inline Repeat Negate( const Repeat & a ) { + Repeat b; + + for (unsigned i = 0; i < count; ++i) { + b.it[i] = Negate(a.it[i]); + } + return b; + } + } // namespace //------------------------------------------------------------ -template -inline constexpr size_t GetEntropyBytesNeeded(size_t n) { - return (3 == out_width) - ? EhcBadger::GetEntropyBytesNeeded(n) - : (2 == out_width) - ? EhcBadger::GetEntropyBytesNeeded( - n) - : (4 == out_width) - ? EhcBadger::GetEntropyBytesNeeded( - n) - : EhcBadger::GetEntropyBytesNeeded( - n); -} + template + inline constexpr size_t GetEntropyBytesNeeded( size_t n ) { + return (3 == out_width) ? + EhcBadger::GetEntropyBytesNeeded(n) : + (2 == out_width) ? + EhcBadger::GetEntropyBytesNeeded(n) + : + (4 == out_width) ? + EhcBadger::GetEntropyBytesNeeded(n) + : + EhcBadger::GetEntropyBytesNeeded(n); + } // auto b = 8; -inline constexpr size_t MEBN_b() { return 8; } + inline constexpr size_t MEBN_b() { return 8; } + // auto h = FloorLog(8, ~0ull / 21); -inline constexpr size_t MEBN_h() { return FloorLog(8, ~0ull / 21); } + inline constexpr size_t MEBN_h() { return FloorLog(8, ~0ull / 21); } + // auto tab_words = 0;//6 * 8 * 256; // TODO: include words of tabulation? 
-inline constexpr size_t MEBN_tab_words() { return 0; } + inline constexpr size_t MEBN_tab_words() { return 0; } + // auto words = 21 + 7 * 5 * h + b * 8 * 5 * h + b * 21 + 5 - 1; -inline constexpr size_t MEBN_words() { return 21 + 7 * 5 * MEBN_h() + MEBN_b() * 8 * 5 * MEBN_h() + MEBN_b() * 21 + 5 - 1; } -inline constexpr size_t MaxEntropyBytesNeeded() { - return sizeof(uint64_t) * (MEBN_words() + MEBN_tab_words()); -} + inline constexpr size_t MEBN_words() { + return 21 + 7 * 5 * MEBN_h() + MEBN_b() * 8 * 5 * MEBN_h() + MEBN_b() * 21 + 5 - 1; + } -template -inline uint64_t TabulateAfter(const uint64_t* entropy, const uint8_t* uint8_t_input, - size_t length) { - const uint64_t(&table)[sizeof(uint64_t) * (1 + width)][256] = - *reinterpret_cast(entropy); - entropy += width * 256; - uint64_t output[width]; - Hasher(entropy, uint8_t_input, length, output); - uint64_t result = TabulateBytes(length, &table[0][0]); - for (int i = 0; i < width; ++i) { - result ^= TabulateBytes(output[i], &table[8 * (i + 1)][0]); - } - return result; -} + inline constexpr size_t MaxEntropyBytesNeeded() { + return sizeof(uint64_t) * (MEBN_words() + MEBN_tab_words()); + } + + template + inline uint64_t TabulateAfter( const uint64_t * entropy, const uint8_t * uint8_t_input, size_t length ) { + const uint64_t(&table)[sizeof(uint64_t) * (1 + width)][256] = + *reinterpret_cast(entropy); + entropy += width * 256; + uint64_t output[width]; + Hasher(entropy, uint8_t_input, length, output); + uint64_t result = TabulateBytes(length, &table[0][0]); + for (int i = 0; i < width; ++i) { + result ^= TabulateBytes(output[i], &table[8 * (i + 1)][0]); + } + return result; + } //------------------------------------------------------------ -template -inline void V4Scalar(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]) { - return Hash, 8>, dimension, in_width, - encoded_dimension, out_width>(entropy, uint8_t_input, length, output); -} + template + inline void 
V4Scalar( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ) { + return Hash, 8>, dimension, in_width, + encoded_dimension, out_width>(entropy, uint8_t_input, length, output); + } -template -inline void V3Scalar(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]) { - return Hash, 4>, dimension, in_width, - encoded_dimension, out_width>(entropy, uint8_t_input, length, output); -} + template + inline void V3Scalar( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ) { + return Hash, 4>, dimension, in_width, + encoded_dimension, out_width>(entropy, uint8_t_input, length, output); + } -template -inline void V2Scalar(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]) { - return Hash, 2>, dimension, in_width, - encoded_dimension, out_width>(entropy, uint8_t_input, length, output); -} + template + inline void V2Scalar( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ) { + return Hash, 2>, dimension, in_width, + encoded_dimension, out_width>(entropy, uint8_t_input, length, output); + } -template -inline void V1Scalar(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]) { - return Hash, dimension, in_width, encoded_dimension, out_width>( - entropy, uint8_t_input, length, output); -} + template + inline void V1Scalar( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ) { + return Hash, dimension, in_width, encoded_dimension, out_width>( + entropy, uint8_t_input, length, output); + } #if defined(HAVE_ARM_NEON) -template -inline void V2Neon(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]) { - return Hash, dimension, in_width, encoded_dimension, out_width>( - entropy, uint8_t_input, 
length, output); -} -template -inline void V3Neon(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]) { - return Hash, 2>, dimension, in_width, encoded_dimension, - out_width>(entropy, uint8_t_input, length, output); -} + template + inline void V2Neon( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ) { + return Hash, dimension, in_width, encoded_dimension, out_width>( + entropy, uint8_t_input, length, output); + } + + template + inline void V3Neon( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ) { + return Hash, 2>, dimension, in_width, encoded_dimension, + out_width>(entropy, uint8_t_input, length, output); + } + + template + inline void V4Neon( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ) { + return Hash, 4>, dimension, in_width, encoded_dimension, + out_width>(entropy, uint8_t_input, length, output); + } -template -inline void V4Neon(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]) { - return Hash, 4>, dimension, in_width, encoded_dimension, - out_width>(entropy, uint8_t_input, length, output); -} #else // HAVE_ARM_NEON -#if defined(HAVE_SSE_2) -template -inline void V2Sse2(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]) { - return Hash, dimension, in_width, encoded_dimension, out_width>( - entropy, uint8_t_input, length, output); -} + #if defined(HAVE_SSE_2) + + template + inline void V2Sse2( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ) { + return Hash, dimension, in_width, encoded_dimension, out_width>( + entropy, uint8_t_input, length, output); + } -template -inline void V3Sse2(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]) { - return 
Hash, 2>, dimension, in_width, encoded_dimension, - out_width>(entropy, uint8_t_input, length, output); -} + template + inline void V3Sse2( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ) { + return Hash, 2>, dimension, in_width, encoded_dimension, + out_width>(entropy, uint8_t_input, length, output); + } -template -inline void V4Sse2(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]) { - return Hash, 4>, dimension, in_width, encoded_dimension, - out_width>(entropy, uint8_t_input, length, output); -} -#endif + template + inline void V4Sse2( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ) { + return Hash, 4>, dimension, in_width, encoded_dimension, + out_width>(entropy, uint8_t_input, length, output); + } -#if defined(HAVE_AVX2) -template -inline void V3Avx2(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]) { - return Hash, dimension, in_width, encoded_dimension, out_width>( - entropy, uint8_t_input, length, output); -} + #endif -template -inline void V4Avx2(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]) { - return Hash, 2>, dimension, in_width, encoded_dimension, - out_width>(entropy, uint8_t_input, length, output); -} -#endif + #if defined(HAVE_AVX2) -#if defined(HAVE_AVX512_F) -template -inline void V4Avx512(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]) { - return Hash, dimension, in_width, encoded_dimension, out_width>( - entropy, uint8_t_input, length, output); -} -#endif + template + inline void V3Avx2( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ) { + return Hash, dimension, in_width, encoded_dimension, out_width>( + entropy, uint8_t_input, length, output); + } + + template + inline void 
V4Avx2( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ) { + return Hash, 2>, dimension, in_width, encoded_dimension, + out_width>(entropy, uint8_t_input, length, output); + } + + #endif + + #if defined(HAVE_AVX512_F) + + template + inline void V4Avx512( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ) { + return Hash, dimension, in_width, encoded_dimension, out_width>( + entropy, uint8_t_input, length, output); + } + + #endif #endif // HAVE_ARM_NEON -template -static inline void V4(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]); -template -static inline void V3(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]); -template -static inline void V2(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]); -template -static inline void V1(const uint64_t* entropy, const uint8_t* uint8_t_input, size_t length, - uint64_t output[out_width]); + template + static inline void V4( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ); + + template + static inline void V3( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ); + + template + static inline void V2( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ); + + template + static inline void V1( const uint64_t * entropy, const uint8_t * uint8_t_input, + size_t length, uint64_t output[out_width] ); //------------------------------------------------------------ #define SPECIALIZE(version, isa, out_width, dimension, in_width, encoded_dimension) \ @@ -1159,92 +1248,87 @@ static inline void V1(const uint64_t* entropy, const uint8_t* uint8_t_input, siz #if defined(HAVE_ARM_NEON) -SPECIALIZE_4(4, Neon) -SPECIALIZE_4(3, Neon) 
-SPECIALIZE_4(2, Neon) -SPECIALIZE_4(1, Scalar) + SPECIALIZE_4(4, Neon ) + SPECIALIZE_4(3, Neon ) + SPECIALIZE_4(2, Neon ) + SPECIALIZE_4(1, Scalar) #elif defined(HAVE_AVX512_F) -SPECIALIZE_4(4, Avx512) -SPECIALIZE_4(3, Avx2) -SPECIALIZE_4(2, Sse2) -SPECIALIZE_4(1, Scalar) + SPECIALIZE_4(4, Avx512) + SPECIALIZE_4(3, Avx2 ) + SPECIALIZE_4(2, Sse2 ) + SPECIALIZE_4(1, Scalar) #elif defined(HAVE_AVX2) -SPECIALIZE_4(4, Avx2) -SPECIALIZE_4(3, Avx2) -SPECIALIZE_4(2, Sse2) -SPECIALIZE_4(1, Scalar) + SPECIALIZE_4(4, Avx2 ) + SPECIALIZE_4(3, Avx2 ) + SPECIALIZE_4(2, Sse2 ) + SPECIALIZE_4(1, Scalar) #elif defined(HAVE_SSE_2) -SPECIALIZE_4(4, Sse2) -SPECIALIZE_4(3, Sse2) -SPECIALIZE_4(2, Sse2) -SPECIALIZE_4(1, Scalar) + SPECIALIZE_4(4, Sse2 ) + SPECIALIZE_4(3, Sse2 ) + SPECIALIZE_4(2, Sse2 ) + SPECIALIZE_4(1, Scalar) #else -SPECIALIZE_4(4, Scalar) -SPECIALIZE_4(3, Scalar) -SPECIALIZE_4(2, Scalar) -SPECIALIZE_4(1, Scalar) + SPECIALIZE_4(4, Scalar) + SPECIALIZE_4(3, Scalar) + SPECIALIZE_4(2, Scalar) + SPECIALIZE_4(1, Scalar) #endif - -} // namespace advanced + } // namespace advanced //------------------------------------------------------------ -static constexpr size_t kEntropyBytesNeeded = - 256 * 3 * sizeof(uint64_t) * sizeof(uint64_t) + - advanced::GetEntropyBytesNeeded< - advanced::RepeatWrapper, 8>, 2>(~0ul); - -template < bool bswap > -static inline uint64_t HalftimeHashStyle512( - const uint64_t entropy[kEntropyBytesNeeded / sizeof(uint64_t)], const uint8_t input[], - size_t length) { - return advanced::TabulateAfter, 2>(entropy, input, length); -} - -template < bool bswap > -static inline uint64_t HalftimeHashStyle256( - const uint64_t entropy[kEntropyBytesNeeded / sizeof(uint64_t)], const uint8_t input[], - size_t length) { - return advanced::TabulateAfter, 2>(entropy, input, length); -} + static constexpr size_t kEntropyBytesNeeded = + 256 * 3 * sizeof(uint64_t) * sizeof(uint64_t) + + advanced::GetEntropyBytesNeeded< + advanced::RepeatWrapper, 8>, 2>(~0ul); + + 
template + static inline uint64_t HalftimeHashStyle512( const uint64_t entropy[kEntropyBytesNeeded / sizeof(uint64_t)], + const uint8_t input[], size_t length ) { + return advanced::TabulateAfter, 2>(entropy, input, length); + } -template < bool bswap > -static inline uint64_t HalftimeHashStyle128( - const uint64_t entropy[kEntropyBytesNeeded / sizeof(uint64_t)], const uint8_t input[], - size_t length) { - return advanced::TabulateAfter, 2>(entropy, input, length); -} + template + static inline uint64_t HalftimeHashStyle256( const uint64_t entropy[kEntropyBytesNeeded / sizeof(uint64_t)], + const uint8_t input[], size_t length ) { + return advanced::TabulateAfter, 2>(entropy, input, length); + } -template < bool bswap > -static inline uint64_t HalftimeHashStyle64( - const uint64_t entropy[kEntropyBytesNeeded / sizeof(uint64_t)], const uint8_t input[], - size_t length) { - return advanced::TabulateAfter, 2>(entropy, input, length); -} + template + static inline uint64_t HalftimeHashStyle128( const uint64_t entropy[kEntropyBytesNeeded / sizeof(uint64_t)], + const uint8_t input[], size_t length ) { + return advanced::TabulateAfter, 2>(entropy, input, length); + } -} // namespace halftime_hash + template + static inline uint64_t HalftimeHashStyle64( const uint64_t entropy[kEntropyBytesNeeded / sizeof(uint64_t)], + const uint8_t input[], size_t length ) { + return advanced::TabulateAfter, 2>(entropy, input, length); + } +} // namespace halftime_hash //------------------------------------------------------------ alignas(64) static thread_local uint64_t - halftime_hash_random[8 * ((halftime_hash::kEntropyBytesNeeded / 64) + 1)]; +halftime_hash_random[8 * ((halftime_hash::kEntropyBytesNeeded / 64) + 1)]; // romu random number generator for seeding the HalftimeHash entropy -static uint64_t splitmix(uint64_t & state) { - uint64_t z = (state += UINT64_C(0x9e3779b97f4a7c15)); - z = (z ^ (z >> 30)) * UINT64_C(0xbf58476d1ce4e5b9); - z = (z ^ (z >> 27)) * 
UINT64_C(0x94d049bb133111eb); - return z ^ (z >> 31); +static uint64_t splitmix( uint64_t & state ) { + uint64_t z = (state += UINT64_C(0x9e3779b97f4a7c15)); + + z = (z ^ (z >> 30)) * UINT64_C(0xbf58476d1ce4e5b9); + z = (z ^ (z >> 27)) * UINT64_C(0x94d049bb133111eb); + return z ^ (z >> 31); } -static uintptr_t halftime_hash_seed_init(const seed_t seed) { +static uintptr_t halftime_hash_seed_init( const seed_t seed ) { uint64_t mState = seed; uint64_t wState = splitmix(mState); uint64_t xState = splitmix(mState); @@ -1273,108 +1357,112 @@ static uintptr_t halftime_hash_seed_init(const seed_t seed) { } //------------------------------------------------------------ -template < bool bswap > -static void HalftimeHash64(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void HalftimeHash64( const void * in, const size_t len, const seed_t seed, void * out ) { const uint64_t * random_words = (const uint64_t *)(uintptr_t)seed; - uint64_t h = halftime_hash::HalftimeHashStyle64(random_words, (const uint8_t *)in, (size_t)len); + uint64_t h = halftime_hash::HalftimeHashStyle64(random_words, (const uint8_t *)in, (size_t)len); + PUT_U64(h, (uint8_t *)out, 0); } -template < bool bswap > -static void HalftimeHash128(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void HalftimeHash128( const void * in, const size_t len, const seed_t seed, void * out ) { const uint64_t * random_words = (const uint64_t *)(uintptr_t)seed; - uint64_t h = halftime_hash::HalftimeHashStyle128(random_words, (const uint8_t *)in, (size_t)len); + uint64_t h = halftime_hash::HalftimeHashStyle128(random_words, (const uint8_t *)in, (size_t)len); + PUT_U64(h, (uint8_t *)out, 0); } -template < bool bswap > -static void HalftimeHash256(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void HalftimeHash256( const void * in, const size_t len, const seed_t seed, void * out ) { const uint64_t * random_words = 
(const uint64_t *)(uintptr_t)seed; - uint64_t h = halftime_hash::HalftimeHashStyle256(random_words, (const uint8_t *)in, (size_t)len); + uint64_t h = halftime_hash::HalftimeHashStyle256(random_words, (const uint8_t *)in, (size_t)len); + PUT_U64(h, (uint8_t *)out, 0); } -template < bool bswap > -static void HalftimeHash512(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void HalftimeHash512( const void * in, const size_t len, const seed_t seed, void * out ) { const uint64_t * random_words = (const uint64_t *)(uintptr_t)seed; - uint64_t h = halftime_hash::HalftimeHashStyle512(random_words, (const uint8_t *)in, (size_t)len); + uint64_t h = halftime_hash::HalftimeHashStyle512(random_words, (const uint8_t *)in, (size_t)len); + PUT_U64(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(halftimehash, - $.src_url = "https://github.com/jbapple/HalftimeHash", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://github.com/jbapple/HalftimeHash", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); REGISTER_HASH(HalftimeHash_64, - $.desc = "Halftime Hash (64-bit blocks)", - $.sort_order = 10, - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE , - $.impl_flags = - FLAG_IMPL_SLOW | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 64, - $.verification_LE = 0xED42E424, - $.verification_BE = 0x7EE5ED6F, - $.hashfn_native = HalftimeHash64, - $.hashfn_bswap = HalftimeHash64, - $.seedfn = halftime_hash_seed_init -); + $.desc = "Halftime Hash (64-bit blocks)", + $.sort_order = 10, + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xED42E424, + $.verification_BE = 0x7EE5ED6F, + $.hashfn_native = HalftimeHash64, + $.hashfn_bswap = HalftimeHash64, + $.seedfn = halftime_hash_seed_init + ); REGISTER_HASH(HalftimeHash_128, - 
$.desc = "Halftime Hash (128-bit blocks)", - $.sort_order = 20, - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE , - $.impl_flags = - FLAG_IMPL_SLOW | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 64, - $.verification_LE = 0x952DF141, - $.verification_BE = 0xD79E990B, - $.hashfn_native = HalftimeHash128, - $.hashfn_bswap = HalftimeHash128, - $.seedfn = halftime_hash_seed_init -); + $.desc = "Halftime Hash (128-bit blocks)", + $.sort_order = 20, + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x952DF141, + $.verification_BE = 0xD79E990B, + $.hashfn_native = HalftimeHash128, + $.hashfn_bswap = HalftimeHash128, + $.seedfn = halftime_hash_seed_init + ); REGISTER_HASH(HalftimeHash_256, - $.desc = "Halftime Hash (256-bit blocks)", - $.sort_order = 30, - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE , - $.impl_flags = - FLAG_IMPL_SLOW | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 64, - $.verification_LE = 0x912330EA, - $.verification_BE = 0x23C24991, - $.hashfn_native = HalftimeHash256, - $.hashfn_bswap = HalftimeHash256, - $.seedfn = halftime_hash_seed_init -); + $.desc = "Halftime Hash (256-bit blocks)", + $.sort_order = 30, + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x912330EA, + $.verification_BE = 0x23C24991, + $.hashfn_native = HalftimeHash256, + $.hashfn_bswap = HalftimeHash256, + $.seedfn = halftime_hash_seed_init + ); REGISTER_HASH(HalftimeHash_512, - $.desc = "Halftime Hash (512-bit blocks)", - $.sort_order = 40, - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE , - $.impl_flags = - FLAG_IMPL_SLOW | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT , - $.bits = 64, - $.verification_LE = 0x1E0F99EA, - $.verification_BE = 
0xA3A0AE42, - $.hashfn_native = HalftimeHash512, - $.hashfn_bswap = HalftimeHash512, - $.seedfn = halftime_hash_seed_init -); + $.desc = "Halftime Hash (512-bit blocks)", + $.sort_order = 40, + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x1E0F99EA, + $.verification_BE = 0xA3A0AE42, + $.hashfn_native = HalftimeHash512, + $.hashfn_bswap = HalftimeHash512, + $.seedfn = halftime_hash_seed_init + ); diff --git a/hashes/hasshe2.cpp b/hashes/hasshe2.cpp index bca5cc21..d10e1740 100644 --- a/hashes/hasshe2.cpp +++ b/hashes/hasshe2.cpp @@ -32,35 +32,39 @@ #include "Hashlib.h" #if defined(HAVE_SSE_2) -#include "Intrinsics.h" + #include "Intrinsics.h" #endif //------------------------------------------------------------ alignas(16) const static uint32_t coeffs[12] = { - /* Four carefully selected coefficients and interleaving zeros. */ - 0x98b365a1, 0, 0x52c69cab, 0, - 0xb76a9a41, 0, 0xcc4d2c7b, 0, - /* 128 bits of random data. */ - 0x564a4447, 0xc7265595, 0xe20c241d, 0x128fa608, + /* Four carefully selected coefficients and interleaving zeros. */ + 0x98b365a1, 0, 0x52c69cab, 0, + 0xb76a9a41, 0, 0xcc4d2c7b, 0, + /* 128 bits of random data. */ + 0x564a4447, 0xc7265595, 0xe20c241d, 0x128fa608, }; //------------------------------------------------------------ // Portable implementation of the hash -static void combine_and_mix(uint64_t state[4], const uint64_t input[2]) { - /* Phase 1: Perform four 32x32->64 bit multiplication with the - input block and words 1 and 3 coeffs, respectively. This - effectively propagates a bit change in input to 32 more - significant bit positions. Combine into internal state by - subtracting the result of multiplications from the internal - state. 
*/ +static void combine_and_mix( uint64_t state[4], const uint64_t input[2] ) { + /* + * Phase 1: Perform four 32x32->64 bit multiplication with the + * input block and words 1 and 3 coeffs, respectively. This + * effectively propagates a bit change in input to 32 more + * significant bit positions. Combine into internal state by + * subtracting the result of multiplications from the internal + * state. + */ state[0] -= ((uint64_t)(coeffs[0])) * (input[1] & 0xffffffff); - state[1] -= ((uint64_t)(coeffs[2])) * (input[1] >> 32); + state[1] -= ((uint64_t)(coeffs[2])) * (input[1] >> 32); state[2] -= ((uint64_t)(coeffs[4])) * (input[0] & 0xffffffff); - state[3] -= ((uint64_t)(coeffs[6])) * (input[0] >> 32); + state[3] -= ((uint64_t)(coeffs[6])) * (input[0] >> 32); - /* Phase 2: Perform shifts and xors to propagate the 32-bit - changes produced above into 64-bit (and even a little larger) - changes in the internal state. */ + /* + * Phase 2: Perform shifts and xors to propagate the 32-bit + * changes produced above into 64-bit (and even a little larger) + * changes in the internal state. + */ /* state ^= state >64> 29; */ /* state +64= state <64< 16; */ /* state ^= state >64> 21; */ @@ -74,61 +78,68 @@ static void combine_and_mix(uint64_t state[4], const uint64_t input[2]) { state[3] += (state[3] << 32) + (state[2] >> 32); state[2] += (state[2] << 32); - /* Phase 3: Propagate the changes among the four 64-bit words by - performing 64-bit subtractions and 32-bit word shuffling. */ - state[0] -= state[2]; - state[1] -= state[3]; + /* + * Phase 3: Propagate the changes among the four 64-bit words by + * performing 64-bit subtractions and 32-bit word shuffling. 
+ */ + state[0] -= state [2]; + state[1] -= state [3]; uint64_t tmp; - tmp = state[2]; + tmp = state [2]; state[2] = ((state[2] >> 32) + (state[3] << 32)) - state[0]; state[3] = ((state[3] >> 32) + (tmp << 32)) - state[1]; - tmp = state[1]; + tmp = state [1]; state[1] = ((state[0] >> 32) + (state[0] << 32)) - state[3]; state[0] = tmp - state[2]; - tmp = state[2]; + tmp = state [2]; state[2] = ((state[3] >> 32) + (state[2] << 32)) - state[0]; state[3] = ((tmp >> 32) + (state[3] << 32)) - state[1]; - tmp = state[0]; + tmp = state [0]; state[0] = ((state[1] >> 32) + (state[0] << 32)) - state[2]; state[1] = ((tmp >> 32) + (state[1] << 32)) - state[3]; - /* With good coefficients any one-bit flip in the input has now - changed all bits in the internal state with a probability - between 45% to 55%. */ + /* + * With good coefficients any one-bit flip in the input has now + * changed all bits in the internal state with a probability + * between 45% to 55%. + */ } -template < bool orig, bool bswap > -static void hasshe2_portable(const uint8_t * input_buf, size_t n_bytes, uint64_t seed, void *output_state) { +template +static void hasshe2_portable( const uint8_t * input_buf, size_t n_bytes, uint64_t seed, void * output_state ) { uint64_t state[4]; uint64_t input[2]; uint64_t seed2 = orig ? seed : (seed + (uint64_t)n_bytes); - /* Initialize internal state to something random. (Alternatively, - if hashing a chain of data, read in the previous hash result from - somewhere.) - - Seeding is homegrown for SMHasher3 - */ - state[0] = coeffs[ 8] + (((uint64_t)coeffs[ 9]) << 32); - state[1] = coeffs[10] + (((uint64_t)coeffs[11]) << 32); + /* + * Initialize internal state to something random. (Alternatively, + * if hashing a chain of data, read in the previous hash result from + * somewhere.) 
+ * + * Seeding is homegrown for SMHasher3 + */ + state[0] = coeffs[ 8] + (((uint64_t)coeffs[ 9]) << 32); + state[1] = coeffs[10] + (((uint64_t)coeffs[11]) << 32); state[0] ^= seed; state[1] ^= seed2; - state[2] = state[0]; - state[3] = state[1]; + state[2] = state[0]; + state[3] = state[1]; while (n_bytes >= 16) { - /* Read in 16 bytes, or 128 bits, from buf. Advance buf and - decrement n_bytes accordingly. */ + /* + * Read in 16 bytes, or 128 bits, from buf. Advance buf and + * decrement n_bytes accordingly. + */ for (int i = 0; i < 2; i++) { - input[i] = GET_U64(input_buf, i*8); + input[i] = GET_U64(input_buf, i * 8); } input_buf += 16; - n_bytes -= 16; + n_bytes -= 16; combine_and_mix(state, input); } @@ -137,15 +148,17 @@ static void hasshe2_portable(const uint8_t * input_buf, size_t n_bytes, uint64_t memcpy(buf, input_buf, n_bytes); memset(buf + n_bytes, 0, 16 - n_bytes); for (int i = 0; i < 2; i++) { - input[i] = GET_U64(buf, i*8); + input[i] = GET_U64(buf, i * 8); } combine_and_mix(state, input); } - /* Postprocessing. Copy half of the internal state into fake input, - replace it with the constant rnd_data, and do one combine and mix - phase more. */ + /* + * Postprocessing. Copy half of the internal state into fake input, + * replace it with the constant rnd_data, and do one combine and mix + * phase more. + */ input[0] = state[0]; input[1] = state[1]; state[0] = coeffs[ 8] + (((uint64_t)coeffs[ 9]) << 32); @@ -153,7 +166,7 @@ static void hasshe2_portable(const uint8_t * input_buf, size_t n_bytes, uint64_t combine_and_mix(state, input); for (int i = 0; i < 4; i++) { - PUT_U64(state[i], (uint8_t *)output_state, i*8); + PUT_U64(state[i], (uint8_t *)output_state, i * 8); } } @@ -197,97 +210,104 @@ static void hasshe2_portable(const uint8_t * input_buf, size_t n_bytes, uint64_t changed all bits in the internal state with a probability \ between 45% to 55%. 
*/ -template < bool orig, bool bswap > -static void hasshe2_sse2(const uint8_t * input_buf, size_t n_bytes, uint64_t seed, void *output_state) { - __m128i coeffs_1, coeffs_2, rnd_data, seed_xmm, input, state_1, state_2; - coeffs_1 = _mm_load_si128((__m128i *) coeffs); - coeffs_2 = _mm_load_si128((__m128i *) (coeffs + 4)); - rnd_data = _mm_load_si128((__m128i *) (coeffs + 8)); - seed_xmm = _mm_set_epi64x(orig ? seed : (seed + n_bytes), seed); - - /* Initialize internal state to something random. (Alternatively, - if hashing a chain of data, read in the previous hash result from - somewhere.) - - Seeding is homegrown for SMHasher3 - */ - state_1 = state_2 = _mm_xor_si128(rnd_data, seed_xmm); - - while (n_bytes >= 16) { - /* Read in 16 bytes, or 128 bits, from buf. Advance buf and - decrement n_bytes accordingly. */ - input = _mm_loadu_si128((__m128i *) input_buf); - if (bswap) { input = mm_bswap64(input); } - input_buf += 16; - n_bytes -= 16; - - COMBINE_AND_MIX(coeffs_1, coeffs_2, state_1, state_2, input); - } - if (n_bytes > 0) { - alignas(16) uint8_t buf[16]; - memcpy(buf, input_buf, n_bytes); - memset(buf + n_bytes, 0, 16 - n_bytes); - input = _mm_load_si128((__m128i *) buf); - if (bswap) { input = mm_bswap64(input); } - COMBINE_AND_MIX(coeffs_1, coeffs_2, state_1, state_2, input); - } - - /* Postprocessing. Copy half of the internal state into fake input, - replace it with the constant rnd_data, and do one combine and mix - phase more. 
*/ - input = state_1; - state_1 = rnd_data; - - COMBINE_AND_MIX(coeffs_1, coeffs_2, state_1, state_2, input); - - if (bswap) { - state_1 = mm_bswap64(state_1); - state_2 = mm_bswap64(state_2); - } - _mm_storeu_si128((__m128i *)output_state, state_1); - _mm_storeu_si128((__m128i *)((char*)output_state + 16), state_2); +template +static void hasshe2_sse2( const uint8_t * input_buf, size_t n_bytes, uint64_t seed, void * output_state ) { + __m128i coeffs_1, coeffs_2, rnd_data, seed_xmm, input, state_1, state_2; + + coeffs_1 = _mm_load_si128((__m128i *)coeffs ); + coeffs_2 = _mm_load_si128((__m128i *)(coeffs + 4)); + rnd_data = _mm_load_si128((__m128i *)(coeffs + 8)); + seed_xmm = _mm_set_epi64x(orig ? seed : (seed + n_bytes), seed); + + /* + * Initialize internal state to something random. (Alternatively, + * if hashing a chain of data, read in the previous hash result from + * somewhere.) + * + * Seeding is homegrown for SMHasher3 + */ + state_1 = state_2 = _mm_xor_si128(rnd_data, seed_xmm); + + while (n_bytes >= 16) { + /* + * Read in 16 bytes, or 128 bits, from buf. Advance buf and + * decrement n_bytes accordingly. + */ + input = _mm_loadu_si128((__m128i *)input_buf); + if (bswap) { input = mm_bswap64(input); } + input_buf += 16; + n_bytes -= 16; + + COMBINE_AND_MIX(coeffs_1, coeffs_2, state_1, state_2, input); + } + if (n_bytes > 0) { + alignas(16) uint8_t buf[16]; + memcpy(buf, input_buf, n_bytes); + memset(buf + n_bytes, 0, 16 - n_bytes); + input = _mm_load_si128((__m128i *)buf); + if (bswap) { input = mm_bswap64(input); } + COMBINE_AND_MIX(coeffs_1, coeffs_2, state_1, state_2, input); + } + + /* + * Postprocessing. Copy half of the internal state into fake input, + * replace it with the constant rnd_data, and do one combine and mix + * phase more. 
+ */ + input = state_1; + state_1 = rnd_data; + + COMBINE_AND_MIX(coeffs_1, coeffs_2, state_1, state_2, input); + + if (bswap) { + state_1 = mm_bswap64(state_1); + state_2 = mm_bswap64(state_2); + } + _mm_storeu_si128((__m128i *)output_state, state_1); + _mm_storeu_si128((__m128i *)((char *)output_state + 16), state_2); } + #endif -template < bool orig, bool bswap > -static void Hasshe2(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void Hasshe2( const void * in, const size_t len, const seed_t seed, void * out ) { #if defined(HAVE_SSE_2) - hasshe2_sse2((const uint8_t *)in, len, (uint64_t)seed, out); + hasshe2_sse2((const uint8_t *)in, len, (uint64_t)seed, out); #else - hasshe2_portable((const uint8_t *)in, len, (uint64_t)seed, out); + hasshe2_portable((const uint8_t *)in, len, (uint64_t)seed, out); #endif } REGISTER_FAMILY(hasshe2, - $.src_url = "http://cessu.blogspot.com/2008/11/hashing-with-sse2-revisited-or-my-hash.html", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "http://cessu.blogspot.com/2008/11/hashing-with-sse2-revisited-or-my-hash.html", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(hasshe2, - $.desc = "hasshe2 (SSE2-oriented hash)", - $.hash_flags = - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 256, - $.verification_LE = 0x68CBC5F1, - $.verification_BE = 0x562ECEB4, - $.hashfn_native = Hasshe2, - $.hashfn_bswap = Hasshe2 -); + $.desc = "hasshe2 (SSE2-oriented hash)", + $.hash_flags = + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 256, + $.verification_LE = 0x68CBC5F1, + $.verification_BE = 0x562ECEB4, + $.hashfn_native = Hasshe2, + $.hashfn_bswap = Hasshe2 + ); REGISTER_HASH(hasshe2__tweaked, - $.desc = "hasshe2 (SSE2-oriented hash, tweaked to add len into IV)", - $.hash_flags = - FLAG_HASH_NO_SEED, - 
$.impl_flags = - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 256, - $.verification_LE = 0xBAF6B1BF, - $.verification_BE = 0x35A87D75, - $.hashfn_native = Hasshe2, - $.hashfn_bswap = Hasshe2 -); + $.desc = "hasshe2 (SSE2-oriented hash, tweaked to add len into IV)", + $.hash_flags = + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 256, + $.verification_LE = 0xBAF6B1BF, + $.verification_BE = 0x35A87D75, + $.hashfn_native = Hasshe2, + $.hashfn_bswap = Hasshe2 + ); diff --git a/hashes/jodyhash.cpp b/hashes/jodyhash.cpp index a926e55f..9d4e8dfd 100644 --- a/hashes/jodyhash.cpp +++ b/hashes/jodyhash.cpp @@ -51,45 +51,45 @@ static const uint32_t tail_mask_32[] = { //------------------------------------------------------------ // Version increments when algorithm changes incompatibly -//#define JODY_HASH_VERSION 5 +// #define JODY_HASH_VERSION 5 #define JODY_HASH_CONSTANT UINT32_C(0x1f3d5b79) #define JODY_HASH_SHIFT 14 -template < typename T, bool bswap > -static T jody_block_hash(const uint8_t * RESTRICT data, const size_t count, const T start_hash) { +template +static T jody_block_hash( const uint8_t * RESTRICT data, const size_t count, const T start_hash ) { T hash = start_hash; T element; T partial_salt; const T * const tail_mask = (sizeof(T) == 4) ? - (const T *)tail_mask_32 : (const T *)tail_mask_64; + (const T *)tail_mask_32 : (const T *)tail_mask_64; size_t len; /* Don't bother trying to hash a zero-length block */ - if (count == 0) return hash; + if (count == 0) { return hash; } len = count / sizeof(T); for (; len > 0; len--) { element = (sizeof(T) == 4) ? 
- GET_U32(data, 0) : GET_U64(data, 0) ; - hash += element; - hash += JODY_HASH_CONSTANT; + GET_U32(data, 0) : GET_U64(data, 0); + hash += element; + hash += JODY_HASH_CONSTANT; /* bit rotate left */ - hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(T) * 8 - JODY_HASH_SHIFT); - hash ^= element; + hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(T) * 8 - JODY_HASH_SHIFT); + hash ^= element; /* bit rotate left */ - hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(T) * 8 - JODY_HASH_SHIFT); - hash ^= JODY_HASH_CONSTANT; - hash += element; - data += sizeof(T); + hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(T) * 8 - JODY_HASH_SHIFT); + hash ^= JODY_HASH_CONSTANT; + hash += element; + data += sizeof(T); } /* Handle data tail (for blocks indivisible by sizeof(T)) */ len = count & (sizeof(T) - 1); if (len) { partial_salt = JODY_HASH_CONSTANT & tail_mask[len]; - element = (sizeof(T) == 4) ? - GET_U32(data, 0) : GET_U64(data, 0) ; + element = (sizeof(T) == 4) ? + GET_U32(data, 0) : GET_U64(data, 0); if (isLE() ^ bswap) { element &= tail_mask[len]; } else { @@ -97,9 +97,9 @@ static T jody_block_hash(const uint8_t * RESTRICT data, const size_t count, cons } hash += element; hash += partial_salt; - hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(T) * 8 - JODY_HASH_SHIFT); + hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(T) * 8 - JODY_HASH_SHIFT); hash ^= element; - hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(T) * 8 - JODY_HASH_SHIFT); + hash = (hash << JODY_HASH_SHIFT) | hash >> (sizeof(T) * 8 - JODY_HASH_SHIFT); hash ^= partial_salt; hash += element; } @@ -108,51 +108,53 @@ static T jody_block_hash(const uint8_t * RESTRICT data, const size_t count, cons } //------------------------------------------------------------ -template < bool bswap > -static void jodyhash32(const void * in, const size_t len, const seed_t seed, void * out) { - uint32_t h = jody_block_hash((const uint8_t *)in, len, (uint32_t)seed); +template +static void jodyhash32( const void * 
in, const size_t len, const seed_t seed, void * out ) { + uint32_t h = jody_block_hash((const uint8_t *)in, len, (uint32_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void jodyhash64(const void * in, const size_t len, const seed_t seed, void * out) { - uint64_t h = jody_block_hash((const uint8_t *)in, len, (uint64_t)seed); +template +static void jodyhash64( const void * in, const size_t len, const seed_t seed, void * out ) { + uint64_t h = jody_block_hash((const uint8_t *)in, len, (uint64_t)seed); + PUT_U64(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(jodyhash, - $.src_url = "https://github.com/jbruchon/jodyhash", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://github.com/jbruchon/jodyhash", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); REGISTER_HASH(jodyhash_32, - $.desc = "jodyhash v5, 32-bit", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0xFB47D60D, - $.verification_BE = 0xB94C9789, - $.hashfn_native = jodyhash32, - $.hashfn_bswap = jodyhash32 -); + $.desc = "jodyhash v5, 32-bit", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0xFB47D60D, + $.verification_BE = 0xB94C9789, + $.hashfn_native = jodyhash32, + $.hashfn_bswap = jodyhash32 + ); REGISTER_HASH(jodyhash_64, - $.desc = "jodyhash v5, 64-bit", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | // appending zero bytes might not alter hash! 
- FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x9F09E57F, - $.verification_BE = 0xF9CDDA2C, - $.hashfn_native = jodyhash64, - $.hashfn_bswap = jodyhash64 -); + $.desc = "jodyhash v5, 64-bit", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS |// appending zero bytes might not alter hash! + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x9F09E57F, + $.verification_BE = 0xF9CDDA2C, + $.hashfn_native = jodyhash64, + $.hashfn_bswap = jodyhash64 + ); diff --git a/hashes/khash.cpp b/hashes/khash.cpp index 3033f0a8..54759241 100644 --- a/hashes/khash.cpp +++ b/hashes/khash.cpp @@ -32,8 +32,9 @@ // "khash" is really *only* these two mathematical functions. // khash64_fn maps 2 64-bit inputs to a 64-bit output, // and khash32_fn maps 3 32-bit inputs to a 32-bit output. -static inline uint64_t khash64_fn(uint64_t input, uint64_t func) { +static inline uint64_t khash64_fn( uint64_t input, uint64_t func ) { uint64_t h = func; + h ^= input - 7; h ^= ROTR64(h, 31); h -= ROTR64(h, 11); @@ -46,35 +47,36 @@ static inline uint64_t khash64_fn(uint64_t input, uint64_t func) { h ^= input - 2; h -= ROTR64(h, 19); - h += ROTR64(h, 5); + h += ROTR64(h, 5); h -= ROTR64(h, 31); return h; } -static inline uint32_t khash32_fn(uint32_t input, uint32_t func1, uint32_t func2) { +static inline uint32_t khash32_fn( uint32_t input, uint32_t func1, uint32_t func2 ) { uint32_t h = input; + h = ROTR32(h, 16); h ^= func2; h -= 5; h = ROTR32(h, 17); h += func1; - h = ROTR32(h, 1); + h = ROTR32(h, 1); h += ROTR32(h, 27); - h ^= ROTR32(h, 3); + h ^= ROTR32(h, 3); h -= ROTR32(h, 17); h -= ROTR32(h, 27); h ^= input - 107; h -= ROTR32(h, 11); - h ^= ROTR32(h, 7); - h -= ROTR32(h, 5); + h ^= ROTR32(h, 7); + h -= ROTR32(h, 5); return h; } // Just initialize with the fractional part of sqrt(2) -//#define khash64(input) khash64_fn(input, 0x6a09e667f3bcc908) -//#define 
khash32(input) khash32_fn(input, 0x6a09e667, 0xf3bcc908) +// #define khash64(input) khash64_fn(input, 0x6a09e667f3bcc908) +// #define khash32(input) khash32_fn(input, 0x6a09e667, 0xf3bcc908) //------------------------------------------------------------ // These hash functions operate on any amount of data, and hash it @@ -85,18 +87,19 @@ static inline uint32_t khash32_fn(uint32_t input, uint32_t func1, uint32_t func2 // handle 64-bit seeds but return the existing results when the high // 32 bits are zero, so that the verification value is unchanged. -template < bool bswap > -static void khash32(const void * in, const size_t len, const seed_t seed, void * out) { - uint32_t seedlo = (uint32_t)(seed); - uint32_t seedhi = (uint32_t)(seed >> 32); - uint32_t hash = ~seedlo; - const uint32_t K = UINT32_C(0xf3bcc908) ^ seedhi; +template +static void khash32( const void * in, const size_t len, const seed_t seed, void * out ) { + uint32_t seedlo = (uint32_t)(seed ); + uint32_t seedhi = (uint32_t)(seed >> 32); + uint32_t hash = ~seedlo; + const uint32_t K = UINT32_C(0xf3bcc908) ^ seedhi; const uint8_t * const endw = &((const uint8_t *)in)[len & ~3]; - uint8_t * dw = (uint8_t*)in; + uint8_t * dw = (uint8_t *)in; + while (dw < endw) { hash ^= khash32_fn(GET_U32(dw, 0), seed, K); - dw += 4; + dw += 4; } const size_t flen = len & 3; if (flen) { @@ -111,16 +114,17 @@ static void khash32(const void * in, const size_t len, const seed_t seed, void * PUT_U32(hash, (uint8_t *)out, 0); } -template < bool bswap > -static void khash64(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void khash64( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t seed64 = ((uint64_t)seed ^ UINT64_C(0x6a09e66700000000)); - uint64_t hash = ~seed64; + uint64_t hash = ~seed64; const uint8_t * const endw = &((const uint8_t *)in)[len & ~7]; - uint8_t * dw = (uint8_t*)in; + uint8_t * dw = (uint8_t *)in; + while (dw < endw) { hash ^= 
khash64_fn(GET_U64(dw, 0), seed64); - dw += 8; + dw += 8; } const size_t flen = len & 7; if (flen) { @@ -137,38 +141,38 @@ static void khash64(const void * in, const size_t len, const seed_t seed, void * //------------------------------------------------------------ REGISTER_FAMILY(khash, - $.src_url = "https://github.com/Keith-Cancel/k-hash", - $.src_status = HashFamilyInfo::SRC_ACTIVE -); + $.src_url = "https://github.com/Keith-Cancel/k-hash", + $.src_status = HashFamilyInfo::SRC_ACTIVE + ); REGISTER_HASH(khash_32, - $.desc = "K-Hash 32 bit mixer-based hash", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0xA17DA29E, - $.verification_BE = 0x59073F57, - $.hashfn_native = khash32, - $.hashfn_bswap = khash32 -); + $.desc = "K-Hash 32 bit mixer-based hash", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0xA17DA29E, + $.verification_BE = 0x59073F57, + $.hashfn_native = khash32, + $.hashfn_bswap = khash32 + ); REGISTER_HASH(khash_64, - $.desc = "K-Hash 64 bit mixer-based hash", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x44BD88C4, - $.verification_BE = 0xCF3003D1, - $.hashfn_native = khash64, - $.hashfn_bswap = khash64 -); + $.desc = "K-Hash 64 bit mixer-based hash", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x44BD88C4, + $.verification_BE = 0xCF3003D1, + $.hashfn_native = khash64, + $.hashfn_bswap = khash64 + ); diff --git a/hashes/komihash.cpp b/hashes/komihash.cpp index 2e63dd8d..49a902ef 100644 --- a/hashes/komihash.cpp +++ b/hashes/komihash.cpp 
@@ -29,6 +29,7 @@ #include "Mathmult.h" //------------------------------------------------------------ + /* * Function builds an unsigned 64-bit value out of remaining bytes in a * message, and pads it with the "final byte". This function can only be @@ -39,26 +40,25 @@ * @param MsgLen Message's remaining length, in bytes; can be 0. * @param fb Final byte used for padding. */ -template < bool bswap > -static inline uint64_t kh_lpu64ec_l3(const uint8_t* const Msg, - const size_t MsgLen, uint64_t fb) { +template +static inline uint64_t kh_lpu64ec_l3( const uint8_t * const Msg, const size_t MsgLen, uint64_t fb ) { if (MsgLen < 4) { - const uint8_t* const Msg3 = Msg + MsgLen - 3; - const int ml8 = (int) (MsgLen << 3); - const uint64_t m = (uint64_t) Msg3[ 0 ] | (uint64_t) Msg3[ 1 ] << 8 | - (uint64_t) Msg3[ 2 ] << 16; + const uint8_t * const Msg3 = Msg + MsgLen - 3; + const int ml8 = (int)(MsgLen << 3); + const uint64_t m = (uint64_t)Msg3[0] | (uint64_t)Msg3[1] << 8 | + (uint64_t)Msg3[2] << 16; - return(fb << ml8 | m >> (24 - ml8)); + return fb << ml8 | m >> (24 - ml8); } - const int ml8 = (int) (MsgLen << 3); - const uint64_t mh = GET_U32(Msg + MsgLen - 4, 0); - const uint64_t ml = GET_U32(Msg, 0); + const int ml8 = (int)(MsgLen << 3); + const uint64_t mh = GET_U32(Msg + MsgLen - 4, 0); + const uint64_t ml = GET_U32(Msg , 0); if (isLE() ^ bswap) { - return(fb << ml8 | ml | (mh >> (64 - ml8)) << 32); + return fb << ml8 | ml | (mh >> (64 - ml8)) << 32; } else { - return(fb << ml8 | mh | (ml >> (64 - ml8)) << 32); + return fb << ml8 | mh | (ml >> (64 - ml8)) << 32; } } @@ -72,34 +72,33 @@ static inline uint64_t kh_lpu64ec_l3(const uint8_t* const Msg, * @param MsgLen Message's remaining length, in bytes; cannot be 0. * @param fb Final byte used for padding. 
*/ -template < bool bswap > -static inline uint64_t kh_lpu64ec_nz(const uint8_t* const Msg, - const size_t MsgLen, uint64_t fb) { +template +static inline uint64_t kh_lpu64ec_nz( const uint8_t * const Msg, const size_t MsgLen, uint64_t fb ) { if (MsgLen < 4) { fb <<= (MsgLen << 3); - uint64_t m = Msg[ 0 ]; + uint64_t m = Msg[0]; if (MsgLen > 1) { - m |= (uint64_t) Msg[ 1 ] << 8; + m |= (uint64_t)Msg[1] << 8; if (MsgLen > 2) { - m |= (uint64_t) Msg[ 2 ] << 16; + m |= (uint64_t)Msg[2] << 16; } } - return(fb | m); + return fb | m; } - const int ml8 = (int) (MsgLen << 3); - const uint64_t mh = GET_U32(Msg + MsgLen - 4, 0); - const uint64_t ml = GET_U32(Msg, 0); + const int ml8 = (int)(MsgLen << 3); + const uint64_t mh = GET_U32(Msg + MsgLen - 4, 0); + const uint64_t ml = GET_U32(Msg , 0); if (isLE() ^ bswap) { // mh has remaining bytes from MSB, so shift off low bits - return (fb << ml8 | ml | (mh >> (64 - ml8)) << 32); + return fb << ml8 | ml | (mh >> (64 - ml8)) << 32; } else { // mh has remaining bytes from LSB, so shift off high bits - return (fb << ml8 | mh | (ml >> (64 - ml8)) << 32); + return fb << ml8 | mh | (ml >> (64 - ml8)) << 32; } } @@ -113,37 +112,36 @@ static inline uint64_t kh_lpu64ec_nz(const uint8_t* const Msg, * @param MsgLen Message's remaining length, in bytes; can be 0. * @param fb Final byte used for padding. */ -template < bool bswap > -static inline uint64_t kh_lpu64ec_l4(const uint8_t* const Msg, - const size_t MsgLen, uint64_t fb) { - const int ml8 = (int) (MsgLen << 3); +template +static inline uint64_t kh_lpu64ec_l4( const uint8_t * const Msg, const size_t MsgLen, uint64_t fb ) { + const int ml8 = (int)(MsgLen << 3); if (MsgLen < 5) { if (isLE() ^ bswap) { - return(fb << ml8 | - ((uint64_t)GET_U32(Msg + MsgLen - 4, 0)) >> (32 - ml8)); + return fb << ml8 | + ((uint64_t)GET_U32(Msg + MsgLen - 4, 0)) >> (32 - ml8); } else { // If MsgLen is 0 then "32 - ml8" is 32, and a uint32_t // shifted right by 32 bits is Undefined Behavior. 
This // odd construction avoids that. - return(fb << ml8 | - (((uint64_t)GET_U32(Msg + MsgLen - 4, 0)) & - (((uint64_t)UINT32_C(-1)) >> (32 - ml8)))); + return fb << ml8 | + (((uint64_t)GET_U32(Msg + MsgLen - 4, 0)) & + (((uint64_t)UINT32_C(-1)) >> (32 - ml8))); } } else { if (isLE() ^ bswap) { - return(fb << ml8 | GET_U64(Msg + MsgLen - 8, 0) >> (64 - ml8)); + return fb << ml8 | GET_U64(Msg + MsgLen - 8, 0) >> (64 - ml8); } else { - return(fb << ml8 | (GET_U64(Msg + MsgLen - 8, 0) & (UINT64_C(-1) >> (64 - ml8)))); + return fb << ml8 | (GET_U64(Msg + MsgLen - 8, 0) & (UINT64_C(-1) >> (64 - ml8))); } } } //------------------------------------------------------------ // Wrapper around Mathmult.h routine -static inline void kh_m128(const uint64_t m1, const uint64_t m2, - uint64_t* const rl, uint64_t* const rh) { +static inline void kh_m128( const uint64_t m1, const uint64_t m2, uint64_t * const rl, uint64_t * const rh ) { uint64_t rlo, rhi; + mult64_128(rlo, rhi, m1, m2); *rl = rlo; *rh = rhi; @@ -151,29 +149,31 @@ static inline void kh_m128(const uint64_t m1, const uint64_t m2, // Common hashing round with 16-byte input, using the "r1l" and "r1h" // temporary variables. -#define KOMIHASH_HASH16(m) \ - kh_m128(Seed1 ^ GET_U64(m, 0), \ - Seed5 ^ GET_U64(m, 8), &r1l, &r1h); \ - Seed5 += r1h; \ +#define KOMIHASH_HASH16(m) \ + kh_m128(Seed1 ^ GET_U64(m, 0), \ + Seed5 ^ GET_U64(m, 8), &r1l, &r1h); \ + Seed5 += r1h; \ Seed1 = Seed5 ^ r1l; // Common hashing round without input, using the "r2l" and "r2h" temporary // variables. -#define KOMIHASH_HASHROUND() \ - kh_m128(Seed1, Seed5, &r2l, &r2h); \ - Seed5 += r2h; \ +#define KOMIHASH_HASHROUND() \ + kh_m128(Seed1, Seed5, &r2l, &r2h); \ + Seed5 += r2h; \ Seed1 = Seed5 ^ r2l; // Common hashing finalization round, with the final hashing input expected in // the "r2l" and "r2h" temporary variables. 
-#define KOMIHASH_HASHFIN() \ - kh_m128(r2l, r2h, &r1l, &r1h); \ - Seed5 += r1h; \ - Seed1 = Seed5 ^ r1l; \ +#define KOMIHASH_HASHFIN() \ + kh_m128(r2l, r2h, &r1l, &r1h); \ + Seed5 += r1h; \ + Seed1 = Seed5 ^ r1l; \ KOMIHASH_HASHROUND(); //------------------------------------------------------------ + // KOMIHASH hash function + /* * @param Msg0 The message to produce a hash from. The alignment of this * pointer is unimportant. @@ -184,10 +184,9 @@ static inline void kh_m128(const uint64_t m1, const uint64_t m2, * need endianness-correction if this value is shared between big- and * little-endian systems. */ -template < bool bswap > -static inline uint64_t komihash_impl(const void* const Msg0, size_t MsgLen, - const uint64_t UseSeed) { - const uint8_t* Msg = (const uint8_t*) Msg0; +template +static inline uint64_t komihash_impl( const void * const Msg0, size_t MsgLen, const uint64_t UseSeed ) { + const uint8_t * Msg = (const uint8_t *)Msg0; // The seeds are initialized to the first mantissa bits of PI. uint64_t Seed1 = UINT64_C(0x243F6A8885A308D3) ^ (UseSeed & UINT64_C(0x5555555555555555)); @@ -226,18 +225,16 @@ static inline uint64_t komihash_impl(const void* const Msg0, size_t MsgLen, // addition). Message's statistics and distribution are thus // unimportant. 
- r2h ^= kh_lpu64ec_l3(Msg + 8, MsgLen - 8, - 1 << (Msg[ MsgLen - 1 ] >> 7)); + r2h ^= kh_lpu64ec_l3(Msg + 8, MsgLen - 8, 1 << (Msg[MsgLen - 1] >> 7)); r2l ^= GET_U64(Msg, 0); } else if (likely(MsgLen != 0)) { - r2l ^= kh_lpu64ec_nz(Msg, MsgLen, - 1 << (Msg[ MsgLen - 1 ] >> 7)); + r2l ^= kh_lpu64ec_nz(Msg , MsgLen , 1 << (Msg[MsgLen - 1] >> 7)); } KOMIHASH_HASHFIN(); - return (Seed1); + return Seed1; } if (likely(MsgLen < 32)) { @@ -249,7 +246,7 @@ static inline uint64_t komihash_impl(const void* const Msg0, size_t MsgLen, if (MsgLen > 23) { r2h = Seed5 ^ kh_lpu64ec_l4(Msg + 24, MsgLen - 24, fb); - r2l = Seed1 ^ GET_U64(Msg, 16); + r2l = Seed1 ^ GET_U64 (Msg, 16); } else { r2l = Seed1 ^ kh_lpu64ec_l4(Msg + 16, MsgLen - 16, fb); r2h = Seed5; @@ -257,7 +254,7 @@ static inline uint64_t komihash_impl(const void* const Msg0, size_t MsgLen, KOMIHASH_HASHFIN(); - return (Seed1); + return Seed1; } if (MsgLen > 63) { @@ -272,19 +269,15 @@ static inline uint64_t komihash_impl(const void* const Msg0, size_t MsgLen, do { prefetch(Msg); - kh_m128(Seed1 ^ GET_U64(Msg, 0), - Seed5 ^ GET_U64(Msg, 8), &r1l, &r1h); + kh_m128(Seed1 ^ GET_U64(Msg, 0) , Seed5 ^ GET_U64(Msg, 8) , &r1l, &r1h); - kh_m128(Seed2 ^ GET_U64(Msg, 16), - Seed6 ^ GET_U64(Msg, 24), &r2l, &r2h); + kh_m128(Seed2 ^ GET_U64(Msg, 16), Seed6 ^ GET_U64(Msg, 24), &r2l, &r2h); - kh_m128(Seed3 ^ GET_U64(Msg, 32), - Seed7 ^ GET_U64(Msg, 40), &r3l, &r3h); + kh_m128(Seed3 ^ GET_U64(Msg, 32), Seed7 ^ GET_U64(Msg, 40), &r3l, &r3h); - kh_m128(Seed4 ^ GET_U64(Msg, 48), - Seed8 ^ GET_U64(Msg, 56), &r4l, &r4h); + kh_m128(Seed4 ^ GET_U64(Msg, 48), Seed8 ^ GET_U64(Msg, 56), &r4l, &r4h); - Msg += 64; + Msg += 64; MsgLen -= 64; // Such "shifting" arrangement (below) does not increase @@ -298,11 +291,10 @@ static inline uint64_t komihash_impl(const void* const Msg0, size_t MsgLen, Seed6 += r2h; Seed7 += r3h; Seed8 += r4h; - Seed2 = Seed5 ^ r2l; - Seed3 = Seed6 ^ r3l; - Seed4 = Seed7 ^ r4l; - Seed1 = Seed8 ^ r1l; - + Seed2 = Seed5 ^ r2l; 
+ Seed3 = Seed6 ^ r3l; + Seed4 = Seed7 ^ r4l; + Seed1 = Seed8 ^ r1l; } while (likely(MsgLen > 63)); Seed5 ^= Seed6 ^ Seed7 ^ Seed8; @@ -312,25 +304,25 @@ static inline uint64_t komihash_impl(const void* const Msg0, size_t MsgLen, prefetch(Msg); if (likely(MsgLen > 31)) { - KOMIHASH_HASH16(Msg); + KOMIHASH_HASH16(Msg ); KOMIHASH_HASH16(Msg + 16); - Msg += 32; + Msg += 32; MsgLen -= 32; } if (MsgLen > 15) { KOMIHASH_HASH16(Msg); - Msg += 16; + Msg += 16; MsgLen -= 16; } - const uint64_t fb = 1 << (Msg[ MsgLen - 1 ] >> 7); + const uint64_t fb = 1 << (Msg[MsgLen - 1] >> 7); if (MsgLen > 7) { r2h = Seed5 ^ kh_lpu64ec_l4(Msg + 8, MsgLen - 8, fb); - r2l = Seed1 ^ GET_U64(Msg, 0); + r2l = Seed1 ^ GET_U64 (Msg, 0); } else { r2l = Seed1 ^ kh_lpu64ec_l4(Msg, MsgLen, fb); r2h = Seed5; @@ -338,34 +330,35 @@ static inline uint64_t komihash_impl(const void* const Msg0, size_t MsgLen, KOMIHASH_HASHFIN(); - return (Seed1); + return Seed1; } //------------------------------------------------------------ -template < bool bswap > -static void komihash(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void komihash( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h = komihash_impl(in, len, (uint64_t)seed); + PUT_U64(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(komihash, - $.src_url = "https://github.com/avaneev/komihash/", - $.src_status = HashFamilyInfo::SRC_ACTIVE -); + $.src_url = "https://github.com/avaneev/komihash/", + $.src_status = HashFamilyInfo::SRC_ACTIVE + ); REGISTER_HASH(komihash, - $.desc = "komihash v4.3", - $.hash_flags = - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_SHIFT_VARIABLE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x703624A4, - $.verification_BE = 0xB954DBAB, - $.hashfn_native = komihash, - $.hashfn_bswap = komihash -); + $.desc = "komihash 
v4.3", + $.hash_flags = + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_SHIFT_VARIABLE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x703624A4, + $.verification_BE = 0xB954DBAB, + $.hashfn_native = komihash, + $.hashfn_bswap = komihash + ); diff --git a/hashes/lookup3.cpp b/hashes/lookup3.cpp index dcdba501..0aa5edc7 100644 --- a/hashes/lookup3.cpp +++ b/hashes/lookup3.cpp @@ -8,8 +8,8 @@ #include "Hashlib.h" //------------------------------------------------------------ -#define mix(a,b,c) \ -{ \ +#define mix(a,b,c) \ +{ \ a -= c; a ^= ROTL32(c, 4); c += b; \ b -= a; b ^= ROTL32(a, 6); a += c; \ c -= b; c ^= ROTL32(b, 8); b += a; \ @@ -18,8 +18,8 @@ c -= b; c ^= ROTL32(b, 4); b += a; \ } -#define final(a,b,c) \ -{ \ +#define final(a,b,c) \ +{ \ c ^= b; c -= ROTL32(b,14); \ a ^= c; a -= ROTL32(c,11); \ b ^= a; b -= ROTL32(a,25); \ @@ -29,87 +29,87 @@ c ^= b; c -= ROTL32(b,24); \ } -template < bool hash64, bool bswap > -static void hashlittle(const uint8_t * key, size_t length, uint64_t seed64, uint8_t * out) { - uint32_t a,b,c; /* internal state */ +template +static void hashlittle( const uint8_t * key, size_t length, uint64_t seed64, uint8_t * out ) { + uint32_t a, b, c; /* internal state */ - /* Set up the internal state */ - a = b = c = 0xdeadbeef + ((uint32_t)length) + ((uint32_t)seed64); - c += (uint32_t)(seed64 >> 32); + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + ((uint32_t)seed64); + c += (uint32_t)(seed64 >> 32); - /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ - while (length > 12) { - a += GET_U32(key, 0); - b += GET_U32(key, 4); - c += GET_U32(key, 8); - mix(a,b,c); - length -= 12; - key += 12; - } + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) { + a += GET_U32(key, 0); + b += GET_U32(key, 4); + c += GET_U32(key, 8); + mix(a, b, c); + length -= 12; + 
key += 12; + } - /*----------------------------- handle the last (probably partial) block */ - switch(length) { - case 12: c+=GET_U32(key, 8); - b+=GET_U32(key, 4); - a+=GET_U32(key, 0); break; - case 11: c+=((uint32_t)key[10])<<16; /* fall through */ - case 10: c+=((uint32_t)key[9])<<8; /* fall through */ - case 9 : c+=key[8]; /* fall through */ - case 8 : b+=GET_U32(key, 4); - a+=GET_U32(key, 0); break; - case 7 : b+=((uint32_t)key[6])<<16; /* fall through */ - case 6 : b+=((uint32_t)key[5])<<8; /* fall through */ - case 5 : b+=key[4]; /* fall through */ - case 4 : a+=GET_U32(key, 0); break; - case 3 : a+=((uint32_t)key[2])<<16; /* fall through */ - case 2 : a+=((uint32_t)key[1])<<8; /* fall through */ - case 1 : a+=key[0]; break; - case 0 : goto out; /* zero length strings require no more mixing */ - } + /*----------------------------- handle the last (probably partial) block */ + switch (length) { + case 12: c += GET_U32(key, 8); + b += GET_U32(key, 4); + a += GET_U32(key, 0); break; + case 11: c += ((uint32_t)key[10]) << 16; /* fall through */ + case 10: c += ((uint32_t)key[ 9]) << 8; /* fall through */ + case 9: c += key[8]; /* fall through */ + case 8: b += GET_U32(key, 4); + a += GET_U32(key, 0); break; + case 7: b += ((uint32_t)key[ 6]) << 16; /* fall through */ + case 6: b += ((uint32_t)key[ 5]) << 8; /* fall through */ + case 5: b += key[4]; /* fall through */ + case 4: a += GET_U32(key, 0); break; + case 3: a += ((uint32_t)key[ 2]) << 16; /* fall through */ + case 2: a += ((uint32_t)key[ 1]) << 8; /* fall through */ + case 1: a += key[0]; break; + case 0: goto out; /* zero length strings require no more mixing */ + } - final(a,b,c); + final (a, b, c); - out: - PUT_U32(c, out, 0); - if (hash64) { PUT_U32(b, out, 4); } + out: + PUT_U32(c, out, 0); + if (hash64) { PUT_U32(b, out, 4); } } //------------------------------------------------------------ -template < bool hash64, bool bswap > -static void lookup3(const void * in, const size_t len, const seed_t 
seed, void * out) { - hashlittle((const uint8_t *)in, len, (uint64_t)seed, (uint8_t *)out); +template +static void lookup3( const void * in, const size_t len, const seed_t seed, void * out ) { + hashlittle((const uint8_t *)in, len, (uint64_t)seed, (uint8_t *)out); } //------------------------------------------------------------ REGISTER_FAMILY(lookup3, - $.src_url = "http://www.burtleburtle.net/bob/c/lookup3.c", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "http://www.burtleburtle.net/bob/c/lookup3.c", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(lookup3__32, - $.desc = "Bob Jenkins' lookup3 (32-bit output)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 32, - $.verification_LE = 0x3D83917A, - $.verification_BE = 0x18E6AA76, - $.hashfn_native = lookup3, - $.hashfn_bswap = lookup3 -); + $.desc = "Bob Jenkins' lookup3 (32-bit output)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 32, + $.verification_LE = 0x3D83917A, + $.verification_BE = 0x18E6AA76, + $.hashfn_native = lookup3, + $.hashfn_bswap = lookup3 + ); REGISTER_HASH(lookup3, - $.desc = "Bob Jenkins' lookup3 (64-bit output)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 64, - $.verification_LE = 0x6AE8AB7C, - $.verification_BE = 0x074EBE4E, - $.hashfn_native = lookup3, - $.hashfn_bswap = lookup3 -); + $.desc = "Bob Jenkins' lookup3 (64-bit output)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 64, + $.verification_LE = 0x6AE8AB7C, + $.verification_BE = 0x074EBE4E, + $.hashfn_native = lookup3, + $.hashfn_bswap = lookup3 + ); diff --git a/hashes/md5.cpp b/hashes/md5.cpp index 4f3a557c..974d3c25 100644 --- a/hashes/md5.cpp +++ b/hashes/md5.cpp @@ -50,9 +50,9 @@ //----------------------------------------------------------------------------- 
// Raw MD5 implementation typedef struct { - uint32_t total[2]; /*!< number of bytes processed */ - uint32_t state[4]; /*!< intermediate digest state */ - uint8_t buffer[64]; /*!< data block being processed */ + uint32_t total[2]; /*!< number of bytes processed */ + uint32_t state[4]; /*!< intermediate digest state */ + uint8_t buffer[64]; /*!< data block being processed */ uint8_t ipad[64]; /*!< HMAC: inner padding */ uint8_t opad[64]; /*!< HMAC: outer padding */ @@ -61,7 +61,7 @@ typedef struct { /* * MD5 context setup */ -static void md5_start(md5_context * ctx) { +static void md5_start( md5_context * ctx ) { ctx->total[0] = 0; ctx->total[1] = 0; @@ -74,32 +74,32 @@ static void md5_start(md5_context * ctx) { /* * MD5 process single data block */ -template < bool bswap > -static void md5_process(md5_context * ctx, uint8_t data[64]) { +template +static void md5_process( md5_context * ctx, uint8_t data[64] ) { uint32_t X[16], A, B, C, D; /* * These macros will cache the converted input data when byteswapping * is requested, and will just read directly from data when possible. */ -#define CACHEBLK(k) (bswap ? \ - (X[k]=GET_U32(data, 4*(k))) : \ +#define CACHEBLK(k) (bswap ? \ + (X[k]=GET_U32(data, 4*(k))) : \ (GET_U32(data,4*(k)))) -#define GETBLK(k) (bswap ? \ - (X[k]) : \ +#define GETBLK(k) (bswap ? 
\ + (X[k]) : \ (GET_U32(data,4*(k)))) -#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n))) +#define S(x, n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n))) -#define P(a,b,c,d,k,s,t) \ - { \ - a += F(b,c,d) + CACHEBLK(k) + t; a = S(a,s) + b; \ +#define P(a,b,c,d,k,s,t) \ + { \ + a += F(b,c,d) + CACHEBLK(k) + t; a = S(a,s) + b; \ } -#define Q(a,b,c,d,k,s,t) \ - { \ - a += F(b,c,d) + GETBLK(k) + t; a = S(a,s) + b; \ +#define Q(a,b,c,d,k,s,t) \ + { \ + a += F(b,c,d) + GETBLK(k) + t; a = S(a,s) + b; \ } A = ctx->state[0]; @@ -107,89 +107,89 @@ static void md5_process(md5_context * ctx, uint8_t data[64]) { C = ctx->state[2]; D = ctx->state[3]; -#define F(x,y,z) (z ^ (x & (y ^ z))) -//#define F(x,y,z) ((x & y) | (~x & z)) - - P( A, B, C, D, 0, 7, 0xD76AA478 ); - P( D, A, B, C, 1, 12, 0xE8C7B756 ); - P( C, D, A, B, 2, 17, 0x242070DB ); - P( B, C, D, A, 3, 22, 0xC1BDCEEE ); - P( A, B, C, D, 4, 7, 0xF57C0FAF ); - P( D, A, B, C, 5, 12, 0x4787C62A ); - P( C, D, A, B, 6, 17, 0xA8304613 ); - P( B, C, D, A, 7, 22, 0xFD469501 ); - P( A, B, C, D, 8, 7, 0x698098D8 ); - P( D, A, B, C, 9, 12, 0x8B44F7AF ); - P( C, D, A, B, 10, 17, 0xFFFF5BB1 ); - P( B, C, D, A, 11, 22, 0x895CD7BE ); - P( A, B, C, D, 12, 7, 0x6B901122 ); - P( D, A, B, C, 13, 12, 0xFD987193 ); - P( C, D, A, B, 14, 17, 0xA679438E ); - P( B, C, D, A, 15, 22, 0x49B40821 ); +#define F(x, y, z) (z ^ (x & (y ^ z))) +// #define F(x,y,z) ((x & y) | (~x & z)) + + P(A, B, C, D, 0, 7, 0xD76AA478); + P(D, A, B, C, 1, 12, 0xE8C7B756); + P(C, D, A, B, 2, 17, 0x242070DB); + P(B, C, D, A, 3, 22, 0xC1BDCEEE); + P(A, B, C, D, 4, 7, 0xF57C0FAF); + P(D, A, B, C, 5, 12, 0x4787C62A); + P(C, D, A, B, 6, 17, 0xA8304613); + P(B, C, D, A, 7, 22, 0xFD469501); + P(A, B, C, D, 8, 7, 0x698098D8); + P(D, A, B, C, 9, 12, 0x8B44F7AF); + P(C, D, A, B, 10, 17, 0xFFFF5BB1); + P(B, C, D, A, 11, 22, 0x895CD7BE); + P(A, B, C, D, 12, 7, 0x6B901122); + P(D, A, B, C, 13, 12, 0xFD987193); + P(C, D, A, B, 14, 17, 0xA679438E); + P(B, C, D, A, 15, 22, 
0x49B40821); #undef F -#define F(x,y,z) (y ^ (z & (x ^ y))) -//#define F(x,y,z) ((z & x) | (~z & y)) - - Q( A, B, C, D, 1, 5, 0xF61E2562 ); - Q( D, A, B, C, 6, 9, 0xC040B340 ); - Q( C, D, A, B, 11, 14, 0x265E5A51 ); - Q( B, C, D, A, 0, 20, 0xE9B6C7AA ); - Q( A, B, C, D, 5, 5, 0xD62F105D ); - Q( D, A, B, C, 10, 9, 0x02441453 ); - Q( C, D, A, B, 15, 14, 0xD8A1E681 ); - Q( B, C, D, A, 4, 20, 0xE7D3FBC8 ); - Q( A, B, C, D, 9, 5, 0x21E1CDE6 ); - Q( D, A, B, C, 14, 9, 0xC33707D6 ); - Q( C, D, A, B, 3, 14, 0xF4D50D87 ); - Q( B, C, D, A, 8, 20, 0x455A14ED ); - Q( A, B, C, D, 13, 5, 0xA9E3E905 ); - Q( D, A, B, C, 2, 9, 0xFCEFA3F8 ); - Q( C, D, A, B, 7, 14, 0x676F02D9 ); - Q( B, C, D, A, 12, 20, 0x8D2A4C8A ); +#define F(x, y, z) (y ^ (z & (x ^ y))) +// #define F(x,y,z) ((z & x) | (~z & y)) + + Q(A, B, C, D, 1, 5, 0xF61E2562); + Q(D, A, B, C, 6, 9, 0xC040B340); + Q(C, D, A, B, 11, 14, 0x265E5A51); + Q(B, C, D, A, 0, 20, 0xE9B6C7AA); + Q(A, B, C, D, 5, 5, 0xD62F105D); + Q(D, A, B, C, 10, 9, 0x02441453); + Q(C, D, A, B, 15, 14, 0xD8A1E681); + Q(B, C, D, A, 4, 20, 0xE7D3FBC8); + Q(A, B, C, D, 9, 5, 0x21E1CDE6); + Q(D, A, B, C, 14, 9, 0xC33707D6); + Q(C, D, A, B, 3, 14, 0xF4D50D87); + Q(B, C, D, A, 8, 20, 0x455A14ED); + Q(A, B, C, D, 13, 5, 0xA9E3E905); + Q(D, A, B, C, 2, 9, 0xFCEFA3F8); + Q(C, D, A, B, 7, 14, 0x676F02D9); + Q(B, C, D, A, 12, 20, 0x8D2A4C8A); #undef F -#define F(x,y,z) (x ^ y ^ z) - - Q( A, B, C, D, 5, 4, 0xFFFA3942 ); - Q( D, A, B, C, 8, 11, 0x8771F681 ); - Q( C, D, A, B, 11, 16, 0x6D9D6122 ); - Q( B, C, D, A, 14, 23, 0xFDE5380C ); - Q( A, B, C, D, 1, 4, 0xA4BEEA44 ); - Q( D, A, B, C, 4, 11, 0x4BDECFA9 ); - Q( C, D, A, B, 7, 16, 0xF6BB4B60 ); - Q( B, C, D, A, 10, 23, 0xBEBFBC70 ); - Q( A, B, C, D, 13, 4, 0x289B7EC6 ); - Q( D, A, B, C, 0, 11, 0xEAA127FA ); - Q( C, D, A, B, 3, 16, 0xD4EF3085 ); - Q( B, C, D, A, 6, 23, 0x04881D05 ); - Q( A, B, C, D, 9, 4, 0xD9D4D039 ); - Q( D, A, B, C, 12, 11, 0xE6DB99E5 ); - Q( C, D, A, B, 15, 16, 0x1FA27CF8 ); - Q( B, C, D, A, 2, 
23, 0xC4AC5665 ); +#define F(x, y, z) (x ^ y ^ z) + + Q(A, B, C, D, 5, 4, 0xFFFA3942); + Q(D, A, B, C, 8, 11, 0x8771F681); + Q(C, D, A, B, 11, 16, 0x6D9D6122); + Q(B, C, D, A, 14, 23, 0xFDE5380C); + Q(A, B, C, D, 1, 4, 0xA4BEEA44); + Q(D, A, B, C, 4, 11, 0x4BDECFA9); + Q(C, D, A, B, 7, 16, 0xF6BB4B60); + Q(B, C, D, A, 10, 23, 0xBEBFBC70); + Q(A, B, C, D, 13, 4, 0x289B7EC6); + Q(D, A, B, C, 0, 11, 0xEAA127FA); + Q(C, D, A, B, 3, 16, 0xD4EF3085); + Q(B, C, D, A, 6, 23, 0x04881D05); + Q(A, B, C, D, 9, 4, 0xD9D4D039); + Q(D, A, B, C, 12, 11, 0xE6DB99E5); + Q(C, D, A, B, 15, 16, 0x1FA27CF8); + Q(B, C, D, A, 2, 23, 0xC4AC5665); #undef F -#define F(x,y,z) (y ^ (x | ~z)) - - Q( A, B, C, D, 0, 6, 0xF4292244 ); - Q( D, A, B, C, 7, 10, 0x432AFF97 ); - Q( C, D, A, B, 14, 15, 0xAB9423A7 ); - Q( B, C, D, A, 5, 21, 0xFC93A039 ); - Q( A, B, C, D, 12, 6, 0x655B59C3 ); - Q( D, A, B, C, 3, 10, 0x8F0CCC92 ); - Q( C, D, A, B, 10, 15, 0xFFEFF47D ); - Q( B, C, D, A, 1, 21, 0x85845DD1 ); - Q( A, B, C, D, 8, 6, 0x6FA87E4F ); - Q( D, A, B, C, 15, 10, 0xFE2CE6E0 ); - Q( C, D, A, B, 6, 15, 0xA3014314 ); - Q( B, C, D, A, 13, 21, 0x4E0811A1 ); - Q( A, B, C, D, 4, 6, 0xF7537E82 ); - Q( D, A, B, C, 11, 10, 0xBD3AF235 ); - Q( C, D, A, B, 2, 15, 0x2AD7D2BB ); - Q( B, C, D, A, 9, 21, 0xEB86D391 ); +#define F(x, y, z) (y ^ (x | ~z)) + + Q(A, B, C, D, 0, 6, 0xF4292244); + Q(D, A, B, C, 7, 10, 0x432AFF97); + Q(C, D, A, B, 14, 15, 0xAB9423A7); + Q(B, C, D, A, 5, 21, 0xFC93A039); + Q(A, B, C, D, 12, 6, 0x655B59C3); + Q(D, A, B, C, 3, 10, 0x8F0CCC92); + Q(C, D, A, B, 10, 15, 0xFFEFF47D); + Q(B, C, D, A, 1, 21, 0x85845DD1); + Q(A, B, C, D, 8, 6, 0x6FA87E4F); + Q(D, A, B, C, 15, 10, 0xFE2CE6E0); + Q(C, D, A, B, 6, 15, 0xA3014314); + Q(B, C, D, A, 13, 21, 0x4E0811A1); + Q(A, B, C, D, 4, 6, 0xF7537E82); + Q(D, A, B, C, 11, 10, 0xBD3AF235); + Q(C, D, A, B, 2, 15, 0x2AD7D2BB); + Q(B, C, D, A, 9, 21, 0xEB86D391); #undef F @@ -202,18 +202,18 @@ static void md5_process(md5_context * ctx, uint8_t data[64]) { /* * 
MD5 process buffer */ -template < bool bswap > -static void md5_update(md5_context * ctx, uint8_t * input, size_t ilen) { +template +static void md5_update( md5_context * ctx, uint8_t * input, size_t ilen ) { uint32_t fill, left; if (ilen == 0) { return; } if (ilen >= UINT32_C(0xffffffff)) { return; } - left = ctx->total[0] & 0x3F; + left = ctx->total[0] & 0x3F; fill = 64 - left; - ctx->total[0] += ilen; - ctx->total[0] &= 0xFFFFFFFF; + ctx->total [0] += ilen; + ctx->total [0] &= 0xFFFFFFFF; if (ctx->total[0] < (uint32_t)ilen) { ctx->total[1]++; } @@ -222,7 +222,7 @@ static void md5_update(md5_context * ctx, uint8_t * input, size_t ilen) { md5_process(ctx, ctx->buffer); input += fill; ilen -= fill; - left = 0; + left = 0; } while (ilen >= 64) { @@ -246,23 +246,23 @@ static const uint8_t md5_padding[64] = { /* * MD5 final digest */ -template < bool bswap > -static void md5_finish(md5_context * ctx, uint8_t output[16]) { +template +static void md5_finish( md5_context * ctx, uint8_t output[16] ) { uint32_t last, padn; uint32_t high, low; - uint8_t msglen[8]; + uint8_t msglen[8]; - high = (ctx->total[0] >> 29) - | (ctx->total[1] << 3); - low = (ctx->total[0] << 3); + high = (ctx->total[0] >> 29) | + (ctx->total[1] << 3); + low = (ctx->total[0] << 3); - PUT_U32(low, msglen, 0); + PUT_U32(low , msglen, 0); PUT_U32(high, msglen, 4); - last = ctx->total[0] & 0x3F; + last = ctx->total [0] & 0x3F; padn = (last < 56) ? 
(56 - last) : (120 - last); - md5_update(ctx, (uint8_t *) md5_padding, padn); + md5_update(ctx, (uint8_t *)md5_padding, padn); md5_update(ctx, msglen, 8); PUT_U32(ctx->state[0], output, 0); @@ -273,9 +273,10 @@ static void md5_finish(md5_context * ctx, uint8_t output[16]) { //----------------------------------------------------------------------------- // Homegrown MD5 seeding function -static FORCE_INLINE void seed_md5(md5_context * ctx, const seed_t seed) { +static FORCE_INLINE void seed_md5( md5_context * ctx, const seed_t seed ) { const uint32_t seedlo = seed & 0xFFFFFFFF; const uint32_t seedhi = (seed >> 32) & 0xFFFFFFFF; + ctx->state[0] ^= seedlo; ctx->state[1] ^= seedhi; ctx->state[2] += seedlo; @@ -284,14 +285,14 @@ static FORCE_INLINE void seed_md5(md5_context * ctx, const seed_t seed) { //----------------------------------------------------------------------------- // Wrappers for rest of SMHasher3 -template < uint32_t hashbits, bool bswap > -static void MD5(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void MD5( const void * in, const size_t len, const seed_t seed, void * out ) { md5_context md5_ctx; - uint8_t buf[16]; - uint8_t * hash = (hashbits >= 128) ? (uint8_t *)out : &buf[0]; + uint8_t buf[16]; + uint8_t * hash = (hashbits >= 128) ? (uint8_t *)out : &buf[0]; - md5_start (&md5_ctx); - seed_md5 (&md5_ctx, seed); + md5_start(&md5_ctx); + seed_md5(&md5_ctx, seed); md5_update(&md5_ctx, (uint8_t *)in, len); md5_finish(&md5_ctx, hash); @@ -300,74 +301,74 @@ static void MD5(const void * in, const size_t len, const seed_t seed, void * out // hash round, followed by "C" in the previous, etc. 
if (hashbits < 128) { if (hashbits <= 96) { - memcpy(out, &hash[4], (hashbits+7)/8); + memcpy(out, &hash[4], (hashbits + 7) / 8); } else { - memcpy(out, &hash[0], (hashbits+7)/8); + memcpy(out, &hash[0], (hashbits + 7) / 8); } } } REGISTER_FAMILY(md5, - $.src_url = "https://github.com/MattiaOng/md5-cracker/blob/master/md5.c", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/MattiaOng/md5-cracker/blob/master/md5.c", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(MD5__32, - $.desc = "MD5, bits 32-63", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_CRYPTOGRAPHIC_WEAK | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_GPL3 | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 32, - $.verification_LE = 0x4003D7EE, - $.verification_BE = 0x53A2E981, - $.hashfn_native = MD5<32,false>, - $.hashfn_bswap = MD5<32,true> -); + $.desc = "MD5, bits 32-63", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_CRYPTOGRAPHIC_WEAK | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_GPL3 | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 32, + $.verification_LE = 0x4003D7EE, + $.verification_BE = 0x53A2E981, + $.hashfn_native = MD5<32, false>, + $.hashfn_bswap = MD5<32, true> + ); REGISTER_HASH(MD5__64, - $.desc = "MD5, bits 32-95", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_CRYPTOGRAPHIC_WEAK | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_GPL3 | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 64, - $.verification_LE = 0xF2E011D4, - $.verification_BE = 0xDE2E1FAD, - $.hashfn_native = MD5<64,false>, - $.hashfn_bswap = MD5<64,true> -); + $.desc = "MD5, bits 32-95", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + 
FLAG_HASH_CRYPTOGRAPHIC_WEAK | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_GPL3 | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 64, + $.verification_LE = 0xF2E011D4, + $.verification_BE = 0xDE2E1FAD, + $.hashfn_native = MD5<64, false>, + $.hashfn_bswap = MD5<64, true> + ); REGISTER_HASH(MD5, - $.desc = "MD5", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_CRYPTOGRAPHIC_WEAK | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_GPL3 | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 128, - $.verification_LE = 0x1363415D, - $.verification_BE = 0x242A18E0, - $.hashfn_native = MD5<128,false>, - $.hashfn_bswap = MD5<128,true> -); + $.desc = "MD5", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_CRYPTOGRAPHIC_WEAK | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_GPL3 | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 128, + $.verification_LE = 0x1363415D, + $.verification_BE = 0x242A18E0, + $.hashfn_native = MD5<128, false>, + $.hashfn_bswap = MD5<128, true> + ); diff --git a/hashes/meowhash.cpp b/hashes/meowhash.cpp index e96ad6ca..9ad6f4c4 100644 --- a/hashes/meowhash.cpp +++ b/hashes/meowhash.cpp @@ -30,91 +30,91 @@ #if defined(HAVE_X86_64_AES) && defined(HAVE_SSE_4_1) -#include "Intrinsics.h" + #include "Intrinsics.h" typedef __m128i meow_u128; //------------------------------------------------------------ -//#define MEOW_HASH_VERSION 5 -//#define MEOW_HASH_VERSION_NAME "0.5/calico" +// #define MEOW_HASH_VERSION 5 +// #define MEOW_HASH_VERSION_NAME "0.5/calico" -#define MEOW_PAGESIZE 4096 -#define MEOW_PREFETCH 4096 -#define MEOW_PREFETCH_LIMIT 0x3ff + #define MEOW_PAGESIZE 4096 + #define MEOW_PREFETCH 4096 + #define MEOW_PREFETCH_LIMIT 0x3ff 
// fwojcik: Why is this needed? -#if defined(_MSC_VER) && !defined(__clang__) -#define INSTRUCTION_REORDER_BARRIER _ReadWriteBarrier() -#else -#define INSTRUCTION_REORDER_BARRIER -#endif + #if defined(_MSC_VER) && !defined(__clang__) + #define INSTRUCTION_REORDER_BARRIER _ReadWriteBarrier() + #else + #define INSTRUCTION_REORDER_BARRIER + #endif //------------------------------------------------------------ -#define MeowU64From(A, I) (_mm_extract_epi64((A), (I))) -#define MeowU32From(A, I) (_mm_extract_epi32((A), (I))) -#define prefetcht0(A) _mm_prefetch((char const *)(A), _MM_HINT_T0) -#define movdqu_imm(B) _mm_loadu_si128((meow_u128 *)(B)) -#define movdqu(A, B) A = _mm_loadu_si128((meow_u128 *)(B)) -#define movq(A, B, C) A = _mm_set_epi64x(C, B); -#define aesdec(A, B) A = _mm_aesdec_si128(A, B) -#define pshufb(A, B) A = _mm_shuffle_epi8(A, B) -#define pxor(A, B) A = _mm_xor_si128(A, B) -#define paddq(A, B) A = _mm_add_epi64(A, B) -#define pand(A, B) A = _mm_and_si128(A, B) -#define palignr(A, B, i) A = _mm_alignr_epi8(A, B, i) + #define MeowU64From(A, I) (_mm_extract_epi64((A), (I))) + #define MeowU32From(A, I) (_mm_extract_epi32((A), (I))) + #define prefetcht0(A) _mm_prefetch((char const *)(A), _MM_HINT_T0) + #define movdqu_imm(B) _mm_loadu_si128((meow_u128 *)(B)) + #define movdqu(A, B) A = _mm_loadu_si128((meow_u128 *)(B)) + #define movq(A, B, C) A = _mm_set_epi64x(C, B); + #define aesdec(A, B) A = _mm_aesdec_si128(A, B) + #define pshufb(A, B) A = _mm_shuffle_epi8(A, B) + #define pxor(A, B) A = _mm_xor_si128(A, B) + #define paddq(A, B) A = _mm_add_epi64(A, B) + #define pand(A, B) A = _mm_and_si128(A, B) + #define palignr(A, B, i) A = _mm_alignr_epi8(A, B, i) // NOTE(casey): pxor_clear is a nonsense thing that is only here // because compilers don't detect xor(a, a) is clearing a :( -#define pxor_clear(A, B) A = _mm_setzero_si128(); + #define pxor_clear(A, B) A = _mm_setzero_si128(); //------------------------------------------------------------ -#define 
MEOW_MIX_REG(r1, r2, r3, r4, r5, i1, i2, i3, i4) \ - aesdec(r1, r2); \ - INSTRUCTION_REORDER_BARRIER; \ - paddq(r3, i1); \ - pxor(r2, i2); \ - aesdec(r2, r4); \ - INSTRUCTION_REORDER_BARRIER; \ - paddq(r5, i3); \ +#define MEOW_MIX_REG(r1, r2, r3, r4, r5, i1, i2, i3, i4) \ + aesdec(r1, r2); \ + INSTRUCTION_REORDER_BARRIER; \ + paddq(r3, i1); \ + pxor(r2, i2); \ + aesdec(r2, r4); \ + INSTRUCTION_REORDER_BARRIER; \ + paddq(r5, i3); \ pxor(r4, i4); -#define MEOW_MIX(r1, r2, r3, r4, r5, ptr) \ - if (bswap) { \ - MEOW_MIX_REG(r1, r2, r3, r4, r5, \ - mm_bswap64(movdqu_imm((ptr) + 15)), \ - mm_bswap64(movdqu_imm((ptr) + 0)), \ - mm_bswap64(movdqu_imm((ptr) + 1)), \ - mm_bswap64(movdqu_imm((ptr) + 16))) \ - } else { \ - MEOW_MIX_REG(r1, r2, r3, r4, r5, \ - movdqu_imm((ptr) + 15), \ - movdqu_imm((ptr) + 0), \ - movdqu_imm((ptr) + 1), \ - movdqu_imm((ptr) + 16)) \ +#define MEOW_MIX(r1, r2, r3, r4, r5, ptr) \ + if (bswap) { \ + MEOW_MIX_REG(r1, r2, r3, r4, r5, \ + mm_bswap64(movdqu_imm((ptr) + 15)), \ + mm_bswap64(movdqu_imm((ptr) + 0)), \ + mm_bswap64(movdqu_imm((ptr) + 1)), \ + mm_bswap64(movdqu_imm((ptr) + 16))) \ + } else { \ + MEOW_MIX_REG(r1, r2, r3, r4, r5, \ + movdqu_imm((ptr) + 15), \ + movdqu_imm((ptr) + 0), \ + movdqu_imm((ptr) + 1), \ + movdqu_imm((ptr) + 16)) \ } -#define MEOW_SHUFFLE(r1, r2, r3, r4, r5, r6) \ - aesdec(r1, r4); \ - paddq(r2, r5); \ - pxor(r4, r6); \ - aesdec(r4, r2); \ - paddq(r5, r6); \ +#define MEOW_SHUFFLE(r1, r2, r3, r4, r5, r6) \ + aesdec(r1, r4); \ + paddq(r2, r5); \ + pxor(r4, r6); \ + aesdec(r4, r2); \ + paddq(r5, r6); \ pxor(r2, r3) //------------------------------------------------------------ static const uint8_t MeowShiftAdjust[32] = { - 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, - 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; static const uint8_t MeowMaskLen[32] = { - 255,255,255,255, - 255,255,255,255, - 255,255,255,255, - 
255,255,255,255, - 0,0,0,0, - 0,0,0,0, - 0,0,0,0, - 0,0,0,0 + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 255, 255, 255, 255, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 }; // NOTE(casey): The default seed is now a "nothing-up-our-sleeves" @@ -143,8 +143,8 @@ static const uint8_t MeowDefaultSeed[128] = { // // NOTE(casey): Single block version // -template < bool bswap > -static meow_u128 MeowHash(const void * Seed128Init, size_t Len, const void * SourceInit, uint64_t extraseed) { +template +static meow_u128 MeowHash( const void * Seed128Init, size_t Len, const void * SourceInit, uint64_t extraseed ) { const uint8_t * const SourceInit8 = (const uint8_t *)SourceInit; // NOTE(casey): xmm0-xmm7 are the hash accumulation lanes // NOTE(casey): xmm8-xmm15 hold values to be appended (residual, length) @@ -180,33 +180,31 @@ static meow_u128 MeowHash(const void * Seed128Init, size_t Len, const void * Sou prefetcht0(rax + MEOW_PREFETCH + 0x80); prefetcht0(rax + MEOW_PREFETCH + 0xc0); - MEOW_MIX(xmm0,xmm4,xmm6,xmm1,xmm2, rax + 0x00); - MEOW_MIX(xmm1,xmm5,xmm7,xmm2,xmm3, rax + 0x20); - MEOW_MIX(xmm2,xmm6,xmm0,xmm3,xmm4, rax + 0x40); - MEOW_MIX(xmm3,xmm7,xmm1,xmm4,xmm5, rax + 0x60); - MEOW_MIX(xmm4,xmm0,xmm2,xmm5,xmm6, rax + 0x80); - MEOW_MIX(xmm5,xmm1,xmm3,xmm6,xmm7, rax + 0xa0); - MEOW_MIX(xmm6,xmm2,xmm4,xmm7,xmm0, rax + 0xc0); - MEOW_MIX(xmm7,xmm3,xmm5,xmm0,xmm1, rax + 0xe0); + MEOW_MIX(xmm0, xmm4, xmm6, xmm1, xmm2, rax + 0x00); + MEOW_MIX(xmm1, xmm5, xmm7, xmm2, xmm3, rax + 0x20); + MEOW_MIX(xmm2, xmm6, xmm0, xmm3, xmm4, rax + 0x40); + MEOW_MIX(xmm3, xmm7, xmm1, xmm4, xmm5, rax + 0x60); + MEOW_MIX(xmm4, xmm0, xmm2, xmm5, xmm6, rax + 0x80); + MEOW_MIX(xmm5, xmm1, xmm3, xmm6, xmm7, rax + 0xa0); + MEOW_MIX(xmm6, xmm2, xmm4, xmm7, xmm0, rax + 0xc0); + MEOW_MIX(xmm7, xmm3, xmm5, xmm0, xmm1, rax + 0xe0); rax += 0x100; } - } else { - // NOTE(casey): For small input, modern Intel x64's can't hit // full speed _with_ prefetching (because of port 
pressure), // so we use this loop. while (BlockCount--) { - MEOW_MIX(xmm0,xmm4,xmm6,xmm1,xmm2, rax + 0x00); - MEOW_MIX(xmm1,xmm5,xmm7,xmm2,xmm3, rax + 0x20); - MEOW_MIX(xmm2,xmm6,xmm0,xmm3,xmm4, rax + 0x40); - MEOW_MIX(xmm3,xmm7,xmm1,xmm4,xmm5, rax + 0x60); - MEOW_MIX(xmm4,xmm0,xmm2,xmm5,xmm6, rax + 0x80); - MEOW_MIX(xmm5,xmm1,xmm3,xmm6,xmm7, rax + 0xa0); - MEOW_MIX(xmm6,xmm2,xmm4,xmm7,xmm0, rax + 0xc0); - MEOW_MIX(xmm7,xmm3,xmm5,xmm0,xmm1, rax + 0xe0); + MEOW_MIX(xmm0, xmm4, xmm6, xmm1, xmm2, rax + 0x00); + MEOW_MIX(xmm1, xmm5, xmm7, xmm2, xmm3, rax + 0x20); + MEOW_MIX(xmm2, xmm6, xmm0, xmm3, xmm4, rax + 0x40); + MEOW_MIX(xmm3, xmm7, xmm1, xmm4, xmm5, rax + 0x60); + MEOW_MIX(xmm4, xmm0, xmm2, xmm5, xmm6, rax + 0x80); + MEOW_MIX(xmm5, xmm1, xmm3, xmm6, xmm7, rax + 0xa0); + MEOW_MIX(xmm6, xmm2, xmm4, xmm7, xmm0, rax + 0xc0); + MEOW_MIX(xmm7, xmm3, xmm5, xmm0, xmm1, rax + 0xe0); rax += 0x100; } @@ -215,7 +213,7 @@ static meow_u128 MeowHash(const void * Seed128Init, size_t Len, const void * Sou // // NOTE(casey): Load any less-than-32-byte residual // - pxor_clear(xmm9, xmm9); + pxor_clear(xmm9 , xmm9 ); pxor_clear(xmm11, xmm11); // @@ -230,15 +228,15 @@ static meow_u128 MeowHash(const void * Seed128Init, size_t Len, const void * Sou // NOTE(casey): First, we have to load the part that is _not_ // 16-byte aligned const uint8_t * Last = SourceInit8 + (Len & ~0xf); - uint32_t Len8 = (Len & 0xf); + uint32_t Len8 = (Len & 0xf ); if (Len8) { // NOTE(casey): Load the mask early - movdqu(xmm8, &MeowMaskLen[0x10 - Len8]); + movdqu(xmm8 , &MeowMaskLen[0x10 - Len8]); const uint8_t * LastOk = (const uint8_t *)(((uintptr_t)(SourceInit8 + Len - 1) | (MEOW_PAGESIZE - 1)) - 16); - uint32_t Align = (Last > LastOk) ? ((uintptr_t)Last) & 0xf : 0; - movdqu(xmm10, &MeowShiftAdjust[Align]); - movdqu(xmm9, Last - Align); + uint32_t Align = (Last > LastOk) ? 
((uintptr_t)Last) & 0xf : 0; + movdqu(xmm10, &MeowShiftAdjust[Align] ); + movdqu(xmm9 , Last - Align); pshufb(xmm9, xmm10); // NOTE(jeffr): and off the extra bytes @@ -255,10 +253,10 @@ static meow_u128 MeowHash(const void * Seed128Init, size_t Len, const void * Sou // // NOTE(casey): Construct the residual and length injests // - xmm8 = xmm9; + xmm8 = xmm9; xmm10 = xmm9; - palignr(xmm8, xmm11, 15); - palignr(xmm10, xmm11, 1); + palignr(xmm8 , xmm11, 15); + palignr(xmm10, xmm11, 1); // NOTE(casey): We have room for a 128-bit nonce and a 64-bit none // here, but the decision was made to leave them zero'd so as not @@ -272,32 +270,39 @@ static meow_u128 MeowHash(const void * Seed128Init, size_t Len, const void * Sou pxor_clear(xmm14, xmm14); movq(xmm15, Len, extraseed); palignr(xmm12, xmm15, 15); - palignr(xmm14, xmm15, 1); + palignr(xmm14, xmm15, 1); // NOTE(casey): To maintain the mix-down pattern, we always Meow // Mix the less-than-32-byte residual, even if it was empty - MEOW_MIX_REG(xmm0, xmm4, xmm6, xmm1, xmm2, xmm8, xmm9, xmm10, xmm11); + MEOW_MIX_REG(xmm0, xmm4, xmm6, xmm1, xmm2, xmm8 , xmm9 , xmm10, xmm11); // NOTE(casey): Append the length, to avoid problems with our // 32-byte padding - MEOW_MIX_REG(xmm1, xmm5, xmm7, xmm2, xmm3, xmm12, xmm13, xmm14, xmm15); + MEOW_MIX_REG(xmm1, xmm5, xmm7, xmm2, xmm3, xmm12, xmm13, xmm14, xmm15); // // NOTE(casey): Hash all full 32-byte blocks // uint32_t LaneCount = (Len >> 5) & 0x7; - if(LaneCount == 0) goto MixDown; MEOW_MIX(xmm2,xmm6,xmm0,xmm3,xmm4, rax + 0x00); --LaneCount; - if(LaneCount == 0) goto MixDown; MEOW_MIX(xmm3,xmm7,xmm1,xmm4,xmm5, rax + 0x20); --LaneCount; - if(LaneCount == 0) goto MixDown; MEOW_MIX(xmm4,xmm0,xmm2,xmm5,xmm6, rax + 0x40); --LaneCount; - if(LaneCount == 0) goto MixDown; MEOW_MIX(xmm5,xmm1,xmm3,xmm6,xmm7, rax + 0x60); --LaneCount; - if(LaneCount == 0) goto MixDown; MEOW_MIX(xmm6,xmm2,xmm4,xmm7,xmm0, rax + 0x80); --LaneCount; - if(LaneCount == 0) goto MixDown; 
MEOW_MIX(xmm7,xmm3,xmm5,xmm0,xmm1, rax + 0xa0); --LaneCount; - if(LaneCount == 0) goto MixDown; MEOW_MIX(xmm0,xmm4,xmm6,xmm1,xmm2, rax + 0xc0); --LaneCount; + if (LaneCount == 0) { goto MixDown; } + MEOW_MIX(xmm2, xmm6, xmm0, xmm3, xmm4, rax + 0x00); --LaneCount; + if (LaneCount == 0) { goto MixDown; } + MEOW_MIX(xmm3, xmm7, xmm1, xmm4, xmm5, rax + 0x20); --LaneCount; + if (LaneCount == 0) { goto MixDown; } + MEOW_MIX(xmm4, xmm0, xmm2, xmm5, xmm6, rax + 0x40); --LaneCount; + if (LaneCount == 0) { goto MixDown; } + MEOW_MIX(xmm5, xmm1, xmm3, xmm6, xmm7, rax + 0x60); --LaneCount; + if (LaneCount == 0) { goto MixDown; } + MEOW_MIX(xmm6, xmm2, xmm4, xmm7, xmm0, rax + 0x80); --LaneCount; + if (LaneCount == 0) { goto MixDown; } + MEOW_MIX(xmm7, xmm3, xmm5, xmm0, xmm1, rax + 0xa0); --LaneCount; + if (LaneCount == 0) { goto MixDown; } + MEOW_MIX(xmm0, xmm4, xmm6, xmm1, xmm2, rax + 0xc0); --LaneCount; // // NOTE(casey): Mix the eight lanes down to one 128-bit hash // - MixDown: + MixDown: MEOW_SHUFFLE(xmm0, xmm1, xmm2, xmm4, xmm5, xmm6); MEOW_SHUFFLE(xmm1, xmm2, xmm3, xmm5, xmm6, xmm7); MEOW_SHUFFLE(xmm2, xmm3, xmm4, xmm6, xmm7, xmm0); @@ -323,77 +328,81 @@ static meow_u128 MeowHash(const void * Seed128Init, size_t Len, const void * Sou } //------------------------------------------------------------ -template < bool bswap > -static void MeowHash32(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void MeowHash32( const void * in, const size_t len, const seed_t seed, void * out ) { meow_u128 h = MeowHash(MeowDefaultSeed, len, in, (uint64_t)seed); + PUT_U32(MeowU32From(h, 0), (uint8_t *)out, 0); } -template < bool bswap > -static void MeowHash64(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void MeowHash64( const void * in, const size_t len, const seed_t seed, void * out ) { meow_u128 h = MeowHash(MeowDefaultSeed, len, in, (uint64_t)seed); + PUT_U64(MeowU64From(h, 0), (uint8_t *)out, 0); } -template 
< bool bswap > -static void MeowHash128(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void MeowHash128( const void * in, const size_t len, const seed_t seed, void * out ) { meow_u128 h = MeowHash(MeowDefaultSeed, len, in, (uint64_t)seed); + PUT_U64(MeowU64From(h, 0), (uint8_t *)out, 0); PUT_U64(MeowU64From(h, 1), (uint8_t *)out, 8); } + #endif //------------------------------------------------------------ REGISTER_FAMILY(meowhash, - $.src_url = "https://github.com/cmuratori/meow_hash", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://github.com/cmuratori/meow_hash", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); #if defined(HAVE_X86_64_AES) && defined(HAVE_SSE_4_1) REGISTER_HASH(MeowHash__32, - $.desc = "MeowHash (0.5/calico, low 32 bits)", - $.hash_flags = - FLAG_HASH_NO_SEED | - FLAG_HASH_AES_BASED, - $.impl_flags = - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_LICENSE_ZLIB, - $.bits = 32, - $.verification_LE = 0xE9E94FF2, - $.verification_BE = 0xD5BF086D, - $.hashfn_native = MeowHash32, - $.hashfn_bswap = MeowHash32 -); + $.desc = "MeowHash (0.5/calico, low 32 bits)", + $.hash_flags = + FLAG_HASH_NO_SEED | + FLAG_HASH_AES_BASED, + $.impl_flags = + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_LICENSE_ZLIB, + $.bits = 32, + $.verification_LE = 0xE9E94FF2, + $.verification_BE = 0xD5BF086D, + $.hashfn_native = MeowHash32, + $.hashfn_bswap = MeowHash32 + ); REGISTER_HASH(MeowHash__64, - $.desc = "MeowHash (0.5/calico, low 64 bits)", - $.hash_flags = - FLAG_HASH_NO_SEED | - FLAG_HASH_AES_BASED, - $.impl_flags = - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_LICENSE_ZLIB, - $.bits = 64, - $.verification_LE = 0x4C9F52A6, - $.verification_BE = 0xFA21003A, - $.hashfn_native = MeowHash64, - $.hashfn_bswap = MeowHash64 -); + $.desc = "MeowHash (0.5/calico, low 64 bits)", + $.hash_flags = + FLAG_HASH_NO_SEED | + FLAG_HASH_AES_BASED, + $.impl_flags = + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_LICENSE_ZLIB, + $.bits = 64, + 
$.verification_LE = 0x4C9F52A6, + $.verification_BE = 0xFA21003A, + $.hashfn_native = MeowHash64, + $.hashfn_bswap = MeowHash64 + ); REGISTER_HASH(MeowHash, - $.desc = "MeowHash (0.5/calico)", - $.hash_flags = - FLAG_HASH_NO_SEED | - FLAG_HASH_AES_BASED, - $.impl_flags = - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_LICENSE_ZLIB, - $.bits = 128, - $.verification_LE = 0x7C648489, - $.verification_BE = 0x4FD0834C, - $.hashfn_native = MeowHash128, - $.hashfn_bswap = MeowHash128 -); + $.desc = "MeowHash (0.5/calico)", + $.hash_flags = + FLAG_HASH_NO_SEED | + FLAG_HASH_AES_BASED, + $.impl_flags = + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_LICENSE_ZLIB, + $.bits = 128, + $.verification_LE = 0x7C648489, + $.verification_BE = 0x4FD0834C, + $.hashfn_native = MeowHash128, + $.hashfn_bswap = MeowHash128 + ); #endif diff --git a/hashes/metrohash.cpp b/hashes/metrohash.cpp index 6fa6fac1..05337fa7 100644 --- a/hashes/metrohash.cpp +++ b/hashes/metrohash.cpp @@ -27,163 +27,164 @@ #include "Hashlib.h" #if defined(HAVE_X86_64_CRC32C) -#include "Intrinsics.h" + #include "Intrinsics.h" #else -uint64_t _mm_crc32_u64(uint64_t crc, uint64_t v); +uint64_t _mm_crc32_u64( uint64_t crc, uint64_t v ); + #endif #define VARIANTS_64 5 static const uint64_t MULTK64[VARIANTS_64][8] = { - { - 0xD6D018F5, 0xA2AA033B, 0x62992FC1, 0x30BC5B29, - 0x62992FC1, 0x62992FC1, 0x30BC5B29, 0x30BC5B29, - }, - { - 0xC83A91E1, 0x8648DBDB, 0x7BDEC03B, 0x2F5870A5, - 0xC83A91E1, 0x8648DBDB, 0x8648DBDB, 0x7BDEC03B, - }, - { - 0xD6D018F5, 0xA2AA033B, 0x62992FC1, 0x30BC5B29, - 0x62992FC1, 0x62992FC1, 0x30BC5B29, 0x30BC5B29, - }, - { - 0xC83A91E1, 0x8648DBDB, 0x7BDEC03B, 0x2F5870A5, - 0xC83A91E1, 0x8648DBDB, 0x8648DBDB, 0x7BDEC03B, - }, - { - 0xD6D018F5, 0xA2AA033B, 0x62992FC1, 0x30BC5B29, - 0xD6D018F5, 0xA2AA033B, 0xA2AA033B, 0x62992FC1, - }, + { + 0xD6D018F5, 0xA2AA033B, 0x62992FC1, 0x30BC5B29, + 0x62992FC1, 0x62992FC1, 0x30BC5B29, 0x30BC5B29, + }, + { + 0xC83A91E1, 0x8648DBDB, 0x7BDEC03B, 0x2F5870A5, + 0xC83A91E1, 
0x8648DBDB, 0x8648DBDB, 0x7BDEC03B, + }, + { + 0xD6D018F5, 0xA2AA033B, 0x62992FC1, 0x30BC5B29, + 0x62992FC1, 0x62992FC1, 0x30BC5B29, 0x30BC5B29, + }, + { + 0xC83A91E1, 0x8648DBDB, 0x7BDEC03B, 0x2F5870A5, + 0xC83A91E1, 0x8648DBDB, 0x8648DBDB, 0x7BDEC03B, + }, + { + 0xD6D018F5, 0xA2AA033B, 0x62992FC1, 0x30BC5B29, + 0xD6D018F5, 0xA2AA033B, 0xA2AA033B, 0x62992FC1, + }, }; static const uint8_t ROTK64[VARIANTS_64][9] = { - { 37, 29, 21, 55, 26, 48, 37, 28, 29 }, - { 33, 33, 35, 33, 15, 13, 25, 33, 33 }, - { 30, 29, 34, 36, 15, 15, 23, 28, 29 }, - { 33, 33, 35, 33, 15, 13, 25, 33, 33 }, - { 33, 33, 35, 33, 15, 13, 25, 33, 33 }, + { 37, 29, 21, 55, 26, 48, 37, 28, 29 }, + { 33, 33, 35, 33, 15, 13, 25, 33, 33 }, + { 30, 29, 34, 36, 15, 15, 23, 28, 29 }, + { 33, 33, 35, 33, 15, 13, 25, 33, 33 }, + { 33, 33, 35, 33, 15, 13, 25, 33, 33 }, }; -template < uint32_t variant, bool bswap > -static void MetroHash64(const void * in, const size_t len, const seed_t seed, void * out) { - if (variant >= VARIANTS_64) { return; } +template +static void MetroHash64( const void * in, const size_t len, const seed_t seed, void * out ) { + if (variant >= VARIANTS_64) { return; } - const uint64_t * K = &MULTK64[variant][0]; - const uint8_t * ROTK = &ROTK64[variant][0]; - const size_t length = len; - const uint8_t * ptr = (const uint8_t *)in; - const uint8_t * const end = ptr + len; - uint64_t v[4]; + const uint64_t * K = &MULTK64[variant][0]; + const uint8_t * ROTK = &ROTK64 [variant][0]; + const size_t length = len; + const uint8_t * ptr = (const uint8_t *)in; + const uint8_t * const end = ptr + len; + uint64_t v[4]; - uint64_t vseed = ((uint64_t)seed + K[2]) * K[0]; - if (variant != 0) { vseed += len; } + uint64_t vseed = ((uint64_t)seed + K[2]) * K[0]; + if (variant != 0) { vseed += len; } - v[0] = v[1] = v[2] = v[3] = vseed; + v[0] = v[1] = v[2] = v[3] = vseed; - // bulk update - while (ptr <= (end - 32)) { - if (variant <= 2) { - v[0] += GET_U64(ptr, 0) * K[0]; v[0] = ROTR64(v[0], 29) + 
v[2]; - v[1] += GET_U64(ptr, 8) * K[1]; v[1] = ROTR64(v[1], 29) + v[3]; - v[2] += GET_U64(ptr, 16) * K[2]; v[2] = ROTR64(v[2], 29) + v[0]; - v[3] += GET_U64(ptr, 24) * K[3]; v[3] = ROTR64(v[3], 29) + v[1]; - } else { - v[0] ^= _mm_crc32_u64(v[0], GET_U64(ptr, 0)); - v[1] ^= _mm_crc32_u64(v[1], GET_U64(ptr, 8)); - v[2] ^= _mm_crc32_u64(v[2], GET_U64(ptr, 16)); - v[3] ^= _mm_crc32_u64(v[3], GET_U64(ptr, 24)); + // bulk update + while (ptr <= (end - 32)) { + if (variant <= 2) { + v[0] += GET_U64(ptr, 0) * K[0]; v[0] = ROTR64(v[0], 29) + v[2]; + v[1] += GET_U64(ptr, 8) * K[1]; v[1] = ROTR64(v[1], 29) + v[3]; + v[2] += GET_U64(ptr, 16) * K[2]; v[2] = ROTR64(v[2], 29) + v[0]; + v[3] += GET_U64(ptr, 24) * K[3]; v[3] = ROTR64(v[3], 29) + v[1]; + } else { + v[0] ^= _mm_crc32_u64(v[0], GET_U64(ptr, 0)); + v[1] ^= _mm_crc32_u64(v[1], GET_U64(ptr, 8)); + v[2] ^= _mm_crc32_u64(v[2], GET_U64(ptr, 16)); + v[3] ^= _mm_crc32_u64(v[3], GET_U64(ptr, 24)); + } + ptr += 32; } - ptr += 32; - } - - if (len >= 32) { - v[2] ^= ROTR64(((v[0] + v[3]) * K[0]) + v[1], ROTK[0]) * K[1]; - v[3] ^= ROTR64(((v[1] + v[2]) * K[1]) + v[0], ROTK[0]) * K[0]; - v[0] ^= ROTR64(((v[0] + v[2]) * K[0]) + v[3], ROTK[0]) * K[1]; - v[1] ^= ROTR64(((v[1] + v[3]) * K[1]) + v[2], ROTK[0]) * K[0]; - - v[0] = vseed + (v[0] ^ v[1]); - } - - if ((end - ptr) >= 16) { - v[1] = v[0] + (GET_U64(ptr, 0) * K[4]); v[1] = ROTR64(v[1], ROTK[1]) * K[6]; - v[2] = v[0] + (GET_U64(ptr, 8) * K[5]); v[2] = ROTR64(v[2], ROTK[1]) * K[7]; - v[1] ^= ROTR64(v[1] * K[0], ROTK[2]) + v[2]; - v[2] ^= ROTR64(v[2] * K[3], ROTK[2]) + v[1]; - v[0] += v[2]; - ptr += 16; - } - - if ((end - ptr) >= 8) { - v[0] += GET_U64(ptr, 0) * K[3]; - v[0] ^= ROTR64(v[0], ROTK[3]) * K[1]; - ptr += 8; - } - - if ((end - ptr) >= 4) { - if (variant <= 2) { - v[0] += GET_U32(ptr, 0) * K[3]; - } else { - v[0] ^= _mm_crc32_u64(v[0], GET_U32(ptr, 0)); + + if (len >= 32) { + v[2] ^= ROTR64(((v[0] + v[3]) * K[0]) + v[1], ROTK[0]) * K[1]; + v[3] ^= ROTR64(((v[1] + v[2]) 
* K[1]) + v[0], ROTK[0]) * K[0]; + v[0] ^= ROTR64(((v[0] + v[2]) * K[0]) + v[3], ROTK[0]) * K[1]; + v[1] ^= ROTR64(((v[1] + v[3]) * K[1]) + v[2], ROTK[0]) * K[0]; + + v[0] = vseed + (v[0] ^ v[1]); } - v[0] ^= ROTR64(v[0], ROTK[4]) * K[1]; - ptr += 4; - } - if ((end - ptr) >= 2) { - if (variant <= 2) { - v[0] += GET_U16(ptr, 0) * K[3]; - } else { - v[0] ^= _mm_crc32_u64(v[0], GET_U16(ptr, 0)); + if ((end - ptr) >= 16) { + v[1] = v[0] + (GET_U64(ptr, 0) * K[4]); v[1] = ROTR64(v[1], ROTK[1]) * K[6]; + v[2] = v[0] + (GET_U64(ptr, 8) * K[5]); v[2] = ROTR64(v[2], ROTK[1]) * K[7]; + v[1] ^= ROTR64(v[1] * K[0], ROTK[2]) + v[2]; + v[2] ^= ROTR64(v[2] * K[3], ROTK[2]) + v[1]; + v[0] += v[2]; + ptr += 16; } - v[0] ^= ROTR64(v[0], ROTK[5]) * K[1]; - ptr += 2; - } - if ((end - ptr) >= 1) { - if (variant <= 2) { - v[0] += (*ptr) * K[3]; - } else { - v[0] ^= _mm_crc32_u64(v[0], *ptr); + if ((end - ptr) >= 8) { + v[0] += GET_U64(ptr, 0) * K[3]; + v[0] ^= ROTR64(v[0], ROTK[3]) * K[1]; + ptr += 8; + } + + if ((end - ptr) >= 4) { + if (variant <= 2) { + v[0] += GET_U32(ptr, 0) * K[3]; + } else { + v[0] ^= _mm_crc32_u64(v[0], GET_U32(ptr, 0)); + } + v[0] ^= ROTR64(v[0], ROTK[4]) * K[1]; + ptr += 4; + } + + if ((end - ptr) >= 2) { + if (variant <= 2) { + v[0] += GET_U16(ptr, 0) * K[3]; + } else { + v[0] ^= _mm_crc32_u64(v[0], GET_U16(ptr, 0)); + } + v[0] ^= ROTR64(v[0], ROTK[5]) * K[1]; + ptr += 2; + } + + if ((end - ptr) >= 1) { + if (variant <= 2) { + v[0] += (*ptr) * K[3]; + } else { + v[0] ^= _mm_crc32_u64(v[0], *ptr); + } + v[0] ^= ROTR64(v[0], ROTK[6]) * K[1]; } - v[0] ^= ROTR64(v[0], ROTK[6]) * K[1]; - } - v[0] ^= ROTR64(v[0], ROTK[7]); - v[0] *= K[0]; - v[0] ^= ROTR64(v[0], ROTK[8]); + v[0] ^= ROTR64(v[0], ROTK[7]); + v[0] *= K[0]; + v[0] ^= ROTR64(v[0], ROTK[8]); - PUT_U64(v[0], (uint8_t *)out, 0); + PUT_U64(v[0], (uint8_t *)out, 0); } #define VARIANTS_128 5 static const uint64_t MULTK128[VARIANTS_128][4] = { - { 0xC83A91E1, 0x8648DBDB, 0x7BDEC03B, 0x2F5870A5 }, // Standard 
mixing - { 0xC83A91E1, 0x8648DBDB, 0x7BDEC03B, 0x2F5870A5 }, - { 0xD6D018F5, 0xA2AA033B, 0x62992FC1, 0x30BC5B29 }, - { 0xC83A91E1, 0x8648DBDB, 0x7BDEC03B, 0x2F5870A5 }, // CRC-based mixing - { 0xEE783E2F, 0xAD07C493, 0x797A90BB, 0x2E4B2E1B } + { 0xC83A91E1, 0x8648DBDB, 0x7BDEC03B, 0x2F5870A5 }, // Standard mixing + { 0xC83A91E1, 0x8648DBDB, 0x7BDEC03B, 0x2F5870A5 }, + { 0xD6D018F5, 0xA2AA033B, 0x62992FC1, 0x30BC5B29 }, + { 0xC83A91E1, 0x8648DBDB, 0x7BDEC03B, 0x2F5870A5 }, // CRC-based mixing + { 0xEE783E2F, 0xAD07C493, 0x797A90BB, 0x2E4B2E1B } }; static const uint8_t ROTK128[VARIANTS_128][15] = { - { 21, 21, 21, 33, 45, 33, 27, 33, 46, 33, 22, 33, 58, 13, 37, }, - { 26, 26, 30, 33, 17, 33, 20, 33, 18, 33, 24, 33, 24, 13, 37, }, - { 33, 33, 33, 29, 29, 29, 29, 29, 25, 29, 30, 29, 18, 33, 33, }, - { 34, 37, 37, 34, 30, 36, 23, 0, 19, 0, 13, 0, 17, 11, 26, }, - { 12, 19, 19, 41, 10, 34, 22, 0, 14, 0, 15, 0, 18, 15, 27, } + { 21, 21, 21, 33, 45, 33, 27, 33, 46, 33, 22, 33, 58, 13, 37, }, + { 26, 26, 30, 33, 17, 33, 20, 33, 18, 33, 24, 33, 24, 13, 37, }, + { 33, 33, 33, 29, 29, 29, 29, 29, 25, 29, 30, 29, 18, 33, 33, }, + { 34, 37, 37, 34, 30, 36, 23, 0, 19, 0, 13, 0, 17, 11, 26, }, + { 12, 19, 19, 41, 10, 34, 22, 0, 14, 0, 15, 0, 18, 15, 27, } }; -template < uint32_t variant, bool bswap > -static void MetroHash128(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void MetroHash128( const void * in, const size_t len, const seed_t seed, void * out ) { if (variant >= VARIANTS_128) { return; } - const uint64_t * K = &MULTK128[variant][0]; - const uint8_t * ROTK = &ROTK128[variant][0]; - const size_t length = len; - const uint8_t * ptr = (const uint8_t *)in; - const uint8_t * const end = ptr + len; + const uint64_t * K = &MULTK128[variant][0]; + const uint8_t * ROTK = &ROTK128 [variant][0]; + const size_t length = len; + const uint8_t * ptr = (const uint8_t *)in; + const uint8_t * const end = ptr + len; uint64_t v[4]; @@ -229,14 +230,14 
@@ static void MetroHash128(const void * in, const size_t len, const seed_t seed, v v[1] += (GET_U64(ptr, 8) * K[2]); v[1] = ROTR64(v[1], ROTK[3]) * K[3]; v[0] ^= ROTR64(v[0] * K[2] + v[1], ROTK[4]) * K[1]; v[1] ^= ROTR64(v[1] * K[3] + v[0], ROTK[4]) * K[0]; - ptr += 16; + ptr += 16; } if ((end - ptr) >= 8) { v[0] += GET_U64(ptr, 0) * K[2]; - v[0] = ROTR64(v[0], ROTK[5]) * K[3]; + v[0] = ROTR64(v[0] , ROTK[5]) * K[3]; v[0] ^= ROTR64(v[0] * K[2] + v[1], ROTK[6]) * K[1]; - ptr += 8; + ptr += 8; } if ((end - ptr) >= 4) { @@ -247,7 +248,7 @@ static void MetroHash128(const void * in, const size_t len, const seed_t seed, v v[1] ^= _mm_crc32_u64(v[0], GET_U32(ptr, 0)); } v[1] ^= ROTR64(v[1] * K[3] + v[0], ROTK[8]) * K[0]; - ptr += 4; + ptr += 4; } if ((end - ptr) >= 2) { @@ -258,7 +259,7 @@ static void MetroHash128(const void * in, const size_t len, const seed_t seed, v v[0] ^= _mm_crc32_u64(v[1], GET_U16(ptr, 0)); } v[0] ^= ROTR64(v[0] * K[2] + v[1], ROTK[10]) * K[1]; - ptr += 2; + ptr += 2; } if ((end - ptr) >= 1) { @@ -286,174 +287,174 @@ static void MetroHash128(const void * in, const size_t len, const seed_t seed, v } REGISTER_FAMILY(metrohash, - $.src_url = "https://github.com/jandrewrogers/MetroHash/tree/c135424b3b83f1ca2502b7960f8d5705ddcec987", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/jandrewrogers/MetroHash/tree/c135424b3b83f1ca2502b7960f8d5705ddcec987", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(MetroHash_64, - $.desc = "Metrohash v1 base variant, 64-bit version", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x6FA828C9, - $.verification_BE = 0xFB8D54A5, - $.hashfn_native = MetroHash64<0, false>, - $.hashfn_bswap = MetroHash64<0, true>, - $.badseeds = {} -); + $.desc = "Metrohash v1 base variant, 64-bit version", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE 
| + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x6FA828C9, + $.verification_BE = 0xFB8D54A5, + $.hashfn_native = MetroHash64<0, false>, + $.hashfn_bswap = MetroHash64<0, true>, + $.badseeds = {} + ); REGISTER_HASH(MetroHash_64__var1, - $.desc = "Metrohash v1 variant 1, 64-bit version", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xEE88F7D2, - $.verification_BE = 0xCC0F03D7, - $.hashfn_native = MetroHash64<1, false>, - $.hashfn_bswap = MetroHash64<1, true>, - $.badseeds = {} -); + $.desc = "Metrohash v1 variant 1, 64-bit version", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xEE88F7D2, + $.verification_BE = 0xCC0F03D7, + $.hashfn_native = MetroHash64<1, false>, + $.hashfn_bswap = MetroHash64<1, true>, + $.badseeds = {} + ); REGISTER_HASH(MetroHash_64__var2, - $.desc = "Metrohash v1 variant 2, 64-bit version", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xE1FC7C6E, - $.verification_BE = 0x7F8C6EF1, - $.hashfn_native = MetroHash64<2, false>, - $.hashfn_bswap = MetroHash64<2, true>, - $.badseeds = {} -); + $.desc = "Metrohash v1 variant 2, 64-bit version", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xE1FC7C6E, + $.verification_BE = 0x7F8C6EF1, + $.hashfn_native = MetroHash64<2, false>, + $.hashfn_bswap = MetroHash64<2, true>, + $.badseeds = {} + ); #if defined(HAVE_X86_64_CRC32C) REGISTER_HASH(MetroHashCrc_64__var1, - $.desc = "Metrohash-crc v1 variant 1, 64-bit version (unofficial)", - $.hash_flags = - FLAG_HASH_CRC_BASED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - 
$.verification_LE = 0x29C68A50, - $.verification_BE = 0xACEEC1FC, - $.hashfn_native = MetroHash64<3, false>, - $.hashfn_bswap = MetroHash64<3, true>, - $.badseeds = {} -); + $.desc = "Metrohash-crc v1 variant 1, 64-bit version (unofficial)", + $.hash_flags = + FLAG_HASH_CRC_BASED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x29C68A50, + $.verification_BE = 0xACEEC1FC, + $.hashfn_native = MetroHash64<3, false>, + $.hashfn_bswap = MetroHash64<3, true>, + $.badseeds = {} + ); REGISTER_HASH(MetroHashCrc_64__var2, - $.desc = "Metrohash-crc v1 variant 2, 64-bit version (unofficial)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x2C00BD9F, - $.verification_BE = 0x590D5688, - $.hashfn_native = MetroHash64<4, false>, - $.hashfn_bswap = MetroHash64<4, true>, - $.badseeds = {} -); + $.desc = "Metrohash-crc v1 variant 2, 64-bit version (unofficial)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x2C00BD9F, + $.verification_BE = 0x590D5688, + $.hashfn_native = MetroHash64<4, false>, + $.hashfn_bswap = MetroHash64<4, true>, + $.badseeds = {} + ); #endif REGISTER_HASH(MetroHash_128, - $.desc = "Metrohash v1 base variant, 128-bit version", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x4A6673E7, - $.verification_BE = 0xD5F2CD8C, - $.hashfn_native = MetroHash128<0, false>, - $.hashfn_bswap = MetroHash128<0, true>, - $.badseeds = {} -); + $.desc = "Metrohash v1 base variant, 128-bit version", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x4A6673E7, + $.verification_BE = 
0xD5F2CD8C, + $.hashfn_native = MetroHash128<0, false>, + $.hashfn_bswap = MetroHash128<0, true>, + $.badseeds = {} + ); REGISTER_HASH(MetroHash_128__var1, - $.desc = "Metrohash v1 variant 1, 128-bit version", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x20E8A1D7, - $.verification_BE = 0x78661274, - $.hashfn_native = MetroHash128<1, false>, - $.hashfn_bswap = MetroHash128<1, true>, - $.badseeds = {} -); + $.desc = "Metrohash v1 variant 1, 128-bit version", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x20E8A1D7, + $.verification_BE = 0x78661274, + $.hashfn_native = MetroHash128<1, false>, + $.hashfn_bswap = MetroHash128<1, true>, + $.badseeds = {} + ); REGISTER_HASH(MetroHash_128__var2, - $.desc = "Metrohash v1 variant 2, 128-bit version", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x5437C684, - $.verification_BE = 0x01A244A6, - $.hashfn_native = MetroHash128<2, false>, - $.hashfn_bswap = MetroHash128<2, true>, - $.badseeds = {} -); + $.desc = "Metrohash v1 variant 2, 128-bit version", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x5437C684, + $.verification_BE = 0x01A244A6, + $.hashfn_native = MetroHash128<2, false>, + $.hashfn_bswap = MetroHash128<2, true>, + $.badseeds = {} + ); #if defined(HAVE_X86_64_CRC32C) REGISTER_HASH(MetroHashCrc_128__var1, - $.desc = "Metrohash-crc v1 variant 1, 128-bit version", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x5E75144E, - $.verification_BE = 0xCD4C6C7E, - $.hashfn_native = MetroHash128<3, 
false>, - $.hashfn_bswap = MetroHash128<3, true>, - $.badseeds = {} -); + $.desc = "Metrohash-crc v1 variant 1, 128-bit version", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x5E75144E, + $.verification_BE = 0xCD4C6C7E, + $.hashfn_native = MetroHash128<3, false>, + $.hashfn_bswap = MetroHash128<3, true>, + $.badseeds = {} + ); REGISTER_HASH(MetroHashCrc_128__var2, - $.desc = "Metrohash-crc v1 variant 2, 128-bit version", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x1ACF3E77, - $.verification_BE = 0x3772DA12, - $.hashfn_native = MetroHash128<4, false>, - $.hashfn_bswap = MetroHash128<4, true>, - $.badseeds = {} -); + $.desc = "Metrohash-crc v1 variant 2, 128-bit version", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x1ACF3E77, + $.verification_BE = 0x3772DA12, + $.hashfn_native = MetroHash128<4, false>, + $.hashfn_bswap = MetroHash128<4, true>, + $.badseeds = {} + ); #endif diff --git a/hashes/multiply_shift.cpp b/hashes/multiply_shift.cpp index bd95b0aa..3ac990ba 100644 --- a/hashes/multiply_shift.cpp +++ b/hashes/multiply_shift.cpp @@ -35,59 +35,63 @@ // https://arxiv.org/pdf/1504.06804.pdf // A randomly-generated table of 128-bit multiplicative constants -const static int MULTIPLY_SHIFT_RANDOM_WORDS = 1<<8; -static uint64_t multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS * 2]; +const static int MULTIPLY_SHIFT_RANDOM_WORDS = 1 << 8; +static uint64_t multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS * 2]; // This is just the Xorshift RNG, which was arbitrarily chosen. This // hash is labeled as system-dependent, since this would really be // replaced by *some* kind of srand()/rand() in practice. 
-static inline void mix(uint32_t & w, uint32_t & x, uint32_t & y, uint32_t & z) { +static inline void mix( uint32_t & w, uint32_t & x, uint32_t & y, uint32_t & z ) { uint32_t t = x ^ (x << 11); + x = y; y = z; z = w; w = w ^ (w >> 19) ^ t ^ (t >> 8); } -static uintptr_t multiply_shift_seed_init_slow(const seed_t seed) { +static uintptr_t multiply_shift_seed_init_slow( const seed_t seed ) { uint32_t w, x, y, z; - x = 0x498b3bc5 ^ (uint32_t)(seed); + + x = 0x498b3bc5 ^ (uint32_t)(seed ); y = 0x5a05089a ^ (uint32_t)(seed >> 32); w = z = 0; - for(int i = 0; i < 10; i++) mix(w, x, y, z); + for (int i = 0; i < 10; i++) { mix(w, x, y, z); } for (int i = 0; i < MULTIPLY_SHIFT_RANDOM_WORDS; i++) { mix(w, x, y, z); multiply_shift_random[2 * i + 1] = ((uint64_t)(x) << 32) | y; mix(w, x, y, z); multiply_shift_random[2 * i + 0] = ((uint64_t)(x) << 32) | y; - if (!multiply_shift_random[2 * i + 0]) + if (!multiply_shift_random[2 * i + 0]) { multiply_shift_random[2 * i + 0]++; + } } return 0; } -static bool multiply_shift_init(void) { +static bool multiply_shift_init( void ) { multiply_shift_seed_init_slow(0); return true; } // Vector multiply-shift (3.4) from Thorup's notes. -template < bool bswap > -static void multiply_shift32(const void * in, const size_t len_bytes, const seed_t seed, void * out) { +template +static void multiply_shift32( const void * in, const size_t len_bytes, const seed_t seed, void * out ) { const uint8_t * buf = (const uint8_t *)in; - const size_t len = len_bytes/4; + const size_t len = len_bytes / 4; // We mix in len_bytes in the basis, since smhasher considers two keys // of different length to be different, even if all the extra bits are 0. // This is needed for the AppendZero test. 
uint64_t h, t; - h = ((uint32_t)(seed)) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 1] + - ((uint32_t)(seed>>32)) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 2] + - ((uint32_t)(len_bytes)) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 3] + - ((uint32_t)(len_bytes>>32)) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 4]; + + h = ((uint32_t)(seed )) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 1] + + ((uint32_t)(seed >> 32)) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 2] + + ((uint32_t)(len_bytes )) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 3] + + ((uint32_t)(len_bytes >> 32)) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 4]; for (size_t i = 0; i < len; i++, buf += 4) { - t = GET_U32(buf, 0) * - multiply_shift_random[i % MULTIPLY_SHIFT_RANDOM_WORDS]; + t = GET_U32(buf, 0) * + multiply_shift_random[i % MULTIPLY_SHIFT_RANDOM_WORDS]; h += t; } @@ -96,9 +100,9 @@ static void multiply_shift32(const void * in, const size_t len_bytes, const seed if (remaining_bytes) { uint64_t last = 0; if (remaining_bytes & 2) { last = (last << 16) | GET_U16(buf, 0); buf += 2; } - if (remaining_bytes & 1) { last = (last << 8) | (*buf); } - t = last * - multiply_shift_random[len % MULTIPLY_SHIFT_RANDOM_WORDS]; + if (remaining_bytes & 1) { last = (last << 8) | (*buf); } + t = last * + multiply_shift_random[len % MULTIPLY_SHIFT_RANDOM_WORDS]; h += t; } @@ -106,31 +110,32 @@ static void multiply_shift32(const void * in, const size_t len_bytes, const seed } // Pair multiply-shift (3.5) from Thorup's notes. 
-template < bool bswap > -static void pair_multiply_shift32(const void * in, const size_t len_bytes, const seed_t seed, void * out) { +template +static void pair_multiply_shift32( const void * in, const size_t len_bytes, const seed_t seed, void * out ) { const uint8_t * buf = (const uint8_t *)in; - const size_t len = len_bytes/4; + const size_t len = len_bytes / 4; // We mix in len_bytes in the basis, since smhasher considers two keys // of different length to be different, even if all the extra bits are 0. // This is needed for the AppendZero test. uint64_t h, t; - h = ((uint32_t)(seed)) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 1] + - ((uint32_t)(seed>>32)) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 2] + - ((uint32_t)(len_bytes)) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 3] + - ((uint32_t)(len_bytes>>32)) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 4]; - - for (size_t i = 0; i < len/2; i++, buf += 8) { - t = GET_U64(buf, 0); - h += (((uint32_t)(t)) + multiply_shift_random[((2 * i) % MULTIPLY_SHIFT_RANDOM_WORDS) + 1]) * - (((uint32_t)(t>>32)) + multiply_shift_random[((2 * i) % MULTIPLY_SHIFT_RANDOM_WORDS) + 0]); + + h = ((uint32_t)(seed )) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 1] + + ((uint32_t)(seed >> 32)) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 2] + + ((uint32_t)(len_bytes )) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 3] + + ((uint32_t)(len_bytes >> 32)) * multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 4]; + + for (size_t i = 0; i < len / 2; i++, buf += 8) { + t = GET_U64(buf, 0); + h += (((uint32_t)(t )) + multiply_shift_random[((2 * i) % MULTIPLY_SHIFT_RANDOM_WORDS) + 1]) * + (((uint32_t)(t >> 32)) + multiply_shift_random[((2 * i) % MULTIPLY_SHIFT_RANDOM_WORDS) + 0]); } // Make sure we have the last word, if the number of words is odd if (len & 1) { - t = GET_U32(buf, 0) * - multiply_shift_random[(len - 1) % MULTIPLY_SHIFT_RANDOM_WORDS]; - h += t; + t = GET_U32(buf, 0) 
* + multiply_shift_random[(len - 1) % MULTIPLY_SHIFT_RANDOM_WORDS]; + h += t; buf += 4; } @@ -139,9 +144,9 @@ static void pair_multiply_shift32(const void * in, const size_t len_bytes, const if (remaining_bytes) { uint64_t last = 0; if (remaining_bytes & 2) { last = (last << 16) | GET_U16(buf, 0); buf += 2; } - if (remaining_bytes & 1) { last = (last << 8) | (*buf); } - t = last * - multiply_shift_random[len % MULTIPLY_SHIFT_RANDOM_WORDS]; + if (remaining_bytes & 1) { last = (last << 8) | (*buf); } + t = last * + multiply_shift_random[len % MULTIPLY_SHIFT_RANDOM_WORDS]; h += t; } @@ -156,20 +161,19 @@ static void pair_multiply_shift32(const void * in, const size_t len_bytes, const // for the moment. // // XXX Need to implement fma128_128() -template < bool bswap > -static void multiply_shift64(const void * in, const size_t len_bytes, const seed_t seed, void * out) { +template +static void multiply_shift64( const void * in, const size_t len_bytes, const seed_t seed, void * out ) { const uint8_t * buf = (const uint8_t *)in; - const size_t len = len_bytes/8; + const size_t len = len_bytes / 8; // We mix in len_bytes in the basis, since smhasher considers two keys // of different length to be different, even if all the extra bits are 0. // This is needed for the AppendZero test. 
uint64_t h, t, ignored; - mult128_128(ignored, h, (uint64_t)seed, 0, - multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 1], + + mult128_128(ignored, h, (uint64_t)seed , 0, multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 1], multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 2]); - mult128_128(ignored, t, (uint64_t)len_bytes, 0, - multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 3], + mult128_128(ignored, t, (uint64_t)len_bytes, 0, multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 3], multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 4]); h += t; @@ -186,9 +190,8 @@ static void multiply_shift64(const void * in, const size_t len_bytes, const seed uint64_t last = 0; if (remaining_bytes & 4) { last = GET_U32(buf, 0); buf += 4; } if (remaining_bytes & 2) { last = (last << 16) | GET_U16(buf, 0); buf += 2; } - if (remaining_bytes & 1) { last = (last << 8) | (*buf); } - mult128_128(ignored, t, last, 0, - multiply_shift_random[(len % MULTIPLY_SHIFT_RANDOM_WORDS) * 2 + 0], + if (remaining_bytes & 1) { last = (last << 8) | (*buf); } + mult128_128(ignored, t, last, 0, multiply_shift_random[(len % MULTIPLY_SHIFT_RANDOM_WORDS) * 2 + 0], multiply_shift_random[(len % MULTIPLY_SHIFT_RANDOM_WORDS) * 2 + 1]); h += t; } @@ -197,23 +200,22 @@ static void multiply_shift64(const void * in, const size_t len_bytes, const seed } // Pair multiply-shift (3.5) from Thorup's notes. -template < bool bswap > -static void pair_multiply_shift64(const void * in, const size_t len_bytes, const seed_t seed, void * out) { +template +static void pair_multiply_shift64( const void * in, const size_t len_bytes, const seed_t seed, void * out ) { const uint8_t * buf = (const uint8_t *)in; - const size_t len = len_bytes/8; + const size_t len = len_bytes / 8; // We mix in len_bytes in the basis, since smhasher considers two keys // of different length to be different, even if all the extra bits are 0. // This is needed for the AppendZero test. 
uint64_t h, t, ignored; - mult128_128(ignored, h, (uint64_t)seed, 0, - multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 1], + + mult128_128(ignored, h, (uint64_t)seed , 0, multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 1], multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 2]); - mult128_128(ignored, t, (uint64_t)len_bytes, 0, - multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 3], + mult128_128(ignored, t, (uint64_t)len_bytes, 0, multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 3], multiply_shift_random[MULTIPLY_SHIFT_RANDOM_WORDS - 4]); h += t; - for (size_t i = 0; i < len/2; i++, buf += 16) { + for (size_t i = 0; i < len / 2; i++, buf += 16) { uint64_t blk1lo, blk1hi, blk2lo, blk2hi; blk1lo = multiply_shift_random[((2 * i) % MULTIPLY_SHIFT_RANDOM_WORDS) * 2 + 2]; blk1hi = multiply_shift_random[((2 * i) % MULTIPLY_SHIFT_RANDOM_WORDS) * 2 + 3]; @@ -230,7 +232,7 @@ static void pair_multiply_shift64(const void * in, const size_t len_bytes, const mult128_128(ignored, t, GET_U64(buf, 0), 0, multiply_shift_random[((len - 1) % MULTIPLY_SHIFT_RANDOM_WORDS) * 2 + 0], multiply_shift_random[((len - 1) % MULTIPLY_SHIFT_RANDOM_WORDS) * 2 + 1]); - h += t; + h += t; buf += 8; } @@ -240,9 +242,8 @@ static void pair_multiply_shift64(const void * in, const size_t len_bytes, const uint64_t last = 0; if (remaining_bytes & 4) { last = GET_U32(buf, 0); buf += 4; } if (remaining_bytes & 2) { last = (last << 16) | GET_U16(buf, 0); buf += 2; } - if (remaining_bytes & 1) { last = (last << 8) | (*buf); } - mult128_128(ignored, t, last, 0, - multiply_shift_random[(len % MULTIPLY_SHIFT_RANDOM_WORDS) * 2 + 0], + if (remaining_bytes & 1) { last = (last << 8) | (*buf); } + mult128_128(ignored, t, last, 0, multiply_shift_random[(len % MULTIPLY_SHIFT_RANDOM_WORDS) * 2 + 0], multiply_shift_random[(len % MULTIPLY_SHIFT_RANDOM_WORDS) * 2 + 1]); h += t; } @@ -251,74 +252,74 @@ static void pair_multiply_shift64(const void * in, const size_t len_bytes, const } 
REGISTER_FAMILY(multiply_shift, - $.src_url = "https://github.com/rurban/smhasher/blob/2b5992fe015282c87c9069e3c664771b47555ff3/Hashes.cpp", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/rurban/smhasher/blob/2b5992fe015282c87c9069e3c664771b47555ff3/Hashes.cpp", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(multiply_shift_32, - $.desc = "Dietzfelbinger Multiply-shift on strings, 32-bit blocks", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_SYSTEM_SPECIFIC, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x34BAD85C, - $.verification_BE = 0x133CC3AC, - $.hashfn_native = multiply_shift32, - $.hashfn_bswap = multiply_shift32, -//$.seedfn = multiply_shift_seed_init_slow - $.initfn = multiply_shift_init -); + $.desc = "Dietzfelbinger Multiply-shift on strings, 32-bit blocks", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_SYSTEM_SPECIFIC, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x34BAD85C, + $.verification_BE = 0x133CC3AC, + $.hashfn_native = multiply_shift32, + $.hashfn_bswap = multiply_shift32, +// $.seedfn = multiply_shift_seed_init_slow + $.initfn = multiply_shift_init + ); REGISTER_HASH(pair_multiply_shift_32, - $.desc = "Dietzfelbinger Pair-multiply-shift strings, 32-bit blocks", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_SYSTEM_SPECIFIC, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0xFC284F0F, - $.verification_BE = 0x6E93B706, - $.hashfn_native = pair_multiply_shift32, - $.hashfn_bswap = pair_multiply_shift32, -//$.seedfn = multiply_shift_seed_init_slow - $.initfn = multiply_shift_init -); + $.desc = "Dietzfelbinger Pair-multiply-shift strings, 32-bit blocks", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_SYSTEM_SPECIFIC, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + 
FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0xFC284F0F, + $.verification_BE = 0x6E93B706, + $.hashfn_native = pair_multiply_shift32, + $.hashfn_bswap = pair_multiply_shift32, +// $.seedfn = multiply_shift_seed_init_slow + $.initfn = multiply_shift_init + ); REGISTER_HASH(multiply_shift, - $.desc = "Dietzfelbinger Multiply-shift on strings, 64-bit blocks", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_SYSTEM_SPECIFIC, - $.impl_flags = - FLAG_IMPL_MULTIPLY_128_128 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xB7A5E66D, - $.verification_BE = 0x6E3902A6, - $.hashfn_native = multiply_shift64, - $.hashfn_bswap = multiply_shift64, -//$.seedfn = multiply_shift_seed_init_slow - $.initfn = multiply_shift_init -); + $.desc = "Dietzfelbinger Multiply-shift on strings, 64-bit blocks", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_SYSTEM_SPECIFIC, + $.impl_flags = + FLAG_IMPL_MULTIPLY_128_128 | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xB7A5E66D, + $.verification_BE = 0x6E3902A6, + $.hashfn_native = multiply_shift64, + $.hashfn_bswap = multiply_shift64, +// $.seedfn = multiply_shift_seed_init_slow + $.initfn = multiply_shift_init + ); REGISTER_HASH(pair_multiply_shift, - $.desc = "Dietzfelbinger Pair-multiply-shift strings, 64-bit blocks", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_SYSTEM_SPECIFIC, - $.impl_flags = - FLAG_IMPL_MULTIPLY_128_128 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x4FBA804D, - $.verification_BE = 0x2B7F643B, - $.hashfn_native = pair_multiply_shift64, - $.hashfn_bswap = pair_multiply_shift64, -//$.seedfn = multiply_shift_seed_init_slow - $.initfn = multiply_shift_init -); + $.desc = "Dietzfelbinger Pair-multiply-shift strings, 64-bit blocks", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_SYSTEM_SPECIFIC, + $.impl_flags = + FLAG_IMPL_MULTIPLY_128_128 | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x4FBA804D, + 
$.verification_BE = 0x2B7F643B, + $.hashfn_native = pair_multiply_shift64, + $.hashfn_bswap = pair_multiply_shift64, +// $.seedfn = multiply_shift_seed_init_slow + $.initfn = multiply_shift_init + ); diff --git a/hashes/mum_mir.cpp b/hashes/mum_mir.cpp index 71a66833..07a2a1ce 100644 --- a/hashes/mum_mir.cpp +++ b/hashes/mum_mir.cpp @@ -1,4 +1,5 @@ -/* MUM and MIR hashes +/* + * MUM and MIR hashes * Copyright (C) 2021-2022 Frank J. T. Wojcik * Copyright (c) 2016 Vladimir Makarov * @@ -21,7 +22,7 @@ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. -*/ + */ #include "Platform.h" #include "Hashlib.h" @@ -40,9 +41,10 @@ // // The code has been reworked to allow both forms to always be // calculable on every platform. -template < bool exact > -static inline uint64_t _mum(uint64_t v, uint64_t p) { +template +static inline uint64_t _mum( uint64_t v, uint64_t p ) { uint64_t hi, lo; + if (exact) { mult64_128(lo, hi, v, p); } else { @@ -60,9 +62,11 @@ static inline uint64_t _mum(uint64_t v, uint64_t p) { //----------------------------------------------------------------------------- // MUM hash internals -/* Here are different primes randomly generated with the equal - probability of their bit values. They are used to randomize input - values. */ +/* + * Here are different primes randomly generated with the equal + * probability of their bit values. They are used to randomize input + * values. 
+ */ static const uint64_t _mum_hash_step_prime = UINT64_C(0x2e0bb864e9ea7df5); static const uint64_t _mum_key_step_prime = UINT64_C(0xcdb32970830fcaa1); static const uint64_t _mum_block_start_prime = UINT64_C(0xc42b5e2e6480b23b); @@ -71,118 +75,122 @@ static const uint64_t _mum_tail_prime = UINT64_C(0xaf47d47c99b1461b); static const uint64_t _mum_finish_prime1 = UINT64_C(0xa9a7ae7ceff79f3f); static const uint64_t _mum_finish_prime2 = UINT64_C(0xaf47d47c99b1461b); -static const uint64_t _mum_primes [] = { - UINT64_C(0x9ebdcae10d981691), UINT64_C(0x32b9b9b97a27ac7d), - UINT64_C(0x29b5584d83d35bbd), UINT64_C(0x4b04e0e61401255f), - UINT64_C(0x25e8f7b1f1c9d027), UINT64_C(0x80d4c8c000f3e881), - UINT64_C(0xbd1255431904b9dd), UINT64_C(0x8a3bd4485eee6d81), - UINT64_C(0x3bc721b2aad05197), UINT64_C(0x71b1a19b907d6e33), - UINT64_C(0x525e6c1084a8534b), UINT64_C(0x9e4c2cd340c1299f), - UINT64_C(0xde3add92e94caa37), UINT64_C(0x7e14eadb1f65311d), - UINT64_C(0x3f5aa40f89812853), UINT64_C(0x33b15a3b587d15c9), +static const uint64_t _mum_primes [] = { + UINT64_C(0x9ebdcae10d981691), UINT64_C(0x32b9b9b97a27ac7d), + UINT64_C(0x29b5584d83d35bbd), UINT64_C(0x4b04e0e61401255f), + UINT64_C(0x25e8f7b1f1c9d027), UINT64_C(0x80d4c8c000f3e881), + UINT64_C(0xbd1255431904b9dd), UINT64_C(0x8a3bd4485eee6d81), + UINT64_C(0x3bc721b2aad05197), UINT64_C(0x71b1a19b907d6e33), + UINT64_C(0x525e6c1084a8534b), UINT64_C(0x9e4c2cd340c1299f), + UINT64_C(0xde3add92e94caa37), UINT64_C(0x7e14eadb1f65311d), + UINT64_C(0x3f5aa40f89812853), UINT64_C(0x33b15a3b587d15c9), }; // Since unroll_power actually affects hash *values*, not just speed, // it needs to be a template parameter, so all versions of the hash // can be tested on all platforms. 
-template < uint32_t version, uint32_t unroll_power, bool bswap, bool exactmul > -//_MUM_OPTIMIZE("unroll-loops") -static inline uint64_t _mum_hash_aligned(uint64_t seed, const void * key, size_t len) { - const uint32_t _MUM_UNROLL_FACTOR = 1 << unroll_power; - const uint8_t * str = (const uint8_t *)key; - uint64_t u64, result; - size_t i; - size_t n; - - if ((version == 1) || (version == 3)) { - result = _mum(seed, _mum_block_start_prime); - } else { - result = seed; - } - while (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) { - /* - * This loop could be vectorized when we have vector insns for - * 64x64->128-bit multiplication. AVX2 currently only have a - * vector insn for 4 32x32->64-bit multiplication. - */ - if ((version == 1) || (version == 2)) { - for (i = 0; i < _MUM_UNROLL_FACTOR; i++) - result ^= _mum(GET_U64(str, i*8), _mum_primes[i]); +template +// _MUM_OPTIMIZE("unroll-loops") +static inline uint64_t _mum_hash_aligned( uint64_t seed, const void * key, size_t len ) { + const uint32_t _MUM_UNROLL_FACTOR = 1 << unroll_power; + const uint8_t * str = (const uint8_t *)key; + uint64_t u64, result; + size_t i; + size_t n; + + if ((version == 1) || (version == 3)) { + result = _mum(seed, _mum_block_start_prime); } else { - for (i = 0; i < _MUM_UNROLL_FACTOR; i+=2) - result ^= _mum(GET_U64(str, i*8 ) ^ _mum_primes[i], - GET_U64(str, i*8 + 8) ^ _mum_primes[i+1]); + result = seed; } - len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t); - str += _MUM_UNROLL_FACTOR * sizeof (uint64_t); - /* - * We will use the same prime numbers on the next iterations -- - * randomize the state. 
- */ - result = _mum(result, _mum_unroll_prime); - } - n = len / sizeof (uint64_t); - for (i = 0; i < n; i++) - result ^= _mum(GET_U64(str, i*8), _mum_primes[i]); - len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t); - switch (len) { - case 7: - u64 = GET_U32(str, 0); - u64 |= (uint64_t) str[4] << 32; - u64 |= (uint64_t) str[5] << 40; - u64 |= (uint64_t) str[6] << 48; - return result ^ _mum(u64, _mum_tail_prime); - case 6: - u64 = GET_U32(str, 0); - u64 |= (uint64_t) str[4] << 32; - u64 |= (uint64_t) str[5] << 40; - return result ^ _mum(u64, _mum_tail_prime); - case 5: - u64 = GET_U32(str, 0); - u64 |= (uint64_t) str[4] << 32; - return result ^ _mum(u64, _mum_tail_prime); - case 4: - u64 = GET_U32(str, 0); - return result ^ _mum(u64, _mum_tail_prime); - case 3: - u64 = str[0]; - u64 |= (uint64_t) str[1] << 8; - u64 |= (uint64_t) str[2] << 16; - return result ^ _mum(u64, _mum_tail_prime); - case 2: - u64 = str[0]; - u64 |= (uint64_t) str[1] << 8; - return result ^ _mum(u64, _mum_tail_prime); - case 1: - u64 = str[0]; - return result ^ _mum(u64, _mum_tail_prime); - } - return result; + while (len > _MUM_UNROLL_FACTOR * sizeof(uint64_t)) { + /* + * This loop could be vectorized when we have vector insns for + * 64x64->128-bit multiplication. AVX2 currently only have a + * vector insn for 4 32x32->64-bit multiplication. + */ + if ((version == 1) || (version == 2)) { + for (i = 0; i < _MUM_UNROLL_FACTOR; i++) { + result ^= _mum(GET_U64(str, i * 8) , _mum_primes[i]); + } + } else { + for (i = 0; i < _MUM_UNROLL_FACTOR; i += 2) { + result ^= _mum(GET_U64(str, i * 8) ^ _mum_primes[i], + GET_U64(str, i * 8 + 8) ^ _mum_primes[i + 1]); + } + } + len -= _MUM_UNROLL_FACTOR * sizeof(uint64_t); + str += _MUM_UNROLL_FACTOR * sizeof(uint64_t); + /* + * We will use the same prime numbers on the next iterations -- + * randomize the state. 
+ */ + result = _mum(result, _mum_unroll_prime); + } + n = len / sizeof(uint64_t); + for (i = 0; i < n; i++) { + result ^= _mum(GET_U64(str, i * 8), _mum_primes[i]); + } + len -= n * sizeof(uint64_t); str += n * sizeof(uint64_t); + switch (len) { + case 7: + u64 = GET_U32(str, 0); + u64 |= (uint64_t)str[4] << 32; + u64 |= (uint64_t)str[5] << 40; + u64 |= (uint64_t)str[6] << 48; + return result ^ _mum(u64, _mum_tail_prime); + case 6: + u64 = GET_U32(str, 0); + u64 |= (uint64_t)str[4] << 32; + u64 |= (uint64_t)str[5] << 40; + return result ^ _mum(u64, _mum_tail_prime); + case 5: + u64 = GET_U32(str, 0); + u64 |= (uint64_t)str[4] << 32; + return result ^ _mum(u64, _mum_tail_prime); + case 4: + u64 = GET_U32(str, 0); + return result ^ _mum(u64, _mum_tail_prime); + case 3: + u64 = str[0]; + u64 |= (uint64_t)str[1] << 8; + u64 |= (uint64_t)str[2] << 16; + return result ^ _mum(u64, _mum_tail_prime); + case 2: + u64 = str [0]; + u64 |= (uint64_t)str[1] << 8; + return result ^ _mum(u64, _mum_tail_prime); + case 1: + u64 = str [0]; + return result ^ _mum(u64, _mum_tail_prime); + } + return result; } /* Final randomization of H. 
*/ -template < uint32_t version, bool exactmul > -static inline uint64_t _mum_final (uint64_t h) { - if (version == 1) { - h ^= _mum(h, _mum_finish_prime1); - h ^= _mum(h, _mum_finish_prime2); - } else if (version == 2) { - h ^= ROTL64(h, 33); - h ^= _mum(h, _mum_finish_prime1); - } else { - h = _mum(h, h); - } - return h; +template +static inline uint64_t _mum_final( uint64_t h ) { + if (version == 1) { + h ^= _mum(h, _mum_finish_prime1); + h ^= _mum(h, _mum_finish_prime2); + } else if (version == 2) { + h ^= ROTL64(h, 33); + h ^= _mum(h, _mum_finish_prime1); + } else { + h = _mum(h, h); + } + return h; } //----------------------------------------------------------------------------- // MUM hash externals for SMHasher3 -template < uint32_t version, uint32_t unroll_power, bool bswap, bool exactmul > -static void mum_aligned(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void mum_aligned( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h; - h = _mum_hash_aligned(seed + len, in, len); - h = _mum_final(h); + + h = _mum_hash_aligned(seed + len, in, len); + h = _mum_final(h); PUT_U64(h, (uint8_t *)out, 0); } @@ -194,23 +202,26 @@ static void mum_aligned(const void * in, const size_t len, const seed_t seed, vo // "while (len >= .....". // Based on this, I'm removing the realign variants for now. 
#if defined(NOTYET) -template < uint32_t version, uint32_t unroll_power, bool bswap, bool exactmul > -static void mum_realign(const void * in, const size_t olen, const seed_t seed, void * out) { + +template +static void mum_realign( const void * in, const size_t olen, const seed_t seed, void * out ) { const uint8_t * str = (const uint8_t *)in; - const uint32_t _MUM_BLOCK_LEN = 1024; - uint64_t buf[_MUM_BLOCK_LEN / sizeof(uint64_t)]; - size_t len = olen; - uint64_t h = seed + olen; + const uint32_t _MUM_BLOCK_LEN = 1024; + uint64_t buf[_MUM_BLOCK_LEN / sizeof(uint64_t)]; + size_t len = olen; + uint64_t h = seed + olen; + while (len != 0) { size_t block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN; memmove(buf, str, block_len); - h = _mum_hash_aligned(h, buf, block_len); + h = _mum_hash_aligned(h, buf, block_len); len -= block_len; str += block_len; } - h = _mum_final(h); + h = _mum_final(h); PUT_U64(h, (uint8_t *)out, 0); } + #endif //----------------------------------------------------------------------------- @@ -222,33 +233,33 @@ static void mum_realign(const void * in, const size_t olen, const seed_t seed, v * Hash for the same key can be different on different architectures. * To get machine-independent hash, use mir_hash_strict which is about * 1.5 times slower than mir_hash. 
-*/ -template < bool exact > -static inline uint64_t mir_mum(uint64_t v, uint64_t c) { - if (exact) { return _mum(v, c); } - uint64_t v1 = v >> 32, v2 = (uint32_t) v, c1 = c >> 32, c2 = (uint32_t) c, rm = v2 * c1 + v1 * c2; - return v1 * c1 + (rm >> 32) + v2 * c2 + (rm << 32); + */ +template +static inline uint64_t mir_mum( uint64_t v, uint64_t c ) { + if (exact) { return _mum(v, c); } + uint64_t v1 = v >> 32, v2 = (uint32_t)v, c1 = c >> 32, c2 = (uint32_t)c, rm = v2 * c1 + v1 * c2; + return v1 * c1 + (rm >> 32) + v2 * c2 + (rm << 32); } static const uint64_t p1 = UINT64_C(0x65862b62bdf5ef4d), p2 = UINT64_C(0x288eea216831e6a7); -template < bool exactmul > -static inline uint64_t mir_round(uint64_t state, uint64_t v) { - state ^= mir_mum(v, p1); - return state ^ mir_mum(state, p2); +template +static inline uint64_t mir_round( uint64_t state, uint64_t v ) { + state ^= mir_mum(v, p1); + return state ^ mir_mum(state, p2); } -template < bool bswap > -static inline uint64_t mir_get_key_part(const uint8_t * v, size_t len) { - size_t i, start = 0; - uint64_t tail = 0; - - if (len >= sizeof(uint32_t)) { - tail = ((uint64_t)(GET_U32(v, 0))) << 32; - start = 4; - } - for (i = start; i < len; i++) tail = (tail >> 8) | ((uint64_t) v[i] << 56); - return tail; +template +static inline uint64_t mir_get_key_part( const uint8_t * v, size_t len ) { + size_t i, start = 0; + uint64_t tail = 0; + + if (len >= sizeof(uint32_t)) { + tail = ((uint64_t)(GET_U32(v, 0))) << 32; + start = 4; + } + for (i = start; i < len; i++) { tail = (tail >> 8) | ((uint64_t)v[i] << 56); } + return tail; } //----------------------------------------------------------------------------- @@ -257,789 +268,789 @@ static inline uint64_t mir_get_key_part(const uint8_t * v, size_t len) { // The bswap and exactmul booleans cover all possible sets of hash // values from the original mir_hash() in both "strict" mode and // "relaxed" mode, regardless of machine endianness. 
-template < bool bswap, bool exactmul > -static void mir_hash(const void * in, const size_t olen, const seed_t seed, void * out) { - const uint8_t * v = (const uint8_t *)in; - uint64_t r = seed + olen; - size_t len = olen; - uint64_t blk; - - for (; len >= 16; len -= 16, v += 16) { - r ^= mir_mum(GET_U64(v, 0), p1); - r ^= mir_mum(GET_U64(v, 8), p2); - r ^= mir_mum(r, p1); - } - if (len >= 8) { - r ^= mir_mum(GET_U64(v, 0), p1); - len -= 8, v += 8; - } - if (len != 0) { - r ^= mir_mum(mir_get_key_part(v, len), p2); - } - r = mir_round(r, r); - PUT_U64(r, (uint8_t *)out, 0); +template +static void mir_hash( const void * in, const size_t olen, const seed_t seed, void * out ) { + const uint8_t * v = (const uint8_t *)in; + uint64_t r = seed + olen; + size_t len = olen; + uint64_t blk; + + for (; len >= 16; len -= 16, v += 16) { + r ^= mir_mum(GET_U64(v, 0), p1); + r ^= mir_mum(GET_U64(v, 8), p2); + r ^= mir_mum(r, p1); + } + if (len >= 8) { + r ^= mir_mum(GET_U64(v, 0), p1); + len -= 8, v += 8; + } + if (len != 0) { + r ^= mir_mum(mir_get_key_part(v, len), p2); + } + r = mir_round(r, r); + PUT_U64(r, (uint8_t *)out, 0); } //----------------------------------------------------------------------------- // Also https://github.com/vnmakarov/mir/blob/master/mir-hash.h REGISTER_FAMILY(mum_mir, - $.src_url = "https://github.com/vnmakarov/mum-hash", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/vnmakarov/mum-hash", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(mum1__exact__unroll1, - $.desc = "Mum-hash v1, unroll 2^1, exact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xCB93DE58, - $.verification_BE = 0xE820D0FB, - $.hashfn_native = mum_aligned<1,1,false,true>, - $.hashfn_bswap = mum_aligned<1,1,true,true> -); + $.desc = "Mum-hash v1, unroll 2^1, exact mult", + $.hash_flags = + 0, + $.impl_flags = + 
FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xCB93DE58, + $.verification_BE = 0xE820D0FB, + $.hashfn_native = mum_aligned<1, 1, false, true>, + $.hashfn_bswap = mum_aligned<1, 1, true, true> + ); REGISTER_HASH(mum1__exact__unroll2, - $.desc = "Mum-hash v1, unroll 2^2, exact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x3EEAE2D4, - $.verification_BE = 0xF23A691C, - $.hashfn_native = mum_aligned<1,2,false,true>, - $.hashfn_bswap = mum_aligned<1,2,true,true> -); + $.desc = "Mum-hash v1, unroll 2^2, exact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x3EEAE2D4, + $.verification_BE = 0xF23A691C, + $.hashfn_native = mum_aligned<1, 2, false, true>, + $.hashfn_bswap = mum_aligned<1, 2, true, true> + ); REGISTER_HASH(mum1__exact__unroll3, - $.desc = "Mum-hash v1, unroll 2^3, exact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x7C0A2F98, - $.verification_BE = 0x210F4BEB, - $.hashfn_native = mum_aligned<1,3,false,true>, - $.hashfn_bswap = mum_aligned<1,3,true,true> -); + $.desc = "Mum-hash v1, unroll 2^3, exact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x7C0A2F98, + $.verification_BE = 0x210F4BEB, + $.hashfn_native = mum_aligned<1, 3, false, true>, + $.hashfn_bswap = mum_aligned<1, 3, true, true> + ); REGISTER_HASH(mum1__exact__unroll4, - $.desc = "Mum-hash v1, unroll 2^4, exact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x280B2CC6, - 
$.verification_BE = 0x0609C4A6, - $.hashfn_native = mum_aligned<1,4,false,true>, - $.hashfn_bswap = mum_aligned<1,4,true,true> -); + $.desc = "Mum-hash v1, unroll 2^4, exact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x280B2CC6, + $.verification_BE = 0x0609C4A6, + $.hashfn_native = mum_aligned<1, 4, false, true>, + $.hashfn_bswap = mum_aligned<1, 4, true, true> + ); REGISTER_HASH(mum1__inexact__unroll1, - $.desc = "Mum-hash v1, unroll 2^1, inexact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x689214DF, - $.verification_BE = 0x14FBDFDD, - $.hashfn_native = mum_aligned<1,1,false,false>, - $.hashfn_bswap = mum_aligned<1,1,true,false> -); + $.desc = "Mum-hash v1, unroll 2^1, inexact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x689214DF, + $.verification_BE = 0x14FBDFDD, + $.hashfn_native = mum_aligned<1, 1, false, false>, + $.hashfn_bswap = mum_aligned<1, 1, true, false> + ); REGISTER_HASH(mum1__inexact__unroll2, - $.desc = "Mum-hash v1, unroll 2^2, inexact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xA973C6C0, - $.verification_BE = 0x9C12DFA3, - $.hashfn_native = mum_aligned<1,2,false,false>, - $.hashfn_bswap = mum_aligned<1,2,true,false> -); + $.desc = "Mum-hash v1, unroll 2^2, inexact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xA973C6C0, + $.verification_BE = 0x9C12DFA3, + $.hashfn_native = mum_aligned<1, 2, false, false>, + $.hashfn_bswap = mum_aligned<1, 2, true, false> + ); 
REGISTER_HASH(mum1__inexact__unroll3, - $.desc = "Mum-hash v1, unroll 2^3, inexact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x5FC8FC51, - $.verification_BE = 0x907AB469, - $.hashfn_native = mum_aligned<1,3,false,false>, - $.hashfn_bswap = mum_aligned<1,3,true,false> -); + $.desc = "Mum-hash v1, unroll 2^3, inexact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x5FC8FC51, + $.verification_BE = 0x907AB469, + $.hashfn_native = mum_aligned<1, 3, false, false>, + $.hashfn_bswap = mum_aligned<1, 3, true, false> + ); REGISTER_HASH(mum1__inexact__unroll4, - $.desc = "Mum-hash v1, unroll 2^4, inexact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x2EF256D3, - $.verification_BE = 0xBF27AAE6, - $.hashfn_native = mum_aligned<1,4,false,false>, - $.hashfn_bswap = mum_aligned<1,4,true,false> -); + $.desc = "Mum-hash v1, unroll 2^4, inexact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x2EF256D3, + $.verification_BE = 0xBF27AAE6, + $.hashfn_native = mum_aligned<1, 4, false, false>, + $.hashfn_bswap = mum_aligned<1, 4, true, false> + ); #if defined(NOTYET) REGISTER_HASH(mum1_realign__exact__unroll1, - $.desc = "Mum-hash v1, unroll 2^1, exact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x9E323D13, - $.verification_BE = 0x2E655802, - $.hashfn_native = mum_realign<1,1,false,true>, - $.hashfn_bswap = mum_realign<1,1,true,true> -); + $.desc = "Mum-hash v1, unroll 2^1, exact mult, for aligned-only 
reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x9E323D13, + $.verification_BE = 0x2E655802, + $.hashfn_native = mum_realign<1, 1, false, true>, + $.hashfn_bswap = mum_realign<1, 1, true, true> + ); REGISTER_HASH(mum1_realign__exact__unroll2, - $.desc = "Mum-hash v1, unroll 2^2, exact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x139A630F, - $.verification_BE = 0x2281185A, - $.hashfn_native = mum_realign<1,2,false,true>, - $.hashfn_bswap = mum_realign<1,2,true,true> -); + $.desc = "Mum-hash v1, unroll 2^2, exact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x139A630F, + $.verification_BE = 0x2281185A, + $.hashfn_native = mum_realign<1, 2, false, true>, + $.hashfn_bswap = mum_realign<1, 2, true, true> + ); REGISTER_HASH(mum1_realign__exact__unroll3, - $.desc = "Mum-hash v1, unroll 2^3, exact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x0F1AC6C6, - $.verification_BE = 0xE8BF6CE3, - $.hashfn_native = mum_realign<1,3,false,true>, - $.hashfn_bswap = mum_realign<1,3,true,true> -); + $.desc = "Mum-hash v1, unroll 2^3, exact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x0F1AC6C6, + $.verification_BE = 0xE8BF6CE3, + $.hashfn_native = mum_realign<1, 3, false, true>, + $.hashfn_bswap = mum_realign<1, 3, true, true> + ); REGISTER_HASH(mum1_realign__exact__unroll4, - $.desc = "Mum-hash v1, unroll 2^4, exact mult, for 
aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xF47885FE, - $.verification_BE = 0xA7961551, - $.hashfn_native = mum_realign<1,4,false,true>, - $.hashfn_bswap = mum_realign<1,4,true,true> -); + $.desc = "Mum-hash v1, unroll 2^4, exact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xF47885FE, + $.verification_BE = 0xA7961551, + $.hashfn_native = mum_realign<1, 4, false, true>, + $.hashfn_bswap = mum_realign<1, 4, true, true> + ); REGISTER_HASH(mum1_realign__inexact__unroll1, - $.desc = "Mum-hash v1, unroll 2^1, inexact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xE11FC923, - $.verification_BE = 0x99623861, - $.hashfn_native = mum_realign<1,1,false,false>, - $.hashfn_bswap = mum_realign<1,1,true,false> -); + $.desc = "Mum-hash v1, unroll 2^1, inexact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xE11FC923, + $.verification_BE = 0x99623861, + $.hashfn_native = mum_realign<1, 1, false, false>, + $.hashfn_bswap = mum_realign<1, 1, true, false> + ); REGISTER_HASH(mum1_realign__inexact__unroll2, - $.desc = "Mum-hash v1, unroll 2^2, inexact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xBAFC050E, - $.verification_BE = 0x9678D798, - $.hashfn_native = mum_realign<1,2,false,false>, - $.hashfn_bswap = mum_realign<1,2,true,false> -); + $.desc = "Mum-hash v1, unroll 2^2, inexact mult, for aligned-only reads", + 
$.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xBAFC050E, + $.verification_BE = 0x9678D798, + $.hashfn_native = mum_realign<1, 2, false, false>, + $.hashfn_bswap = mum_realign<1, 2, true, false> + ); REGISTER_HASH(mum1_realign__inexact__unroll3, - $.desc = "Mum-hash v1, unroll 2^3, inexact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x56FA3D86, - $.verification_BE = 0x8EDC90F0, - $.hashfn_native = mum_realign<1,3,false,false>, - $.hashfn_bswap = mum_realign<1,3,true,false> -); + $.desc = "Mum-hash v1, unroll 2^3, inexact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x56FA3D86, + $.verification_BE = 0x8EDC90F0, + $.hashfn_native = mum_realign<1, 3, false, false>, + $.hashfn_bswap = mum_realign<1, 3, true, false> + ); REGISTER_HASH(mum1_realign__inexact__unroll4, - $.desc = "Mum-hash v1, unroll 2^4, inexact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x59787144, - $.verification_BE = 0xFCAEA377, - $.hashfn_native = mum_realign<1,4,false,false>, - $.hashfn_bswap = mum_realign<1,4,true,false> -); + $.desc = "Mum-hash v1, unroll 2^4, inexact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x59787144, + $.verification_BE = 0xFCAEA377, + $.hashfn_native = mum_realign<1, 4, false, false>, + $.hashfn_bswap = mum_realign<1, 4, true, false> + ); #endif REGISTER_HASH(mum2__exact__unroll1, - $.desc = "Mum-hash v2, unroll 2^1, exact 
mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x9B36F94C, - $.verification_BE = 0x50F10B41, - $.hashfn_native = mum_aligned<2,1,false,true>, - $.hashfn_bswap = mum_aligned<2,1,true,true> -); + $.desc = "Mum-hash v2, unroll 2^1, exact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x9B36F94C, + $.verification_BE = 0x50F10B41, + $.hashfn_native = mum_aligned<2, 1, false, true>, + $.hashfn_bswap = mum_aligned<2, 1, true, true> + ); REGISTER_HASH(mum2__exact__unroll2, - $.desc = "Mum-hash v2, unroll 2^2, exact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x40427228, - $.verification_BE = 0x43DB198B, - $.hashfn_native = mum_aligned<2,2,false,true>, - $.hashfn_bswap = mum_aligned<2,2,true,true> -); + $.desc = "Mum-hash v2, unroll 2^2, exact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x40427228, + $.verification_BE = 0x43DB198B, + $.hashfn_native = mum_aligned<2, 2, false, true>, + $.hashfn_bswap = mum_aligned<2, 2, true, true> + ); REGISTER_HASH(mum2__exact__unroll3, - $.desc = "Mum-hash v2, unroll 2^3, exact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xB5D1CB5C, - $.verification_BE = 0xA718EDE8, - $.hashfn_native = mum_aligned<2,3,false,true>, - $.hashfn_bswap = mum_aligned<2,3,true,true> -); + $.desc = "Mum-hash v2, unroll 2^3, exact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xB5D1CB5C, + 
$.verification_BE = 0xA718EDE8, + $.hashfn_native = mum_aligned<2, 3, false, true>, + $.hashfn_bswap = mum_aligned<2, 3, true, true> + ); REGISTER_HASH(mum2__exact__unroll4, - $.desc = "Mum-hash v2, unroll 2^4, exact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x59AEDABF, - $.verification_BE = 0x3B1A2832, - $.hashfn_native = mum_aligned<2,4,false,true>, - $.hashfn_bswap = mum_aligned<2,4,true,true> -); + $.desc = "Mum-hash v2, unroll 2^4, exact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x59AEDABF, + $.verification_BE = 0x3B1A2832, + $.hashfn_native = mum_aligned<2, 4, false, true>, + $.hashfn_bswap = mum_aligned<2, 4, true, true> + ); REGISTER_HASH(mum2__inexact__unroll1, - $.desc = "Mum-hash v2, unroll 2^1, inexact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x1CC6D1E3, - $.verification_BE = 0x297D8E45, - $.hashfn_native = mum_aligned<2,1,false,false>, - $.hashfn_bswap = mum_aligned<2,1,true,false> -); + $.desc = "Mum-hash v2, unroll 2^1, inexact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x1CC6D1E3, + $.verification_BE = 0x297D8E45, + $.hashfn_native = mum_aligned<2, 1, false, false>, + $.hashfn_bswap = mum_aligned<2, 1, true, false> + ); REGISTER_HASH(mum2__inexact__unroll2, - $.desc = "Mum-hash v2, unroll 2^2, inexact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x62325A27, - $.verification_BE = 0x5324AEEA, - $.hashfn_native = mum_aligned<2,2,false,false>, - $.hashfn_bswap = 
mum_aligned<2,2,true,false> -); + $.desc = "Mum-hash v2, unroll 2^2, inexact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x62325A27, + $.verification_BE = 0x5324AEEA, + $.hashfn_native = mum_aligned<2, 2, false, false>, + $.hashfn_bswap = mum_aligned<2, 2, true, false> + ); REGISTER_HASH(mum2__inexact__unroll3, - $.desc = "Mum-hash v2, unroll 2^3, inexact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xF4DD9947, - $.verification_BE = 0x98C9448F, - $.hashfn_native = mum_aligned<2,3,false,false>, - $.hashfn_bswap = mum_aligned<2,3,true,false> -); + $.desc = "Mum-hash v2, unroll 2^3, inexact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xF4DD9947, + $.verification_BE = 0x98C9448F, + $.hashfn_native = mum_aligned<2, 3, false, false>, + $.hashfn_bswap = mum_aligned<2, 3, true, false> + ); REGISTER_HASH(mum2__inexact__unroll4, - $.desc = "Mum-hash v2, unroll 2^4, inexact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x62C46C55, - $.verification_BE = 0x0E9DDA53, - $.hashfn_native = mum_aligned<2,4,false,false>, - $.hashfn_bswap = mum_aligned<2,4,true,false> -); + $.desc = "Mum-hash v2, unroll 2^4, inexact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x62C46C55, + $.verification_BE = 0x0E9DDA53, + $.hashfn_native = mum_aligned<2, 4, false, false>, + $.hashfn_bswap = mum_aligned<2, 4, true, false> + ); #if defined(NOTYET) REGISTER_HASH(mum2_realign__exact__unroll1, - $.desc = "Mum-hash v2, unroll 2^1, exact mult, 
for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x3A8751BE, - $.verification_BE = 0xA3C3C380, - $.hashfn_native = mum_realign<2,1,false,true>, - $.hashfn_bswap = mum_realign<2,1,true,true> -); + $.desc = "Mum-hash v2, unroll 2^1, exact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x3A8751BE, + $.verification_BE = 0xA3C3C380, + $.hashfn_native = mum_realign<2, 1, false, true>, + $.hashfn_bswap = mum_realign<2, 1, true, true> + ); REGISTER_HASH(mum2_realign__exact__unroll2, - $.desc = "Mum-hash v2, unroll 2^2, exact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x7C85EF5A, - $.verification_BE = 0xE99D6D79, - $.hashfn_native = mum_realign<2,2,false,true>, - $.hashfn_bswap = mum_realign<2,2,true,true> -); + $.desc = "Mum-hash v2, unroll 2^2, exact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x7C85EF5A, + $.verification_BE = 0xE99D6D79, + $.hashfn_native = mum_realign<2, 2, false, true>, + $.hashfn_bswap = mum_realign<2, 2, true, true> + ); REGISTER_HASH(mum2_realign__exact__unroll3, - $.desc = "Mum-hash v2, unroll 2^3, exact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x93F25600, - $.verification_BE = 0xE13A6F00, - $.hashfn_native = mum_realign<2,3,false,true>, - $.hashfn_bswap = mum_realign<2,3,true,true> -); + $.desc = "Mum-hash v2, unroll 2^3, exact mult, for aligned-only reads", + $.hash_flags = 
+ 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x93F25600, + $.verification_BE = 0xE13A6F00, + $.hashfn_native = mum_realign<2, 3, false, true>, + $.hashfn_bswap = mum_realign<2, 3, true, true> + ); REGISTER_HASH(mum2_realign__exact__unroll4, - $.desc = "Mum-hash v2, unroll 2^4, exact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xA0DC8DF8, - $.verification_BE = 0x6B746384, - $.hashfn_native = mum_realign<2,4,false,true>, - $.hashfn_bswap = mum_realign<2,4,true,true> -); + $.desc = "Mum-hash v2, unroll 2^4, exact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xA0DC8DF8, + $.verification_BE = 0x6B746384, + $.hashfn_native = mum_realign<2, 4, false, true>, + $.hashfn_bswap = mum_realign<2, 4, true, true> + ); REGISTER_HASH(mum2_realign__inexact__unroll1, - $.desc = "Mum-hash v2, unroll 2^1, inexact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x2D06BA6A, - $.verification_BE = 0xF0F929DF, - $.hashfn_native = mum_realign<2,1,false,false>, - $.hashfn_bswap = mum_realign<2,1,true,false> -); + $.desc = "Mum-hash v2, unroll 2^1, inexact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x2D06BA6A, + $.verification_BE = 0xF0F929DF, + $.hashfn_native = mum_realign<2, 1, false, false>, + $.hashfn_bswap = mum_realign<2, 1, true, false> + ); REGISTER_HASH(mum2_realign__inexact__unroll2, - $.desc = "Mum-hash v2, unroll 2^2, inexact mult, for aligned-only 
reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xF645F70A, - $.verification_BE = 0xC384782D, - $.hashfn_native = mum_realign<2,2,false,false>, - $.hashfn_bswap = mum_realign<2,2,true,false> -); + $.desc = "Mum-hash v2, unroll 2^2, inexact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xF645F70A, + $.verification_BE = 0xC384782D, + $.hashfn_native = mum_realign<2, 2, false, false>, + $.hashfn_bswap = mum_realign<2, 2, true, false> + ); REGISTER_HASH(mum2_realign__inexact__unroll3, - $.desc = "Mum-hash v2, unroll 2^3, inexact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xA8F0601A, - $.verification_BE = 0x5F5895AB, - $.hashfn_native = mum_realign<2,3,false,false>, - $.hashfn_bswap = mum_realign<2,3,true,false> -); + $.desc = "Mum-hash v2, unroll 2^3, inexact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xA8F0601A, + $.verification_BE = 0x5F5895AB, + $.hashfn_native = mum_realign<2, 3, false, false>, + $.hashfn_bswap = mum_realign<2, 3, true, false> + ); REGISTER_HASH(mum2_realign__inexact__unroll4, - $.desc = "Mum-hash v2, unroll 2^4, inexact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x53A9484D, - $.verification_BE = 0x4C6EBD7D, - $.hashfn_native = mum_realign<2,4,false,false>, - $.hashfn_bswap = mum_realign<2,4,true,false> -); + $.desc = "Mum-hash v2, unroll 2^4, inexact mult, for aligned-only reads", + 
$.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x53A9484D, + $.verification_BE = 0x4C6EBD7D, + $.hashfn_native = mum_realign<2, 4, false, false>, + $.hashfn_bswap = mum_realign<2, 4, true, false> + ); #endif REGISTER_HASH(mum3__exact__unroll1, - $.desc = "Mum-hash v3, unroll 2^1, exact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x3D14C6E2, - $.verification_BE = 0x360A792D, - $.hashfn_native = mum_aligned<3,1,false,true>, - $.hashfn_bswap = mum_aligned<3,1,true,true> -); + $.desc = "Mum-hash v3, unroll 2^1, exact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x3D14C6E2, + $.verification_BE = 0x360A792D, + $.hashfn_native = mum_aligned<3, 1, false, true>, + $.hashfn_bswap = mum_aligned<3, 1, true, true> + ); REGISTER_HASH(mum3__exact__unroll2, - $.desc = "Mum-hash v3, unroll 2^2, exact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x3A556EB2, - $.verification_BE = 0x752891D0, - $.hashfn_native = mum_aligned<3,2,false,true>, - $.hashfn_bswap = mum_aligned<3,2,true,true> -); + $.desc = "Mum-hash v3, unroll 2^2, exact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x3A556EB2, + $.verification_BE = 0x752891D0, + $.hashfn_native = mum_aligned<3, 2, false, true>, + $.hashfn_bswap = mum_aligned<3, 2, true, true> + ); REGISTER_HASH(mum3__exact__unroll3, - $.desc = "Mum-hash v3, unroll 2^3, exact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 
64, - $.verification_LE = 0x8BD72B8C, - $.verification_BE = 0xDD8DD247, - $.hashfn_native = mum_aligned<3,3,false,true>, - $.hashfn_bswap = mum_aligned<3,3,true,true> -); + $.desc = "Mum-hash v3, unroll 2^3, exact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x8BD72B8C, + $.verification_BE = 0xDD8DD247, + $.hashfn_native = mum_aligned<3, 3, false, true>, + $.hashfn_bswap = mum_aligned<3, 3, true, true> + ); REGISTER_HASH(mum3__exact__unroll4, - $.desc = "Mum-hash v3, unroll 2^4, exact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x0AD998DF, - $.verification_BE = 0x05832709, - $.hashfn_native = mum_aligned<3,4,false,true>, - $.hashfn_bswap = mum_aligned<3,4,true,true> -); + $.desc = "Mum-hash v3, unroll 2^4, exact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x0AD998DF, + $.verification_BE = 0x05832709, + $.hashfn_native = mum_aligned<3, 4, false, true>, + $.hashfn_bswap = mum_aligned<3, 4, true, true> + ); REGISTER_HASH(mum3__inexact__unroll1, - $.desc = "Mum-hash v3, unroll 2^1, inexact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x475D666B, - $.verification_BE = 0xE75B31F7, - $.hashfn_native = mum_aligned<3,1,false,false>, - $.hashfn_bswap = mum_aligned<3,1,true,false> -); + $.desc = "Mum-hash v3, unroll 2^1, inexact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x475D666B, + $.verification_BE = 0xE75B31F7, + $.hashfn_native = mum_aligned<3, 1, false, false>, + $.hashfn_bswap = mum_aligned<3, 1, true, 
false> + ); REGISTER_HASH(mum3__inexact__unroll2, - $.desc = "Mum-hash v3, unroll 2^2, inexact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xF6E13B23, - $.verification_BE = 0x7B00F4F6, - $.hashfn_native = mum_aligned<3,2,false,false>, - $.hashfn_bswap = mum_aligned<3,2,true,false> -); + $.desc = "Mum-hash v3, unroll 2^2, inexact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xF6E13B23, + $.verification_BE = 0x7B00F4F6, + $.hashfn_native = mum_aligned<3, 2, false, false>, + $.hashfn_bswap = mum_aligned<3, 2, true, false> + ); REGISTER_HASH(mum3__inexact__unroll3, - $.desc = "Mum-hash v3, unroll 2^3, inexact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xB5560703, - $.verification_BE = 0x1220D737, - $.hashfn_native = mum_aligned<3,3,false,false>, - $.hashfn_bswap = mum_aligned<3,3,true,false> -); + $.desc = "Mum-hash v3, unroll 2^3, inexact mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xB5560703, + $.verification_BE = 0x1220D737, + $.hashfn_native = mum_aligned<3, 3, false, false>, + $.hashfn_bswap = mum_aligned<3, 3, true, false> + ); REGISTER_HASH(mum3__inexact__unroll4, - $.desc = "Mum-hash v3, unroll 2^4, inexact mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xE96A20C0, - $.verification_BE = 0xE784308C, - $.hashfn_native = mum_aligned<3,4,false,false>, - $.hashfn_bswap = mum_aligned<3,4,true,false> -); + $.desc = "Mum-hash v3, unroll 2^4, inexact mult", + $.hash_flags = + 0, + $.impl_flags = + 
FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xE96A20C0, + $.verification_BE = 0xE784308C, + $.hashfn_native = mum_aligned<3, 4, false, false>, + $.hashfn_bswap = mum_aligned<3, 4, true, false> + ); #if defined(NOTYET) REGISTER_HASH(mum3_realign__exact__unroll1, - $.desc = "Mum-hash v3, unroll 2^1, exact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x26B6E56E, - $.verification_BE = 0x3395CE6B, - $.hashfn_native = mum_realign<3,1,false,true>, - $.hashfn_bswap = mum_realign<3,1,true,true> -); + $.desc = "Mum-hash v3, unroll 2^1, exact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x26B6E56E, + $.verification_BE = 0x3395CE6B, + $.hashfn_native = mum_realign<3, 1, false, true>, + $.hashfn_bswap = mum_realign<3, 1, true, true> + ); REGISTER_HASH(mum3_realign__exact__unroll2, - $.desc = "Mum-hash v3, unroll 2^2, exact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x6A60097E, - $.verification_BE = 0xF7ABC648, - $.hashfn_native = mum_realign<3,2,false,true>, - $.hashfn_bswap = mum_realign<3,2,true,true> -); + $.desc = "Mum-hash v3, unroll 2^2, exact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x6A60097E, + $.verification_BE = 0xF7ABC648, + $.hashfn_native = mum_realign<3, 2, false, true>, + $.hashfn_bswap = mum_realign<3, 2, true, true> + ); REGISTER_HASH(mum3_realign__exact__unroll3, - $.desc = "Mum-hash v3, unroll 2^3, exact mult, for aligned-only reads", - 
$.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xD45801EE, - $.verification_BE = 0x1D6D8F1C, - $.hashfn_native = mum_realign<3,3,false,true>, - $.hashfn_bswap = mum_realign<3,3,true,true> -); + $.desc = "Mum-hash v3, unroll 2^3, exact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xD45801EE, + $.verification_BE = 0x1D6D8F1C, + $.hashfn_native = mum_realign<3, 3, false, true>, + $.hashfn_bswap = mum_realign<3, 3, true, true> + ); REGISTER_HASH(mum3_realign__exact__unroll4, - $.desc = "Mum-hash v3, unroll 2^4, exact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x65C49B24, - $.verification_BE = 0xE1C2CEEC, - $.hashfn_native = mum_realign<3,4,false,true>, - $.hashfn_bswap = mum_realign<3,4,true,true> -); + $.desc = "Mum-hash v3, unroll 2^4, exact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x65C49B24, + $.verification_BE = 0xE1C2CEEC, + $.hashfn_native = mum_realign<3, 4, false, true>, + $.hashfn_bswap = mum_realign<3, 4, true, true> + ); REGISTER_HASH(mum3_realign__inexact__unroll1, - $.desc = "Mum-hash v3, unroll 2^1, inexact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xFB3DE98D, - $.verification_BE = 0xBBF8D76F, - $.hashfn_native = mum_realign<3,1,false,false>, - $.hashfn_bswap = mum_realign<3,1,true,false> -); + $.desc = "Mum-hash v3, unroll 2^1, inexact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags 
= + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xFB3DE98D, + $.verification_BE = 0xBBF8D76F, + $.hashfn_native = mum_realign<3, 1, false, false>, + $.hashfn_bswap = mum_realign<3, 1, true, false> + ); REGISTER_HASH(mum3_realign__inexact__unroll2, - $.desc = "Mum-hash v3, unroll 2^2, inexact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xBFD7CE56, - $.verification_BE = 0x134317BB, - $.hashfn_native = mum_realign<3,2,false,false>, - $.hashfn_bswap = mum_realign<3,2,true,false> -); + $.desc = "Mum-hash v3, unroll 2^2, inexact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xBFD7CE56, + $.verification_BE = 0x134317BB, + $.hashfn_native = mum_realign<3, 2, false, false>, + $.hashfn_bswap = mum_realign<3, 2, true, false> + ); REGISTER_HASH(mum3_realign__inexact__unroll3, - $.desc = "Mum-hash v3, unroll 2^3, inexact mult, for aligned-only reads", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x68CB735E, - $.verification_BE = 0x47E5152C, - $.hashfn_native = mum_realign<3,3,false,false>, - $.hashfn_bswap = mum_realign<3,3,true,false> -); + $.desc = "Mum-hash v3, unroll 2^3, inexact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x68CB735E, + $.verification_BE = 0x47E5152C, + $.hashfn_native = mum_realign<3, 3, false, false>, + $.hashfn_bswap = mum_realign<3, 3, true, false> + ); REGISTER_HASH(mum3_realign__inexact__unroll4, - $.desc = "Mum-hash v3, unroll 2^4, inexact mult, for aligned-only reads", - 
$.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x4975BD5E, - $.verification_BE = 0x8A467520, - $.hashfn_native = mum_realign<3,4,false,false>, - $.hashfn_bswap = mum_realign<3,4,true,false> -); + $.desc = "Mum-hash v3, unroll 2^4, inexact mult, for aligned-only reads", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x4975BD5E, + $.verification_BE = 0x8A467520, + $.hashfn_native = mum_realign<3, 4, false, false>, + $.hashfn_bswap = mum_realign<3, 4, true, false> + ); #endif REGISTER_HASH(mir__exact, - $.desc = "MIR-hash, exact 128-bit mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x00A393C8, - $.verification_BE = 0x39F99A44, - $.hashfn_native = mir_hash, - $.hashfn_bswap = mir_hash, - $.badseeds = {0x0, 0x1, 0x2} // !! I think literally *ALL* seeds are bad -); + $.desc = "MIR-hash, exact 128-bit mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x00A393C8, + $.verification_BE = 0x39F99A44, + $.hashfn_native = mir_hash, + $.hashfn_bswap = mir_hash, + $.badseeds = { 0x0, 0x1, 0x2 } // !! 
I think literally *ALL* seeds are bad + ); REGISTER_HASH(mir__inexact, - $.desc = "MIR-hash, inexact 128-bit mult", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x422A66FC, - $.verification_BE = 0xA9A6A383, - $.hashfn_native = mir_hash, - $.hashfn_bswap = mir_hash, - $.seedfixfn = excludeBadseeds, - $.badseeds = {UINT64_C(0xfffffffffffffff0)} -); + $.desc = "MIR-hash, inexact 128-bit mult", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x422A66FC, + $.verification_BE = 0xA9A6A383, + $.hashfn_native = mir_hash, + $.hashfn_bswap = mir_hash, + $.seedfixfn = excludeBadseeds, + $.badseeds = { UINT64_C (0xfffffffffffffff0) } + ); diff --git a/hashes/murmur_oaat.cpp b/hashes/murmur_oaat.cpp index eff2a402..79801534 100644 --- a/hashes/murmur_oaat.cpp +++ b/hashes/murmur_oaat.cpp @@ -28,7 +28,7 @@ #include "Hashlib.h" //------------------------------------------------------------ -static uint32_t MurmurOAAT_impl(const uint8_t * data, size_t len, uint32_t seed) { +static uint32_t MurmurOAAT_impl( const uint8_t * data, size_t len, uint32_t seed ) { uint32_t h = seed; for (size_t i = 0; i < len; i++) { @@ -40,30 +40,31 @@ static uint32_t MurmurOAAT_impl(const uint8_t * data, size_t len, uint32_t seed) } //------------------------------------------------------------ -template < bool bswap > -static void MurmurOAAT(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void MurmurOAAT( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = MurmurOAAT_impl((const uint8_t *)in, len, (uint32_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(MurmurOAAT, - $.src_url = "https://github.com/aappleby/smhasher/blob/master/src/Hashes.cpp", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + 
$.src_url = "https://github.com/aappleby/smhasher/blob/master/src/Hashes.cpp", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(MurmurOAAT, - $.desc = "OAAT hash based on Murmur's mix", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_SLOW | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x5363BD98, - $.verification_BE = 0x29CCE130, - $.hashfn_native = MurmurOAAT, - $.hashfn_bswap = MurmurOAAT -); + $.desc = "OAAT hash based on Murmur's mix", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_SLOW | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x5363BD98, + $.verification_BE = 0x29CCE130, + $.hashfn_native = MurmurOAAT, + $.hashfn_bswap = MurmurOAAT + ); diff --git a/hashes/murmurhash1.cpp b/hashes/murmurhash1.cpp index f903f4b0..5a9fc58d 100644 --- a/hashes/murmurhash1.cpp +++ b/hashes/murmurhash1.cpp @@ -31,14 +31,15 @@ #include "Hashlib.h" //----------------------------------------------------------------------------- -template < bool bswap > -static void MurmurHash1(const void * in, const size_t olen, const seed_t seed, void * out) { - //uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed ) +template +static void MurmurHash1( const void * in, const size_t olen, const seed_t seed, void * out ) { + // uint32_t MurmurHash1 ( const void * key, int len, uint32_t seed ) const uint32_t m = 0xc6a4a793; const uint32_t r = 16; - size_t len = olen; - uint32_t h = seed; + size_t len = olen; + uint32_t h = seed; + h ^= len * m; //---------- @@ -47,25 +48,25 @@ static void MurmurHash1(const void * in, const size_t olen, const seed_t seed, v while (len >= 4) { uint32_t k = GET_U32(data, 0); - h += k; - h *= m; - h ^= h >> 16; + h += k; + h *= m; + h ^= h >> 16; data += 4; - len -= 4; + len -= 4; } //---------- - switch(len) { + switch (len) { case 3: - h += data[2] << 16; /* 
FALLTHROUGH */ + h += data[2] << 16; /* FALLTHROUGH */ case 2: - h += data[1] << 8; /* FALLTHROUGH */ + h += data[1] << 8; /* FALLTHROUGH */ case 1: - h += data[0]; - h *= m; - h ^= h >> r; - }; + h += data[0]; + h *= m; + h ^= h >> r; + } //---------- h *= m; @@ -77,22 +78,22 @@ static void MurmurHash1(const void * in, const size_t olen, const seed_t seed, v } REGISTER_FAMILY(murmur1, - $.src_url = "https://github.com/aappleby/smhasher/", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/aappleby/smhasher/", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(MurmurHash1, - $.desc = "MurmurHash v1", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x9EA7D056, - $.verification_BE = 0x4B34A47A, - $.hashfn_native = MurmurHash1, - $.hashfn_bswap = MurmurHash1, - $.seedfixfn = excludeBadseeds, - $.badseeds = {0xc6a4a793} -); + $.desc = "MurmurHash v1", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x9EA7D056, + $.verification_BE = 0x4B34A47A, + $.hashfn_native = MurmurHash1, + $.hashfn_bswap = MurmurHash1, + $.seedfixfn = excludeBadseeds, + $.badseeds = { 0xc6a4a793 } + ); diff --git a/hashes/murmurhash2.cpp b/hashes/murmurhash2.cpp index 3ab24163..ccd6671f 100644 --- a/hashes/murmurhash2.cpp +++ b/hashes/murmurhash2.cpp @@ -31,87 +31,87 @@ #include "Hashlib.h" //----------------------------------------------------------------------------- -template < bool bswap > -static void MurmurHash2_32(const void * in, const size_t olen, const seed_t seed, void * out) { - // 'm' and 'r' are mixing constants generated offline. - // They're not really 'magic', they just happen to work well. 
- const uint32_t m = 0x5bd1e995; - const uint32_t r = 24; - size_t len = olen; - - // Initialize the hash to a 'random' value - uint32_t h = seed ^ olen; - - // Mix 4 bytes at a time into the hash - const uint8_t * data = (const uint8_t *)in; - - while (len >= 4) { - uint32_t k = GET_U32(data, 0); - - k *= m; - k ^= k >> r; - k *= m; - - h *= m; - h ^= k; - - data += 4; - len -= 4; - } - - // Handle the last few bytes of the input array - switch(len) { - case 3: h ^= data[2] << 16; /* FALLTHROUGH */ - case 2: h ^= data[1] << 8; /* FALLTHROUGH */ - case 1: h ^= data[0]; - h *= m; - } - - // Do a few final mixes of the hash to ensure the last few - // bytes are well-incorporated. - h ^= h >> 13; - h *= m; - h ^= h >> 15; - - PUT_U32(h, (uint8_t *)out, 0); +template +static void MurmurHash2_32( const void * in, const size_t olen, const seed_t seed, void * out ) { + // 'm' and 'r' are mixing constants generated offline. + // They're not really 'magic', they just happen to work well. + const uint32_t m = 0x5bd1e995; + const uint32_t r = 24; + size_t len = olen; + + // Initialize the hash to a 'random' value + uint32_t h = seed ^ olen; + + // Mix 4 bytes at a time into the hash + const uint8_t * data = (const uint8_t *)in; + + while (len >= 4) { + uint32_t k = GET_U32(data, 0); + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + data += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + switch (len) { + case 3: h ^= data[2] << 16; /* FALLTHROUGH */ + case 2: h ^= data[1] << 8; /* FALLTHROUGH */ + case 1: h ^= data[0]; + h *= m; + } + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. 
+ h ^= h >> 13; + h *= m; + h ^= h >> 15; + + PUT_U32(h, (uint8_t *)out, 0); } //----------------------------------------------------------------------------- // MurmurHash2, 64-bit versions, by Austin Appleby // 64-bit hash for 64-bit platforms -template < bool bswap > -static void MurmurHash2_64(const void * in, const size_t len, const seed_t seed, void * out) { - const uint64_t m = UINT64_C(0xc6a4a7935bd1e995); - const uint32_t r = 47; +template +static void MurmurHash2_64( const void * in, const size_t len, const seed_t seed, void * out ) { + const uint64_t m = UINT64_C(0xc6a4a7935bd1e995); + const uint32_t r = 47; - uint64_t h = seed ^ (len * m); + uint64_t h = seed ^ (len * m); const uint8_t * data = (const uint8_t *)in; - const uint8_t * end = data + len - (len & 7); + const uint8_t * end = data + len - (len & 7); while (data != end) { - uint64_t k = GET_U64(data, 0); + uint64_t k = GET_U64(data, 0); - k *= m; - k ^= k >> r; - k *= m; + k *= m; + k ^= k >> r; + k *= m; - h ^= k; - h *= m; + h ^= k; + h *= m; - data += 8; + data += 8; } - switch(len & 7) { + switch (len & 7) { case 7: h ^= uint64_t(data[6]) << 48; /* FALLTHROUGH */ case 6: h ^= uint64_t(data[5]) << 40; /* FALLTHROUGH */ case 5: h ^= uint64_t(data[4]) << 32; /* FALLTHROUGH */ case 4: h ^= uint64_t(data[3]) << 24; /* FALLTHROUGH */ case 3: h ^= uint64_t(data[2]) << 16; /* FALLTHROUGH */ - case 2: h ^= uint64_t(data[1]) << 8; /* FALLTHROUGH */ + case 2: h ^= uint64_t(data[1]) << 8; /* FALLTHROUGH */ case 1: h ^= uint64_t(data[0]); - h *= m; + h *= m; } h ^= h >> r; @@ -121,45 +121,44 @@ static void MurmurHash2_64(const void * in, const size_t len, const seed_t seed, PUT_U64(h, (uint8_t *)out, 0); } - // 64-bit hash for 32-bit platforms -template < bool bswap > -static void MurmurHash2_32_64(const void * in, const size_t olen, const seed_t seed, void * out) { - const uint32_t m = 0x5bd1e995; - const uint32_t r = 24; +template +static void MurmurHash2_32_64( const void * in, const size_t olen, 
const seed_t seed, void * out ) { + const uint32_t m = 0x5bd1e995; + const uint32_t r = 24; - uint32_t h1 = (uint32_t)(seed) ^ olen; - uint32_t h2 = (uint32_t)(seed >> 32); - size_t len = olen; + uint32_t h1 = (uint32_t)(seed ) ^ olen; + uint32_t h2 = (uint32_t)(seed >> 32); + size_t len = olen; const uint8_t * data = (const uint8_t *)in; while (len >= 8) { uint32_t k1 = GET_U32(data, 0); - k1 *= m; k1 ^= k1 >> r; k1 *= m; + k1 *= m; k1 ^= k1 >> r; k1 *= m; h1 *= m; h1 ^= k1; uint32_t k2 = GET_U32(data, 4); - k2 *= m; k2 ^= k2 >> r; k2 *= m; - h2 *= m; h2 ^= k2; + k2 *= m; k2 ^= k2 >> r; k2 *= m; + h2 *= m; h2 ^= k2; - len -= 8; + len -= 8; data += 8; } if (len >= 4) { uint32_t k1 = GET_U32(data, 0); - k1 *= m; k1 ^= k1 >> r; k1 *= m; - h1 *= m; h1 ^= k1; - len -= 4; + k1 *= m; k1 ^= k1 >> r; k1 *= m; + h1 *= m; h1 ^= k1; + len -= 4; data += 4; } switch (len) { case 3: h2 ^= data[2] << 16; /* FALLTHROUGH */ - case 2: h2 ^= data[1] << 8; /* FALLTHROUGH */ + case 2: h2 ^= data[1] << 8; /* FALLTHROUGH */ case 1: h2 ^= data[0]; - h2 *= m; + h2 *= m; } h1 ^= h2 >> 18; h1 *= m; @@ -182,38 +181,38 @@ static void MurmurHash2_32_64(const void * in, const size_t olen, const seed_t s // collide with each other than expected, and also makes the function // more amenable to incremental implementations. 
-#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } +#define mmix(h, k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } -template < bool bswap > -static void MurmurHash2A_32(const void * in, const size_t olen, const seed_t seed, void * out) { - const uint32_t m = 0x5bd1e995; - const uint32_t r = 24; +template +static void MurmurHash2A_32( const void * in, const size_t olen, const seed_t seed, void * out ) { + const uint32_t m = 0x5bd1e995; + const uint32_t r = 24; - size_t len = olen; - uint32_t len32 = olen; - uint32_t h = (uint32_t)seed; + size_t len = olen; + uint32_t len32 = olen; + uint32_t h = (uint32_t)seed; const uint8_t * data = (const uint8_t *)in; - while (len >= 4 ) { + while (len >= 4) { uint32_t k = GET_U32(data, 0); - mmix(h,k); + mmix(h, k); data += 4; - len -= 4; + len -= 4; } uint32_t t = 0; switch (len) { case 3: t ^= data[2] << 16; /* FALLTHROUGH */ - case 2: t ^= data[1] << 8; /* FALLTHROUGH */ + case 2: t ^= data[1] << 8; /* FALLTHROUGH */ case 1: t ^= data[0]; } - mmix(h,t); - mmix(h,len32); + mmix(h, t ); + mmix(h, len32); h ^= h >> 13; h *= m; @@ -223,70 +222,70 @@ static void MurmurHash2A_32(const void * in, const size_t olen, const seed_t see } REGISTER_FAMILY(murmur2, - $.src_url = "https://github.com/aappleby/smhasher/", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/aappleby/smhasher/", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(MurmurHash2_32, - $.desc = "MurmurHash v2, 32-bit version", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x27864C1E, - $.verification_BE = 0xE87D9B54, - $.hashfn_native = MurmurHash2_32, - $.hashfn_bswap = MurmurHash2_32, - $.seedfixfn = excludeBadseeds, - $.badseeds = {0x10} -); + $.desc = "MurmurHash v2, 32-bit version", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + 
$.verification_LE = 0x27864C1E, + $.verification_BE = 0xE87D9B54, + $.hashfn_native = MurmurHash2_32, + $.hashfn_bswap = MurmurHash2_32, + $.seedfixfn = excludeBadseeds, + $.badseeds = { 0x10 } + ); REGISTER_HASH(MurmurHash2_64, - $.desc = "MurmurHash v2, 64-bit version", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x1F0D3804, - $.verification_BE = 0x8FDA498D, - $.hashfn_native = MurmurHash2_64, - $.hashfn_bswap = MurmurHash2_64, - $.seedfixfn = excludeBadseeds, - $.badseeds = {UINT64_C(0xc6a4a7935bd1e995)} -); + $.desc = "MurmurHash v2, 64-bit version", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x1F0D3804, + $.verification_BE = 0x8FDA498D, + $.hashfn_native = MurmurHash2_64, + $.hashfn_bswap = MurmurHash2_64, + $.seedfixfn = excludeBadseeds, + $.badseeds = { UINT64_C (0xc6a4a7935bd1e995) } + ); REGISTER_HASH(MurmurHash2_64__int32, - $.desc = "MurmurHash v2, 64-bit version using 32-bit variables", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xDD537C05, - $.verification_BE = 0xBF573795, - $.hashfn_native = MurmurHash2_32_64, - $.hashfn_bswap = MurmurHash2_32_64, - $.seedfixfn = excludeBadseeds, - $.badseeds = {0x10, UINT64_C(0xffffffff00000010)} -); + $.desc = "MurmurHash v2, 64-bit version using 32-bit variables", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xDD537C05, + $.verification_BE = 0xBF573795, + $.hashfn_native = MurmurHash2_32_64, + $.hashfn_bswap = MurmurHash2_32_64, + $.seedfixfn = excludeBadseeds, + $.badseeds = { 0x10, UINT64_C (0xffffffff00000010) } + ); REGISTER_HASH(MurmurHash2a, - $.desc = "MurmurHash v2a, 32-bit version using variant mixing", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - 
FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x7FBD4396, - $.verification_BE = 0x7D969EB5, - $.hashfn_native = MurmurHash2A_32, - $.hashfn_bswap = MurmurHash2A_32, - $.seedfixfn = excludeBadseeds, - $.badseeds = {0x2fc301c9} -); + $.desc = "MurmurHash v2a, 32-bit version using variant mixing", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x7FBD4396, + $.verification_BE = 0x7D969EB5, + $.hashfn_native = MurmurHash2A_32, + $.hashfn_bswap = MurmurHash2A_32, + $.seedfixfn = excludeBadseeds, + $.badseeds = { 0x2fc301c9 } + ); diff --git a/hashes/murmurhash3.cpp b/hashes/murmurhash3.cpp index d1b0d36a..d8446b09 100644 --- a/hashes/murmurhash3.cpp +++ b/hashes/murmurhash3.cpp @@ -30,323 +30,323 @@ #include "Platform.h" #include "Hashlib.h" -static FORCE_INLINE uint32_t fmix32(uint32_t h) { - h ^= h >> 16; - h *= 0x85ebca6b; - h ^= h >> 13; - h *= 0xc2b2ae35; - h ^= h >> 16; - - return h; +static FORCE_INLINE uint32_t fmix32( uint32_t h ) { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; } -static FORCE_INLINE uint64_t fmix64(uint64_t k) { - k ^= k >> 33; - k *= UINT64_C(0xff51afd7ed558ccd); - k ^= k >> 33; - k *= UINT64_C(0xc4ceb9fe1a85ec53); - k ^= k >> 33; +static FORCE_INLINE uint64_t fmix64( uint64_t k ) { + k ^= k >> 33; + k *= UINT64_C(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= UINT64_C(0xc4ceb9fe1a85ec53); + k ^= k >> 33; - return k; + return k; } //----------------------------------------------------------------------------- // Block read - if your platform needs to do endian-swapping or can only // handle aligned reads, do the conversion here -template < bool bswap > -static FORCE_INLINE uint32_t getblock32(const uint8_t * p, int64_t i) { - return GET_U32(p+(4*i), 0); +template +static FORCE_INLINE uint32_t getblock32( const uint8_t * p, int64_t i ) { + return GET_U32(p + (4 * 
i), 0); } -template < bool bswap > -static FORCE_INLINE uint64_t getblock64(const uint8_t * p, int64_t i) { - return GET_U64(p+(8*i), 0); +template +static FORCE_INLINE uint64_t getblock64( const uint8_t * p, int64_t i ) { + return GET_U64(p + (8 * i), 0); } //----------------------------------------------------------------------------- -template < bool bswap > -static void MurmurHash3_32(const void * in, const size_t len, const seed_t seed, void * out) { - const uint8_t * data = (const uint8_t *)in; - const ssize_t nblocks = len / 4; +template +static void MurmurHash3_32( const void * in, const size_t len, const seed_t seed, void * out ) { + const uint8_t * data = (const uint8_t *)in; + const ssize_t nblocks = len / 4; - uint32_t h1 = (uint32_t)seed; + uint32_t h1 = (uint32_t )seed; - const uint32_t c1 = 0xcc9e2d51; - const uint32_t c2 = 0x1b873593; + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; - //---------- - // body + //---------- + // body - const uint8_t * blocks = data + nblocks*4; + const uint8_t * blocks = data + nblocks * 4; - for (ssize_t i = -nblocks; i; i++) { - uint32_t k1 = getblock32(blocks,i); + for (ssize_t i = -nblocks; i; i++) { + uint32_t k1 = getblock32(blocks, i); - k1 *= c1; - k1 = ROTL32(k1,15); - k1 *= c2; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; - h1 ^= k1; - h1 = ROTL32(h1,13); - h1 = h1*5+0xe6546b64; - } + h1 ^= k1; + h1 = ROTL32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } - //---------- - // tail + //---------- + // tail - const uint8_t * tail = data + nblocks*4; + const uint8_t * tail = data + nblocks * 4; - uint32_t k1 = 0; + uint32_t k1 = 0; - switch(len & 3) { - case 3: k1 ^= tail[2] << 16; /* FALLTHROUGH */ - case 2: k1 ^= tail[1] << 8; /* FALLTHROUGH */ - case 1: k1 ^= tail[0]; - k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; - }; + switch (len & 3) { + case 3: k1 ^= tail[2] << 16; /* FALLTHROUGH */ + case 2: k1 ^= tail[1] << 8; /* FALLTHROUGH */ + case 1: k1 ^= tail[0]; + k1 *= c1; k1 = ROTL32(k1, 
15); k1 *= c2; h1 ^= k1; + } - //---------- - // finalization + //---------- + // finalization - h1 ^= (uint32_t)len; + h1 ^= (uint32_t)len; - h1 = fmix32(h1); + h1 = fmix32(h1); - PUT_U32(h1, (uint8_t *)out, 0); -} + PUT_U32(h1, (uint8_t *)out, 0); +} //----------------------------------------------------------------------------- -template < bool bswap > -static void MurmurHash3_32_128(const void * in, const size_t len, const seed_t seed, void * out) { - const uint8_t * data = (const uint8_t *)in; - const ssize_t nblocks = len / 16; - - uint32_t h1 = (uint32_t)seed; - uint32_t h2 = (uint32_t)seed; - uint32_t h3 = (uint32_t)seed; - uint32_t h4 = (uint32_t)seed; - - const uint32_t c1 = 0x239b961b; - const uint32_t c2 = 0xab0e9789; - const uint32_t c3 = 0x38b34ae5; - const uint32_t c4 = 0xa1e38b93; - - //---------- - // body - - const uint8_t * blocks = data + nblocks*16; - - for (ssize_t i = -nblocks; i; i++) { - uint32_t k1 = getblock32(blocks,i*4+0); - uint32_t k2 = getblock32(blocks,i*4+1); - uint32_t k3 = getblock32(blocks,i*4+2); - uint32_t k4 = getblock32(blocks,i*4+3); - - k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; - - h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; - - k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; - - h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; - - k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; - - h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; - - k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; - - h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; - } - - //---------- - // tail - - const uint8_t * tail = data + nblocks*16; - - uint32_t k1 = 0; - uint32_t k2 = 0; - uint32_t k3 = 0; - uint32_t k4 = 0; - - switch(len & 15) { - case 15: k4 ^= tail[14] << 16; /* FALLTHROUGH */ - case 14: k4 ^= tail[13] << 8; /* FALLTHROUGH */ - case 13: k4 ^= tail[12] << 0; /* FALLTHROUGH */ - k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4; - /* FALLTHROUGH */ - case 12: k3 ^= tail[11] << 24; /* FALLTHROUGH */ - case 11: k3 
^= tail[10] << 16; /* FALLTHROUGH */ - case 10: k3 ^= tail[ 9] << 8; /* FALLTHROUGH */ - case 9: k3 ^= tail[ 8] << 0; /* FALLTHROUGH */ - k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3; - /* FALLTHROUGH */ - case 8: k2 ^= tail[ 7] << 24; /* FALLTHROUGH */ - case 7: k2 ^= tail[ 6] << 16; /* FALLTHROUGH */ - case 6: k2 ^= tail[ 5] << 8; /* FALLTHROUGH */ - case 5: k2 ^= tail[ 4] << 0; /* FALLTHROUGH */ - k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2; - /* FALLTHROUGH */ - case 4: k1 ^= tail[ 3] << 24; /* FALLTHROUGH */ - case 3: k1 ^= tail[ 2] << 16; /* FALLTHROUGH */ - case 2: k1 ^= tail[ 1] << 8; /* FALLTHROUGH */ - case 1: k1 ^= tail[ 0] << 0; - k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; - }; - - //---------- - // finalization - - h1 ^= (uint32_t)len; h2 ^= (uint32_t)len; - h3 ^= (uint32_t)len; h4 ^= (uint32_t)len; - - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; - - h1 = fmix32(h1); - h2 = fmix32(h2); - h3 = fmix32(h3); - h4 = fmix32(h4); - - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; - - PUT_U32(h1, (uint8_t *)out, 0); - PUT_U32(h2, (uint8_t *)out, 4); - PUT_U32(h3, (uint8_t *)out, 8); - PUT_U32(h4, (uint8_t *)out, 12); +template +static void MurmurHash3_32_128( const void * in, const size_t len, const seed_t seed, void * out ) { + const uint8_t * data = (const uint8_t *)in; + const ssize_t nblocks = len / 16; + + uint32_t h1 = (uint32_t )seed; + uint32_t h2 = (uint32_t )seed; + uint32_t h3 = (uint32_t )seed; + uint32_t h4 = (uint32_t )seed; + + const uint32_t c1 = 0x239b961b; + const uint32_t c2 = 0xab0e9789; + const uint32_t c3 = 0x38b34ae5; + const uint32_t c4 = 0xa1e38b93; + + //---------- + // body + + const uint8_t * blocks = data + nblocks * 16; + + for (ssize_t i = -nblocks; i; i++) { + uint32_t k1 = getblock32(blocks, i * 4 + 0); + uint32_t k2 = getblock32(blocks, i * 4 + 1); + uint32_t k3 = getblock32(blocks, i * 4 + 2); + uint32_t k4 = getblock32(blocks, i * 4 + 3); + + k1 *= c1; k1 = ROTL32(k1, 15); k1 
*= c2; h1 ^= k1; + + h1 = ROTL32(h1, 19); h1 += h2; h1 = h1 * 5 + 0x561ccd1b; + + k2 *= c2; k2 = ROTL32(k2, 16); k2 *= c3; h2 ^= k2; + + h2 = ROTL32(h2, 17); h2 += h3; h2 = h2 * 5 + 0x0bcaa747; + + k3 *= c3; k3 = ROTL32(k3, 17); k3 *= c4; h3 ^= k3; + + h3 = ROTL32(h3, 15); h3 += h4; h3 = h3 * 5 + 0x96cd1c35; + + k4 *= c4; k4 = ROTL32(k4, 18); k4 *= c1; h4 ^= k4; + + h4 = ROTL32(h4, 13); h4 += h1; h4 = h4 * 5 + 0x32ac3b17; + } + + //---------- + // tail + + const uint8_t * tail = data + nblocks * 16; + + uint32_t k1 = 0; + uint32_t k2 = 0; + uint32_t k3 = 0; + uint32_t k4 = 0; + + switch (len & 15) { + case 15: k4 ^= tail[14] << 16; /* FALLTHROUGH */ + case 14: k4 ^= tail[13] << 8; /* FALLTHROUGH */ + case 13: k4 ^= tail[12] << 0; /* FALLTHROUGH */ + k4 *= c4; k4 = ROTL32(k4, 18); k4 *= c1; h4 ^= k4; + /* FALLTHROUGH */ + case 12: k3 ^= tail[11] << 24; /* FALLTHROUGH */ + case 11: k3 ^= tail[10] << 16; /* FALLTHROUGH */ + case 10: k3 ^= tail[ 9] << 8; /* FALLTHROUGH */ + case 9: k3 ^= tail[ 8] << 0; /* FALLTHROUGH */ + k3 *= c3; k3 = ROTL32(k3, 17); k3 *= c4; h3 ^= k3; + /* FALLTHROUGH */ + case 8: k2 ^= tail[ 7] << 24; /* FALLTHROUGH */ + case 7: k2 ^= tail[ 6] << 16; /* FALLTHROUGH */ + case 6: k2 ^= tail[ 5] << 8; /* FALLTHROUGH */ + case 5: k2 ^= tail[ 4] << 0; /* FALLTHROUGH */ + k2 *= c2; k2 = ROTL32(k2, 16); k2 *= c3; h2 ^= k2; + /* FALLTHROUGH */ + case 4: k1 ^= tail[ 3] << 24; /* FALLTHROUGH */ + case 3: k1 ^= tail[ 2] << 16; /* FALLTHROUGH */ + case 2: k1 ^= tail[ 1] << 8; /* FALLTHROUGH */ + case 1: k1 ^= tail[ 0] << 0; + k1 *= c1; k1 = ROTL32(k1, 15); k1 *= c2; h1 ^= k1; + } + + //---------- + // finalization + + h1 ^= (uint32_t)len; h2 ^= (uint32_t)len; + h3 ^= (uint32_t)len; h4 ^= (uint32_t)len; + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + h1 = fmix32(h1); + h2 = fmix32(h2); + h3 = fmix32(h3); + h4 = fmix32(h4); + + h1 += h2; h1 += h3; h1 += h4; + h2 += h1; h3 += h1; h4 += h1; + + PUT_U32(h1, (uint8_t *)out, 0); + PUT_U32(h2, 
(uint8_t *)out, 4); + PUT_U32(h3, (uint8_t *)out, 8); + PUT_U32(h4, (uint8_t *)out, 12); } //----------------------------------------------------------------------------- -template < bool bswap > -static void MurmurHash3_128(const void * in, const size_t len, const seed_t seed, void * out) { - const uint8_t * data = (const uint8_t *)in; - const size_t nblocks = len / 16; +template +static void MurmurHash3_128( const void * in, const size_t len, const seed_t seed, void * out ) { + const uint8_t * data = (const uint8_t *)in; + const size_t nblocks = len / 16; - uint64_t h1 = (uint32_t)seed; - uint64_t h2 = (uint32_t)seed; + uint64_t h1 = (uint32_t )seed; + uint64_t h2 = (uint32_t )seed; - const uint64_t c1 = UINT64_C(0x87c37b91114253d5); - const uint64_t c2 = UINT64_C(0x4cf5ad432745937f); + const uint64_t c1 = UINT64_C(0x87c37b91114253d5); + const uint64_t c2 = UINT64_C(0x4cf5ad432745937f); - //---------- - // body + //---------- + // body - const uint8_t * blocks = data; + const uint8_t * blocks = data; - for (size_t i = 0; i < nblocks; i++) { - uint64_t k1 = getblock64(blocks,i*2+0); - uint64_t k2 = getblock64(blocks,i*2+1); + for (size_t i = 0; i < nblocks; i++) { + uint64_t k1 = getblock64(blocks, i * 2 + 0); + uint64_t k2 = getblock64(blocks, i * 2 + 1); - k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; + k1 *= c1; k1 = ROTL64(k1, 31); k1 *= c2; h1 ^= k1; - h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; + h1 = ROTL64(h1, 27); h1 += h2; h1 = h1 * 5 + 0x52dce729; - k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; + k2 *= c2; k2 = ROTL64(k2, 33); k2 *= c1; h2 ^= k2; - h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; - } + h2 = ROTL64(h2, 31); h2 += h1; h2 = h2 * 5 + 0x38495ab5; + } - //---------- - // tail + //---------- + // tail - const uint8_t * tail = data + nblocks*16; + const uint8_t * tail = data + nblocks * 16; - uint64_t k1 = 0; - uint64_t k2 = 0; + uint64_t k1 = 0; + uint64_t k2 = 0; - switch(len & 15) { - case 15: k2 ^= ((uint64_t)tail[14]) << 
48; /* FALLTHROUGH */ - case 14: k2 ^= ((uint64_t)tail[13]) << 40; /* FALLTHROUGH */ - case 13: k2 ^= ((uint64_t)tail[12]) << 32; /* FALLTHROUGH */ - case 12: k2 ^= ((uint64_t)tail[11]) << 24; /* FALLTHROUGH */ - case 11: k2 ^= ((uint64_t)tail[10]) << 16; /* FALLTHROUGH */ - case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; /* FALLTHROUGH */ - case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; - k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; - /* FALLTHROUGH */ - case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; /* FALLTHROUGH */ - case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; /* FALLTHROUGH */ - case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; /* FALLTHROUGH */ - case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; /* FALLTHROUGH */ - case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; /* FALLTHROUGH */ - case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; /* FALLTHROUGH */ - case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; /* FALLTHROUGH */ - case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; - k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; - }; + switch (len & 15) { + case 15: k2 ^= ((uint64_t)tail[14]) << 48; /* FALLTHROUGH */ + case 14: k2 ^= ((uint64_t)tail[13]) << 40; /* FALLTHROUGH */ + case 13: k2 ^= ((uint64_t)tail[12]) << 32; /* FALLTHROUGH */ + case 12: k2 ^= ((uint64_t)tail[11]) << 24; /* FALLTHROUGH */ + case 11: k2 ^= ((uint64_t)tail[10]) << 16; /* FALLTHROUGH */ + case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; /* FALLTHROUGH */ + case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; + k2 *= c2; k2 = ROTL64(k2, 33); k2 *= c1; h2 ^= k2; + /* FALLTHROUGH */ + case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; /* FALLTHROUGH */ + case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; /* FALLTHROUGH */ + case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; /* FALLTHROUGH */ + case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; /* FALLTHROUGH */ + case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; /* FALLTHROUGH */ + case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; /* FALLTHROUGH */ + case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; /* FALLTHROUGH */ + case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; + k1 *= c1; k1 = 
ROTL64(k1, 31); k1 *= c2; h1 ^= k1; + } - //---------- - // finalization + //---------- + // finalization - h1 ^= (uint32_t)len; h2 ^= (uint32_t)len; + h1 ^= (uint32_t)len; h2 ^= (uint32_t)len; - h1 += h2; - h2 += h1; + h1 += h2; + h2 += h1; - h1 = fmix64(h1); - h2 = fmix64(h2); + h1 = fmix64(h1); + h2 = fmix64(h2); - h1 += h2; - h2 += h1; + h1 += h2; + h2 += h1; - PUT_U64(h1, (uint8_t *)out, 0); - PUT_U64(h2, (uint8_t *)out, 8); + PUT_U64(h1, (uint8_t *)out, 0); + PUT_U64(h2, (uint8_t *)out, 8); } REGISTER_FAMILY(murmur3, - $.src_url = "https://github.com/aappleby/smhasher/", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/aappleby/smhasher/", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(MurmurHash3_32, - $.desc = "MurmurHash v3, 32-bit version", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0xB0F57EE3, - $.verification_BE = 0x6213303E, - $.hashfn_native = MurmurHash3_32, - $.hashfn_bswap = MurmurHash3_32, - $.seedfixfn = excludeBadseeds, - $.badseeds = {0xfca58b2d} -); + $.desc = "MurmurHash v3, 32-bit version", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0xB0F57EE3, + $.verification_BE = 0x6213303E, + $.hashfn_native = MurmurHash3_32, + $.hashfn_bswap = MurmurHash3_32, + $.seedfixfn = excludeBadseeds, + $.badseeds = { 0xfca58b2d } + ); REGISTER_HASH(MurmurHash3_128__int32, - $.desc = "MurmurHash v3, 128-bit version using 32-bit variables", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0xB3ECE62A, - $.verification_BE = 0xDC26F009, - $.hashfn_native = MurmurHash3_32_128, - $.hashfn_bswap = MurmurHash3_32_128, - $.seedfixfn = excludeBadseeds, - $.badseeds = 
{0x239b961b} -); + $.desc = "MurmurHash v3, 128-bit version using 32-bit variables", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0xB3ECE62A, + $.verification_BE = 0xDC26F009, + $.hashfn_native = MurmurHash3_32_128, + $.hashfn_bswap = MurmurHash3_32_128, + $.seedfixfn = excludeBadseeds, + $.badseeds = { 0x239b961b } + ); REGISTER_HASH(MurmurHash3_128, - $.desc = "MurmurHash v3, 128-bit version using 64-bit variables", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x6384BA69, - $.verification_BE = 0xCC622B6F, - $.hashfn_native = MurmurHash3_128, - $.hashfn_bswap = MurmurHash3_128, - $.seedfixfn = excludeBadseeds, - $.badseeds = {0x239b961b} -); + $.desc = "MurmurHash v3, 128-bit version using 64-bit variables", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x6384BA69, + $.verification_BE = 0xCC622B6F, + $.hashfn_native = MurmurHash3_128, + $.hashfn_bswap = MurmurHash3_128, + $.seedfixfn = excludeBadseeds, + $.badseeds = { 0x239b961b } + ); diff --git a/hashes/mx3.cpp b/hashes/mx3.cpp index 99ac620d..376bcb18 100644 --- a/hashes/mx3.cpp +++ b/hashes/mx3.cpp @@ -10,8 +10,8 @@ //------------------------------------------------------------ static const uint64_t C = UINT64_C(0xbea225f9eb34556d); -template < bool v1 > -static inline uint64_t mix(uint64_t x) { +template +static inline uint64_t mix( uint64_t x ) { constexpr uint32_t R0 = v1 ? 0 : 32; constexpr uint32_t R1 = v1 ? 33 : 29; constexpr uint32_t R2 = v1 ? 
29 : 32; @@ -29,9 +29,10 @@ static inline uint64_t mix(uint64_t x) { return x; } -template < bool v1 > -static inline uint64_t mix_stream(uint64_t h, uint64_t x) { +template +static inline uint64_t mix_stream( uint64_t h, uint64_t x ) { constexpr uint32_t R1 = v1 ? 33 : 43; + x *= C; x ^= (x >> 57) ^ (x >> R1); x *= C; @@ -40,83 +41,86 @@ static inline uint64_t mix_stream(uint64_t h, uint64_t x) { return h; } -template < bool v1, bool bswap > -static inline uint64_t mx3(const uint8_t * buf, size_t len, uint64_t seed) { +template +static inline uint64_t mx3( const uint8_t * buf, size_t len, uint64_t seed ) { const uint8_t * const tail = buf + (len & ~7); uint64_t h = seed ^ len; + while (len >= 32) { len -= 32; - h = mix_stream(h, GET_U64(buf, 0)); - h = mix_stream(h, GET_U64(buf, 8)); - h = mix_stream(h, GET_U64(buf, 16)); - h = mix_stream(h, GET_U64(buf, 24)); + h = mix_stream(h, GET_U64(buf, 0)); + h = mix_stream(h, GET_U64(buf, 8)); + h = mix_stream(h, GET_U64(buf, 16)); + h = mix_stream(h, GET_U64(buf, 24)); buf += 32; } while (len >= 8) { len -= 8; - h = mix_stream(h, GET_U64(buf, 0)); + h = mix_stream(h, GET_U64(buf, 0)); buf += 8; } uint64_t v = 0; switch (len & 7) { - case 7: v |= static_cast(tail[6]) << 48; - case 6: v |= static_cast(tail[5]) << 40; - case 5: v |= static_cast(tail[4]) << 32; - case 4: v |= static_cast(tail[3]) << 24; - case 3: v |= static_cast(tail[2]) << 16; - case 2: v |= static_cast(tail[1]) << 8; - case 1: h = mix_stream(h, v | tail[0]); - default: ; + case 7: v |= static_cast(tail[6]) << 48; + case 6: v |= static_cast(tail[5]) << 40; + case 5: v |= static_cast(tail[4]) << 32; + case 4: v |= static_cast(tail[3]) << 24; + case 3: v |= static_cast(tail[2]) << 16; + case 2: v |= static_cast(tail[1]) << 8; + case 1: h = mix_stream(h, v | tail[0]); + default:; } return mix(h); } //------------------------------------------------------------ -template < bool bswap > -static void mx3_v1(const void * in, const size_t len, const seed_t seed, 
void * out) { - uint64_t h = mx3((const uint8_t *)in, len, (uint64_t) seed); +template +static void mx3_v1( const void * in, const size_t len, const seed_t seed, void * out ) { + uint64_t h = mx3((const uint8_t *)in, len, (uint64_t)seed); + PUT_U64(h, (uint8_t *)out, 0); } -template < bool bswap > -static void mx3_v2(const void * in, const size_t len, const seed_t seed, void * out) { - uint64_t h = mx3((const uint8_t *)in, len, (uint64_t) seed); +template +static void mx3_v2( const void * in, const size_t len, const seed_t seed, void * out ) { + uint64_t h = mx3((const uint8_t *)in, len, (uint64_t)seed); + PUT_U64(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(mx3, - $.src_url = "https://github.com/jonmaiga/mx3/", - $.src_status = HashFamilyInfo::SRC_ACTIVE -); + $.src_url = "https://github.com/jonmaiga/mx3/", + $.src_status = HashFamilyInfo::SRC_ACTIVE + ); REGISTER_HASH(mx3__v2, - $.desc = "mx3 (revision 2)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 64, - $.verification_LE = 0x527399AD, - $.verification_BE = 0x5B6AAE8F, - $.hashfn_native = mx3_v2, - $.hashfn_bswap = mx3_v2 -); + $.desc = "mx3 (revision 2)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 64, + $.verification_LE = 0x527399AD, + $.verification_BE = 0x5B6AAE8F, + $.hashfn_native = mx3_v2, + $.hashfn_bswap = mx3_v2 + ); REGISTER_HASH(mx3__v1, - $.desc = "mx3 (revision 1)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 64, - $.verification_LE = 0x4DB51E5B, - $.verification_BE = 0x93E930B0, - $.hashfn_native = mx3_v1, - $.hashfn_bswap = mx3_v1 -); + $.desc = "mx3 (revision 1)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 64, + $.verification_LE = 0x4DB51E5B, + 
$.verification_BE = 0x93E930B0, + $.hashfn_native = mx3_v1, + $.hashfn_bswap = mx3_v1 + ); diff --git a/hashes/nmhash.cpp b/hashes/nmhash.cpp index 91683fad..5e9ab570 100644 --- a/hashes/nmhash.cpp +++ b/hashes/nmhash.cpp @@ -33,7 +33,7 @@ #include "Hashlib.h" //------------------------------------------------------------ -//#define NMH_VERSION 2 +// #define NMH_VERSION 2 /* vector macros */ #define NMH_SCALAR 0 @@ -42,17 +42,17 @@ #define NMH_AVX512 3 #if defined(HAVE_AVX512_BW) -#define NMH_VECTOR NMH_AVX512 /* _mm512_mullo_epi16 requires AVX512BW */ + #define NMH_VECTOR NMH_AVX512 /* _mm512_mullo_epi16 requires AVX512BW */ #elif defined(HAVE_AVX2) -#define NMH_VECTOR NMH_AVX2 + #define NMH_VECTOR NMH_AVX2 #elif defined(HAVE_SSE_2) -#define NMH_VECTOR NMH_SSE2 + #define NMH_VECTOR NMH_SSE2 #else -#define NMH_VECTOR NMH_SCALAR + #define NMH_VECTOR NMH_SCALAR #endif #if NMH_VECTOR > NMH_SCALAR -#include "Intrinsics.h" + #include "Intrinsics.h" #endif //------------------------------------------------------------ @@ -102,10 +102,10 @@ alignas(16) static const uint32_t __NMH_M3_V[32] = { }; //------------------------------------------------------------ -static inline uint32_t NMHASH_mult16(uint32_t a, uint32_t b) { - uint16_t al = (uint16_t)(a); +static inline uint32_t NMHASH_mult16( uint32_t a, uint32_t b ) { + uint16_t al = (uint16_t)(a ); uint16_t ah = (uint16_t)(a >> 16); - uint16_t bl = (uint16_t)(b); + uint16_t bl = (uint16_t)(b ); uint16_t bh = (uint16_t)(b >> 16); al *= bl; @@ -114,8 +114,7 @@ static inline uint32_t NMHASH_mult16(uint32_t a, uint32_t b) { return (((uint32_t)ah) << 16) + ((uint32_t)al); } - -static inline uint32_t NMHASH32_0to8(uint32_t const x, uint32_t const seed2) { +static inline uint32_t NMHASH32_0to8( uint32_t const x, uint32_t const seed2 ) { /* base mixer: [-6 -12 776bf593 -19 11 3fb39c65 -15 -9 e9139917 -11 16] = 0.027071104091278835 */ const uint32_t m1 = UINT32_C(0x776BF593); const uint32_t m2 = UINT32_C(0x3FB39C65); @@ -125,31 
+124,31 @@ static inline uint32_t NMHASH32_0to8(uint32_t const x, uint32_t const seed2) { { uint32_t vx; vx = x; - vx ^= (vx >> 12) ^ (vx >> 6); + vx ^= (vx >> 12) ^ (vx >> 6); vx = NMHASH_mult16(vx, m1); vx ^= (vx << 11) ^ (vx >> 19); vx = NMHASH_mult16(vx, m2); vx ^= seed2; - vx ^= (vx >> 15) ^ (vx >> 9); + vx ^= (vx >> 15) ^ (vx >> 9); vx = NMHASH_mult16(vx, m3); vx ^= (vx << 16) ^ (vx >> 11); return vx; } #else /* at least NMH_SSE2 */ { - __m128i hv = _mm_setr_epi32((int)x, 0, 0, 0); + __m128i hv = _mm_setr_epi32((int)x , 0, 0, 0); const __m128i sv = _mm_setr_epi32((int)seed2, 0, 0, 0); - const uint32_t *const result = (const uint32_t*)&hv; + const uint32_t * const result = (const uint32_t *)&hv; - hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_srli_epi32(hv, 12)), _mm_srli_epi32(hv, 6)); + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_srli_epi32(hv, 12)), _mm_srli_epi32(hv, 6)); hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m1, 0, 0, 0)); hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_slli_epi32(hv, 11)), _mm_srli_epi32(hv, 19)); hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m2, 0, 0, 0)); hv = _mm_xor_si128(hv, sv); - hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_srli_epi32(hv, 15)), _mm_srli_epi32(hv, 9)); - hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m3, 0, 0, 0)); + hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_srli_epi32(hv, 15)), _mm_srli_epi32(hv, 9)); + hv = _mm_mullo_epi16(hv, _mm_setr_epi32((int)m3, 0, 0, 0)); hv = _mm_xor_si128(_mm_xor_si128(hv, _mm_slli_epi32(hv, 16)), _mm_srli_epi32(hv, 11)); return *result; @@ -157,115 +156,115 @@ static inline uint32_t NMHASH32_0to8(uint32_t const x, uint32_t const seed2) { #endif } -template < bool gt32bytes, bool bswap > -static inline uint32_t NMHASH32_9to255(const uint8_t* const RESTRICT p, - size_t const len, uint32_t const seed) { +template +static inline uint32_t NMHASH32_9to255( const uint8_t * const RESTRICT p, size_t const len, uint32_t const seed ) { /* base mixer: [f0d9649b 5 -13 29a7935d -9 11 55d35831 -20 -10 ] = 
0.93495901789135362 */ uint32_t result = 0; #if NMH_VECTOR == NMH_SCALAR { - uint32_t x[4], y[4]; + uint32_t x[4], y[4]; uint32_t const sl = seed + (uint32_t)len; - size_t j; + size_t j; x[0] = NMH_PRIME32_1; x[1] = NMH_PRIME32_2; x[2] = NMH_PRIME32_3; x[3] = NMH_PRIME32_4; - for (j = 0; j < 4; ++j) y[j] = sl; + for (j = 0; j < 4; ++j) { y[j] = sl; } if (gt32bytes) { /* 33 to 255 bytes */ size_t const r = (len - 1) / 32; - size_t i; + size_t i; for (i = 0; i < r; ++i) { - for (j = 0; j < 4; ++j) x[j] ^= GET_U32(p, i * 32 + j * 4); - for (j = 0; j < 4; ++j) y[j] ^= GET_U32(p, i * 32 + j * 4 + 16); - for (j = 0; j < 4; ++j) x[j] += y[j]; + for (j = 0; j < 4; ++j) { x[j] ^= GET_U32(p, i * 32 + j * 4); } + for (j = 0; j < 4; ++j) { y[j] ^= GET_U32(p, i * 32 + j * 4 + 16); } + for (j = 0; j < 4; ++j) { x[j] += y[j]; } - for (j = 0; j < 4; ++j) x[j] = NMHASH_mult16(x[j], __NMH_M1); + for (j = 0; j < 4; ++j) { x[j] = NMHASH_mult16(x[j], __NMH_M1); } - for (j = 0; j < 4; ++j) x[j] ^= (x[j] << 5) ^ (x[j] >> 13); + for (j = 0; j < 4; ++j) { x[j] ^= (x[j] << 5) ^ (x[j] >> 13); } - for (j = 0; j < 4; ++j) x[j] = NMHASH_mult16(x[j], __NMH_M2); + for (j = 0; j < 4; ++j) { x[j] = NMHASH_mult16(x[j], __NMH_M2); } - for (j = 0; j < 4; ++j) x[j] ^= y[j]; + for (j = 0; j < 4; ++j) { x[j] ^= y[j]; } - for (j = 0; j < 4; ++j) x[j] ^= (x[j] << 11) ^ (x[j] >> 9); + for (j = 0; j < 4; ++j) { x[j] ^= (x[j] << 11) ^ (x[j] >> 9); } - for (j = 0; j < 4; ++j) x[j] = NMHASH_mult16(x[j], __NMH_M3); + for (j = 0; j < 4; ++j) { x[j] = NMHASH_mult16(x[j], __NMH_M3); } - for (j = 0; j < 4; ++j) x[j] ^= (x[j] >> 10) ^ (x[j] >> 20); + for (j = 0; j < 4; ++j) { x[j] ^= (x[j] >> 10) ^ (x[j] >> 20); } } - for (j = 0; j < 4; ++j) x[j] ^= GET_U32(p, len - 32 + j * 4); - for (j = 0; j < 4; ++j) y[j] ^= GET_U32(p, len - 16 + j * 4); + for (j = 0; j < 4; ++j) { x[j] ^= GET_U32(p, len - 32 + j * 4); } + for (j = 0; j < 4; ++j) { y[j] ^= GET_U32(p, len - 16 + j * 4); } } else { /* 9 to 32 bytes */ - x[0] ^= 
GET_U32(p, 0); - x[1] ^= GET_U32(p, ((len>>4)<<3)); + x[0] ^= GET_U32(p, 0 ); + x[1] ^= GET_U32(p, ( (len >> 4) << 3)); x[2] ^= GET_U32(p, len - 8); - x[3] ^= GET_U32(p, len - 8 - ((len>>4)<<3)); - y[0] ^= GET_U32(p, 4); - y[1] ^= GET_U32(p, ((len>>4)<<3) + 4); + x[3] ^= GET_U32(p, len - 8 - ((len >> 4) << 3)); + y[0] ^= GET_U32(p, 4 ); + y[1] ^= GET_U32(p, ( (len >> 4) << 3) + 4); y[2] ^= GET_U32(p, len - 8 + 4); - y[3] ^= GET_U32(p, len - 8 - ((len>>4)<<3) + 4); + y[3] ^= GET_U32(p, len - 8 - ((len >> 4) << 3) + 4); } - for (j = 0; j < 4; ++j) x[j] += y[j]; - for (j = 0; j < 4; ++j) y[j] ^= (y[j] << 17) ^ (y[j] >> 6); + for (j = 0; j < 4; ++j) { x[j] += y[j]; } + for (j = 0; j < 4; ++j) { y[j] ^= (y[j] << 17) ^ (y[j] >> 6); } - for (j = 0; j < 4; ++j) x[j] = NMHASH_mult16(x[j], __NMH_M1); - for (j = 0; j < 4; ++j) x[j] ^= (x[j] << 5) ^ (x[j] >> 13); - for (j = 0; j < 4; ++j) x[j] = NMHASH_mult16(x[j], __NMH_M2); + for (j = 0; j < 4; ++j) { x[j] = NMHASH_mult16(x[j], __NMH_M1); } + for (j = 0; j < 4; ++j) { x[j] ^= (x[j] << 5) ^ (x[j] >> 13); } + for (j = 0; j < 4; ++j) { x[j] = NMHASH_mult16(x[j], __NMH_M2); } - for (j = 0; j < 4; ++j) x[j] ^= y[j]; + for (j = 0; j < 4; ++j) { x[j] ^= y[j]; } - for (j = 0; j < 4; ++j) x[j] ^= (x[j] << 11) ^ (x[j] >> 9); - for (j = 0; j < 4; ++j) x[j] = NMHASH_mult16(x[j], __NMH_M3); - for (j = 0; j < 4; ++j) x[j] ^= (x[j] >> 10) ^ (x[j] >> 20); + for (j = 0; j < 4; ++j) { x[j] ^= (x[j] << 11) ^ (x[j] >> 9); } + for (j = 0; j < 4; ++j) { x[j] = NMHASH_mult16(x[j], __NMH_M3); } + for (j = 0; j < 4; ++j) { x[j] ^= (x[j] >> 10) ^ (x[j] >> 20); } x[0] ^= NMH_PRIME32_1; x[1] ^= NMH_PRIME32_2; x[2] ^= NMH_PRIME32_3; x[3] ^= NMH_PRIME32_4; - for (j = 1; j < 4; ++j) x[0] += x[j]; + for (j = 1; j < 4; ++j) { x[0] += x[j]; } - x[0] ^= sl + (sl >> 5); - x[0] = NMHASH_mult16(x[0], __NMH_M3); - x[0] ^= (x[0] >> 10) ^ (x[0] >> 20); + x[0] ^= sl + (sl >> 5); + x[0] = NMHASH_mult16(x[0], __NMH_M3); + x[0] ^= (x[0] >> 10) ^ (x[0] >> 20); result = 
x[0]; } #else /* at least NMH_SSE2 */ { - __m128i const h0 = _mm_setr_epi32((int)NMH_PRIME32_1, (int)NMH_PRIME32_2, (int)NMH_PRIME32_3, (int)NMH_PRIME32_4); - __m128i const sl = _mm_set1_epi32((int)seed + (int)len); - __m128i const m1 = _mm_set1_epi32((int)__NMH_M1); - __m128i const m2 = _mm_set1_epi32((int)__NMH_M2); - __m128i const m3 = _mm_set1_epi32((int)__NMH_M3); - __m128i x = h0; - __m128i y = sl; - const uint32_t *const px = (const uint32_t*)&x; + __m128i const h0 = _mm_setr_epi32((int)NMH_PRIME32_1, (int)NMH_PRIME32_2, + (int)NMH_PRIME32_3, (int)NMH_PRIME32_4); + __m128i const sl = _mm_set1_epi32((int)seed + (int)len); + __m128i const m1 = _mm_set1_epi32((int)__NMH_M1 ); + __m128i const m2 = _mm_set1_epi32((int)__NMH_M2 ); + __m128i const m3 = _mm_set1_epi32((int)__NMH_M3 ); + __m128i x = h0; + __m128i y = sl; + const uint32_t * const px = (const uint32_t *)&x; if (gt32bytes) { /* 32 to 127 bytes */ size_t const r = (len - 1) / 32; - size_t i; + size_t i; for (i = 0; i < r; ++i) { if (bswap) { - x = _mm_xor_si128(x, mm_bswap32(_mm_loadu_si128((const __m128i *)(p + i * 32)))); + x = _mm_xor_si128(x, mm_bswap32(_mm_loadu_si128((const __m128i *)(p + i * 32 )))); y = _mm_xor_si128(y, mm_bswap32(_mm_loadu_si128((const __m128i *)(p + i * 32 + 16)))); } else { - x = _mm_xor_si128(x, _mm_loadu_si128((const __m128i *)(p + i * 32))); + x = _mm_xor_si128(x, _mm_loadu_si128((const __m128i *)(p + i * 32 ))); y = _mm_xor_si128(y, _mm_loadu_si128((const __m128i *)(p + i * 32 + 16))); } x = _mm_add_epi32(x, y); x = _mm_mullo_epi16(x, m1); - x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 5)), _mm_srli_epi32(x, 13)); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 5)), _mm_srli_epi32(x, 13)); x = _mm_mullo_epi16(x, m2); x = _mm_xor_si128(x, y); - x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 11)), _mm_srli_epi32(x, 9)); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 11)), _mm_srli_epi32(x, 9)); x = _mm_mullo_epi16(x, m3); x = 
_mm_xor_si128(_mm_xor_si128(x, _mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); } @@ -278,29 +277,33 @@ static inline uint32_t NMHASH32_9to255(const uint8_t* const RESTRICT p, } } else { /* 9 to 32 bytes */ - x = _mm_xor_si128(x, _mm_setr_epi32((int)GET_U32(p, 0), (int)GET_U32(p, ((len>>4)<<3)), (int)GET_U32(p, len - 8), (int)GET_U32(p, len - 8 - ((len>>4)<<3)))); - y = _mm_xor_si128(y, _mm_setr_epi32((int)GET_U32(p, 4), (int)GET_U32(p, ((len>>4)<<3) + 4), (int)GET_U32(p, len - 8 + 4), (int)GET_U32(p, len - 8 - ((len>>4)<<3) + 4))); + x = _mm_xor_si128(x, _mm_setr_epi32((int)GET_U32(p, 0), (int)GET_U32( + p, ((len >> 4) << 3)) , (int)GET_U32(p, len - 8), (int)GET_U32( + p, len - 8 - ((len >> 4) << 3))) ); + y = _mm_xor_si128(y, _mm_setr_epi32((int)GET_U32(p, 4), (int)GET_U32( + p, ((len >> 4) << 3) + 4), (int)GET_U32(p, len - 8 + 4), (int)GET_U32( + p, len - 8 - ((len >> 4) << 3) + 4))); } - x = _mm_add_epi32(x, y); + x = _mm_add_epi32(x, y); - y = _mm_xor_si128(_mm_xor_si128(y, _mm_slli_epi32(y, 17)), _mm_srli_epi32(y, 6)); + y = _mm_xor_si128(_mm_xor_si128(y, _mm_slli_epi32(y, 17)), _mm_srli_epi32(y, 6)); - x = _mm_mullo_epi16(x, m1); - x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 5)), _mm_srli_epi32(x, 13)); - x = _mm_mullo_epi16(x, m2); - x = _mm_xor_si128(x, y); - x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 11)), _mm_srli_epi32(x, 9)); - x = _mm_mullo_epi16(x, m3); - x = _mm_xor_si128(_mm_xor_si128(x, _mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); + x = _mm_mullo_epi16(x, m1); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 5)), _mm_srli_epi32(x, 13)); + x = _mm_mullo_epi16(x, m2); + x = _mm_xor_si128(x, y); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_slli_epi32(x, 11)), _mm_srli_epi32(x, 9)); + x = _mm_mullo_epi16(x, m3); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); - x = _mm_xor_si128(x, h0); - x = _mm_add_epi32(x, _mm_srli_si128(x, 4)); - x = _mm_add_epi32(x, _mm_srli_si128(x, 8)); + x = 
_mm_xor_si128(x, h0); + x = _mm_add_epi32(x, _mm_srli_si128(x, 4)); + x = _mm_add_epi32(x, _mm_srli_si128(x, 8)); - x = _mm_xor_si128(x, _mm_add_epi32(sl, _mm_srli_epi32(sl, 5))); - x = _mm_mullo_epi16(x, m3); - x = _mm_xor_si128(_mm_xor_si128(x, _mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); + x = _mm_xor_si128(x, _mm_add_epi32(sl, _mm_srli_epi32(sl, 5))); + x = _mm_mullo_epi16(x, m3); + x = _mm_xor_si128(_mm_xor_si128(x, _mm_srli_epi32(x, 10)), _mm_srli_epi32(x, 20)); result = *px; } @@ -313,28 +316,26 @@ static inline uint32_t NMHASH32_9to255(const uint8_t* const RESTRICT p, #undef __NMH_M2 #undef __NMH_M1 -template < bool bswap > -static inline uint32_t NMHASH32_9to32(const uint8_t* const RESTRICT p, - size_t const len, uint32_t const seed) { - return NMHASH32_9to255(p, len, seed); +template +static inline uint32_t NMHASH32_9to32( const uint8_t * const RESTRICT p, size_t const len, uint32_t const seed ) { + return NMHASH32_9to255(p, len, seed); } -template < bool bswap > -static inline uint32_t NMHASH32_33to255(const uint8_t* const RESTRICT p, - size_t const len, uint32_t const seed) { - return NMHASH32_9to255(p, len, seed); +template +static inline uint32_t NMHASH32_33to255( const uint8_t * const RESTRICT p, size_t const len, uint32_t const seed ) { + return NMHASH32_9to255(p, len, seed); } -template < bool bswap > -static inline void NMHASH32_long_round_scalar(uint32_t * const RESTRICT accX, - uint32_t * const RESTRICT accY, const uint8_t * const RESTRICT p) { +template +static inline void NMHASH32_long_round_scalar( uint32_t * const RESTRICT accX, uint32_t * const RESTRICT accY, + const uint8_t * const RESTRICT p ) { /* * breadth first calculation will hint some compiler to auto * vectorize the code on gcc, the performance becomes 10x than the * depth first, and about 80% of the manually vectorized code */ const size_t nbGroups = sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT); - size_t i; + size_t i; for (i = 0; i < nbGroups; ++i) { accX[i] ^= GET_U32(p, i * 
4); @@ -349,13 +350,13 @@ static inline void NMHASH32_long_round_scalar(uint32_t * const RESTRICT accX, accY[i] ^= accX[i] >> 1; } for (i = 0; i < nbGroups * 2; ++i) { - ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M1_V)[i]; + ((uint16_t *)accX)[i] *= ((uint16_t *)__NMH_M1_V)[i]; } for (i = 0; i < nbGroups; ++i) { accX[i] ^= accX[i] << 5 ^ accX[i] >> 13; } for (i = 0; i < nbGroups * 2; ++i) { - ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M2_V)[i]; + ((uint16_t *)accX)[i] *= ((uint16_t *)__NMH_M2_V)[i]; } for (i = 0; i < nbGroups; ++i) { accX[i] ^= accY[i]; @@ -364,7 +365,7 @@ static inline void NMHASH32_long_round_scalar(uint32_t * const RESTRICT accX, accX[i] ^= accX[i] << 11 ^ accX[i] >> 9; } for (i = 0; i < nbGroups * 2; ++i) { - ((uint16_t*)accX)[i] *= ((uint16_t*)__NMH_M3_V)[i]; + ((uint16_t *)accX)[i] *= ((uint16_t *)__NMH_M3_V)[i]; } for (i = 0; i < nbGroups; ++i) { accX[i] ^= accX[i] >> 10 ^ accX[i] >> 20; @@ -373,36 +374,37 @@ static inline void NMHASH32_long_round_scalar(uint32_t * const RESTRICT accX, #if NMH_VECTOR > NMH_SCALAR -#if NMH_VECTOR == NMH_SSE2 -# define _NMH_M_(F) mm_ ## F -# define _NMH_MM_(F) _mm_ ## F -# define _NMH_MMW_(F) _mm_ ## F ## 128 -# define _NMH_MM_T __m128i -#elif NMH_VECTOR == NMH_AVX2 -# define _NMH_M_(F) mm256_ ## F -# define _NMH_MM_(F) _mm256_ ## F -# define _NMH_MMW_(F) _mm256_ ## F ## 256 -# define _NMH_MM_T __m256i -#elif NMH_VECTOR == NMH_AVX512 -# define _NMH_M_(F) mm512_ ## F -# define _NMH_MM_(F) _mm512_ ## F -# define _NMH_MMW_(F) _mm512_ ## F ## 512 -# define _NMH_MM_T __m512i -#endif - -#define NMH_VECTOR_NB_GROUP (sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT) / (sizeof(_NMH_MM_T) / sizeof(*NMH_ACC_INIT))) - -template < bool bswap > -static inline void NMHASH32_long_round_sse(uint32_t * const RESTRICT accX, - uint32_t *const RESTRICT accY, const uint8_t* const RESTRICT p) { - const _NMH_MM_T *const RESTRICT m1 = (const _NMH_MM_T * RESTRICT)__NMH_M1_V; - const _NMH_MM_T *const RESTRICT m2 = (const _NMH_MM_T * 
RESTRICT)__NMH_M2_V; - const _NMH_MM_T *const RESTRICT m3 = (const _NMH_MM_T * RESTRICT)__NMH_M3_V; - - _NMH_MM_T *const xaccX = ( _NMH_MM_T * )accX; - _NMH_MM_T *const xaccY = ( _NMH_MM_T * )accY; - _NMH_MM_T *const xp = ( _NMH_MM_T * )p; - size_t i; + #if NMH_VECTOR == NMH_SSE2 + #define _NMH_M_(F) mm_ ## F + #define _NMH_MM_(F) _mm_ ## F + #define _NMH_MMW_(F) _mm_ ## F ## 128 + #define _NMH_MM_T __m128i + #elif NMH_VECTOR == NMH_AVX2 + #define _NMH_M_(F) mm256_ ## F + #define _NMH_MM_(F) _mm256_ ## F + #define _NMH_MMW_(F) _mm256_ ## F ## 256 + #define _NMH_MM_T __m256i + #elif NMH_VECTOR == NMH_AVX512 + #define _NMH_M_(F) mm512_ ## F + #define _NMH_MM_(F) _mm512_ ## F + #define _NMH_MMW_(F) _mm512_ ## F ## 512 + #define _NMH_MM_T __m512i + #endif + + #define NMH_VECTOR_NB_GROUP (sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT) / \ + (sizeof(_NMH_MM_T) / sizeof(*NMH_ACC_INIT))) + +template +static inline void NMHASH32_long_round_sse( uint32_t * const RESTRICT accX, uint32_t * const RESTRICT accY, + const uint8_t * const RESTRICT p ) { + const _NMH_MM_T * const RESTRICT m1 = (const _NMH_MM_T * RESTRICT) __NMH_M1_V; + const _NMH_MM_T * const RESTRICT m2 = (const _NMH_MM_T * RESTRICT) __NMH_M2_V; + const _NMH_MM_T * const RESTRICT m3 = (const _NMH_MM_T * RESTRICT) __NMH_M3_V; + + _NMH_MM_T * const xaccX = (_NMH_MM_T *)accX; + _NMH_MM_T * const xaccY = (_NMH_MM_T *)accY; + _NMH_MM_T * const xp = (_NMH_MM_T *)p; + size_t i; for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { if (bswap) { @@ -428,7 +430,8 @@ static inline void NMHASH32_long_round_sse(uint32_t * const RESTRICT accX, xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m1); } for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { - xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], _NMH_MM_(slli_epi32)(xaccX[i], 5)), _NMH_MM_(srli_epi32)(xaccX[i], 13)); + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], _NMH_MM_( + slli_epi32)(xaccX[i], 5)), _NMH_MM_(srli_epi32)(xaccX[i], 13)); } for (i = 0; i < NMH_VECTOR_NB_GROUP; 
++i) { xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m2); @@ -437,26 +440,28 @@ static inline void NMHASH32_long_round_sse(uint32_t * const RESTRICT accX, xaccX[i] = _NMH_MMW_(xor_si)(xaccX[i], xaccY[i]); } for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { - xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], _NMH_MM_(slli_epi32)(xaccX[i], 11)), _NMH_MM_(srli_epi32)(xaccX[i], 9)); + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], _NMH_MM_( + slli_epi32)(xaccX[i], 11)), _NMH_MM_(srli_epi32)(xaccX[i], 9)); } for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { xaccX[i] = _NMH_MM_(mullo_epi16)(xaccX[i], *m3); } for (i = 0; i < NMH_VECTOR_NB_GROUP; ++i) { - xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], _NMH_MM_(srli_epi32)(xaccX[i], 10)), _NMH_MM_(srli_epi32)(xaccX[i], 20)); + xaccX[i] = _NMH_MMW_(xor_si)(_NMH_MMW_(xor_si)(xaccX[i], _NMH_MM_( + srli_epi32)(xaccX[i], 10)), _NMH_MM_(srli_epi32)(xaccX[i], 20)); } } -# undef _NMH_MM_ -# undef _NMH_MMW_ -# undef _NMH_MM_T -#undef NMH_VECTOR_NB_GROUP + #undef _NMH_MM_ + #undef _NMH_MMW_ + #undef _NMH_MM_T + #undef NMH_VECTOR_NB_GROUP #endif /* NMH_VECTOR > NMH_SCALAR */ -template < bool bswap > -static inline void NMHASH32_long_round(uint32_t * const RESTRICT accX, - uint32_t *const RESTRICT accY, const uint8_t* const RESTRICT p) { +template +static inline void NMHASH32_long_round( uint32_t * const RESTRICT accX, uint32_t * const RESTRICT accY, + const uint8_t * const RESTRICT p ) { #if NMH_VECTOR > NMH_SCALAR return NMHASH32_long_round_sse(accX, accY, p); #else @@ -464,18 +469,17 @@ static inline void NMHASH32_long_round(uint32_t * const RESTRICT accX, #endif } -template < bool bswap > -static uint32_t NMHASH32_long(const uint8_t* const RESTRICT p, - size_t const len, uint32_t const seed) { - alignas(16) uint32_t accX[sizeof(NMH_ACC_INIT)/sizeof(*NMH_ACC_INIT)]; - alignas(16) uint32_t accY[sizeof(accX)/sizeof(*accX)]; +template +static uint32_t NMHASH32_long( const uint8_t * const RESTRICT p, size_t const len, 
uint32_t const seed ) { + alignas(16) uint32_t accX[sizeof(NMH_ACC_INIT) / sizeof(*NMH_ACC_INIT)]; + alignas(16) uint32_t accY[sizeof(accX) / sizeof(*accX)]; size_t const nbRounds = (len - 1) / (sizeof(accX) + sizeof(accY)); - size_t i; - uint32_t sum = 0; + size_t i; + uint32_t sum = 0; /* init */ - for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) accX[i] = NMH_ACC_INIT[i]; - for (i = 0; i < sizeof(accY)/sizeof(*accY); ++i) accY[i] = seed; + for (i = 0; i < sizeof(accX) / sizeof(*accX); ++i) { accX[i] = NMH_ACC_INIT[i]; } + for (i = 0; i < sizeof(accY) / sizeof(*accY); ++i) { accY[i] = seed; } for (i = 0; i < nbRounds; ++i) { NMHASH32_long_round(accX, accY, p + i * (sizeof(accX) + sizeof(accY))); @@ -483,8 +487,8 @@ static uint32_t NMHASH32_long(const uint8_t* const RESTRICT p, NMHASH32_long_round(accX, accY, p + len - (sizeof(accX) + sizeof(accY))); /* merge acc */ - for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) accX[i] ^= NMH_ACC_INIT[i]; - for (i = 0; i < sizeof(accX)/sizeof(*accX); ++i) sum += accX[i]; + for (i = 0; i < sizeof(accX) / sizeof(*accX); ++i) { accX[i] ^= NMH_ACC_INIT[i]; } + for (i = 0; i < sizeof(accX) / sizeof(*accX); ++i) { sum += accX[i]; } if (sizeof(size_t) > sizeof(uint32_t)) { sum += (uint32_t)(len >> 32); @@ -492,29 +496,30 @@ static uint32_t NMHASH32_long(const uint8_t* const RESTRICT p, return sum ^ (uint32_t)len; } -static inline uint32_t NMHASH32_avalanche32(uint32_t const x) { +static inline uint32_t NMHASH32_avalanche32( uint32_t const x ) { /* [-21 -8 cce5196d 12 -7 464be229 -21 -8] = 3.2267098842182733 */ const uint32_t m1 = UINT32_C(0xCCE5196D); const uint32_t m2 = UINT32_C(0x464BE229); - uint32_t vx; - vx = x; - vx ^= (vx >> 8) ^ (vx >> 21); - vx = NMHASH_mult16(vx, m1); - vx ^= (vx << 12) ^ (vx >> 7); - vx = NMHASH_mult16(vx, m2); + uint32_t vx; + + vx = x; + vx ^= (vx >> 8) ^ (vx >> 21); + vx = NMHASH_mult16(vx, m1); + vx ^= (vx << 12) ^ (vx >> 7); + vx = NMHASH_mult16(vx, m2); return vx ^ (vx >> 8) ^ (vx >> 21); } 
-template < bool bswap > -static inline uint32_t NMHASH32(const void * const RESTRICT input, - size_t const len, uint32_t seed) { - const uint8_t *const p = (const uint8_t *)input; +template +static inline uint32_t NMHASH32( const void * const RESTRICT input, size_t const len, uint32_t seed ) { + const uint8_t * const p = (const uint8_t *)input; + if (likely(len <= 32)) { if (likely(len > 8)) { return NMHASH32_9to32(p, len, seed); } if (likely(len > 4)) { - uint32_t x = GET_U32(p, 0); + uint32_t x = GET_U32(p, 0 ); uint32_t y = GET_U32(p, len - 4) ^ (NMH_PRIME32_4 + 2 + seed); x += y; x ^= x << (len + 7); @@ -522,22 +527,22 @@ static inline uint32_t NMHASH32(const void * const RESTRICT input, } else { uint32_t data; switch (len) { - case 0: seed += NMH_PRIME32_2; - data = 0; + case 0: seed += NMH_PRIME32_2; + data = 0; break; - case 1: seed += NMH_PRIME32_2 + (UINT32_C(1) << 24) + (1 << 1); - data = p[0]; + case 1: seed += NMH_PRIME32_2 + (UINT32_C(1) << 24) + (1 << 1); + data = p[0]; break; - case 2: seed += NMH_PRIME32_2 + (UINT32_C(2) << 24) + (2 << 1); - data = GET_U16(p, 0); + case 2: seed += NMH_PRIME32_2 + (UINT32_C(2) << 24) + (2 << 1); + data = GET_U16(p, 0); break; - case 3: seed += NMH_PRIME32_2 + (UINT32_C(3) << 24) + (3 << 1); - data = GET_U16(p, 0) | (p[2] << 16); + case 3: seed += NMH_PRIME32_2 + (UINT32_C(3) << 24) + (3 << 1); + data = GET_U16(p, 0) | (p[2] << 16); break; - case 4: seed += NMH_PRIME32_3; - data = GET_U32(p, 0); + case 4: seed += NMH_PRIME32_3; + data = GET_U32(p, 0); break; - default: return 0; + default: return 0; } return NMHASH32_0to8(data + seed, ROTL32(seed, 5)); } @@ -549,7 +554,7 @@ static inline uint32_t NMHASH32(const void * const RESTRICT input, } //------------------------------------------------------------ -static inline uint32_t NMHASH32X_0to4(uint32_t x, uint32_t const seed) { +static inline uint32_t NMHASH32X_0to4( uint32_t x, uint32_t const seed ) { /* [bdab1ea9 18 a7896a1b 12 83796a2d 16] = 0.092922873297662509 */ 
x ^= seed; x *= UINT32_C(0xBDAB1EA9); @@ -562,15 +567,15 @@ static inline uint32_t NMHASH32X_0to4(uint32_t x, uint32_t const seed) { return x; } -template < bool bswap > -static inline uint32_t NMHASH32X_5to8(const uint8_t* const RESTRICT p, - size_t const len, uint32_t const seed) { +template +static inline uint32_t NMHASH32X_5to8( const uint8_t * const RESTRICT p, size_t const len, uint32_t const seed ) { /* * - 5 to 9 bytes * - mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246 */ - uint32_t x = GET_U32(p, 0) ^ NMH_PRIME32_3; + uint32_t x = GET_U32(p, 0 ) ^ NMH_PRIME32_3; uint32_t const y = GET_U32(p, len - 4) ^ seed; + x += y; x ^= x >> len; x *= UINT32_C(0x11049A7D); @@ -583,10 +588,10 @@ static inline uint32_t NMHASH32X_5to8(const uint8_t* const RESTRICT p, return x; } -template < bool bswap > -static inline uint32_t NMHASH32X_9to255(const uint8_t* const RESTRICT p, - size_t const len, uint32_t const seed) { - /* - at least 9 bytes +template +static inline uint32_t NMHASH32X_9to255( const uint8_t * const RESTRICT p, size_t const len, uint32_t const seed ) { + /* + * - at least 9 bytes * - base mixer: [11049a7d 23 bcccdc7b 12 065e9dad 12] = 0.16577596555667246 * - tail mixer: [16 a52fb2cd 15 551e4d49 16] = 0.17162579707098322 */ @@ -595,7 +600,7 @@ static inline uint32_t NMHASH32X_9to255(const uint8_t* const RESTRICT p, uint32_t y = seed; uint32_t a = NMH_PRIME32_4; uint32_t b = seed; - size_t i, r = (len - 1) / 16; + size_t i, r = (len - 1) / 16; for (i = 0; i < r; ++i) { x ^= GET_U32(p, i * 16 + 0); @@ -610,7 +615,7 @@ static inline uint32_t NMHASH32X_9to255(const uint8_t* const RESTRICT p, x *= UINT32_C(0x065E9DAD); x ^= x >> 12; - a ^= GET_U32(p, i * 16 + 8); + a ^= GET_U32(p, i * 16 + 8); b ^= GET_U32(p, i * 16 + 12); a ^= b; a *= UINT32_C(0x11049A7D); @@ -623,8 +628,8 @@ static inline uint32_t NMHASH32X_9to255(const uint8_t* const RESTRICT p, a ^= a >> 12; } - if (likely(((uint8_t)len-1) & 8)) { - if (likely(((uint8_t)len-1) & 4)) { + if 
(likely(((uint8_t)len - 1) & 8)) { + if (likely(((uint8_t)len - 1) & 4)) { a ^= GET_U32(p, r * 16 + 0); b ^= GET_U32(p, r * 16 + 4); a ^= b; @@ -652,7 +657,7 @@ static inline uint32_t NMHASH32X_9to255(const uint8_t* const RESTRICT p, x ^= x >> 12; x *= UINT32_C(0x065E9DAD); } else { - if (likely(((uint8_t)len-1) & 4)) { + if (likely(((uint8_t)len - 1) & 4)) { a ^= GET_U32(p, r * 16) + b; a ^= a >> 16; a *= UINT32_C(0xA52FB2CD); @@ -674,7 +679,7 @@ static inline uint32_t NMHASH32X_9to255(const uint8_t* const RESTRICT p, return x; } -static inline uint32_t NMHASH32X_avalanche32(uint32_t x) { +static inline uint32_t NMHASH32X_avalanche32( uint32_t x ) { /* * mixer with 2 mul from skeeto/hash-prospector: * [15 d168aaad 15 af723597 15] = 0.15983776156606694 @@ -688,10 +693,10 @@ static inline uint32_t NMHASH32X_avalanche32(uint32_t x) { } /* use 32*32->32 multiplication for short hash */ -template < bool bswap > -static inline uint32_t NMHASH32X(const void* const RESTRICT input, - size_t const len, uint32_t seed) { - const uint8_t *const p = (const uint8_t *)input; +template +static inline uint32_t NMHASH32X( const void * const RESTRICT input, size_t const len, uint32_t seed ) { + const uint8_t * const p = (const uint8_t *)input; + if (likely(len <= 8)) { if (likely(len > 4)) { return NMHASH32X_5to8(p, len, seed); @@ -699,22 +704,22 @@ static inline uint32_t NMHASH32X(const void* const RESTRICT input, /* 0-4 bytes */ uint32_t data; switch (len) { - case 0: seed += NMH_PRIME32_2; - data = 0; + case 0: seed += NMH_PRIME32_2; + data = 0; break; - case 1: seed += NMH_PRIME32_2 + (UINT32_C(1) << 24) + (1 << 1); - data = p[0]; + case 1: seed += NMH_PRIME32_2 + (UINT32_C(1) << 24) + (1 << 1); + data = p[0]; break; - case 2: seed += NMH_PRIME32_2 + (UINT32_C(2) << 24) + (2 << 1); - data = GET_U16(p, 0); + case 2: seed += NMH_PRIME32_2 + (UINT32_C(2) << 24) + (2 << 1); + data = GET_U16(p, 0); break; - case 3: seed += NMH_PRIME32_2 + (UINT32_C(3) << 24) + (3 << 1); - data = 
GET_U16(p, 0) | (p[2] << 16); + case 3: seed += NMH_PRIME32_2 + (UINT32_C(3) << 24) + (3 << 1); + data = GET_U16(p, 0) | (p[2] << 16); break; - case 4: seed += NMH_PRIME32_1; - data = GET_U32(p, 0); + case 4: seed += NMH_PRIME32_1; + data = GET_U32(p, 0); break; - default: return 0; + default: return 0; } return NMHASH32X_0to4(data, seed); } @@ -726,54 +731,56 @@ static inline uint32_t NMHASH32X(const void* const RESTRICT input, } //------------------------------------------------------------ -template < bool bswap > -static void NMhash(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void NMhash( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = NMHASH32(in, len, (uint32_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void NMhashX(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void NMhashX( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = NMHASH32X(in, len, (uint32_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(nmhash, - $.src_url = "https://github.com/gzm55/hash-garage", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://github.com/gzm55/hash-garage", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); REGISTER_HASH(NMHASH, - $.desc = "nmhash32 v2", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_TYPE_PUNNING | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_SHIFT_VARIABLE | - FLAG_IMPL_LICENSE_BSD, - $.bits = 32, - $.verification_LE = 0x12A30553, - $.verification_BE = 0xE3222AC8, - $.hashfn_native = NMhash, - $.hashfn_bswap = NMhash -); + $.desc = "nmhash32 v2", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_TYPE_PUNNING | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_SHIFT_VARIABLE | + FLAG_IMPL_LICENSE_BSD, + $.bits = 32, + 
$.verification_LE = 0x12A30553, + $.verification_BE = 0xE3222AC8, + $.hashfn_native = NMhash, + $.hashfn_bswap = NMhash + ); REGISTER_HASH(NMHASHX, - $.desc = "nmhash32x v2", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_TYPE_PUNNING | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_SHIFT_VARIABLE | - FLAG_IMPL_LICENSE_BSD, - $.bits = 32, - $.verification_LE = 0xA8580227, - $.verification_BE = 0x83B36886, - $.hashfn_native = NMhashX, - $.hashfn_bswap = NMhashX -); + $.desc = "nmhash32x v2", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_TYPE_PUNNING | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_SHIFT_VARIABLE | + FLAG_IMPL_LICENSE_BSD, + $.bits = 32, + $.verification_LE = 0xA8580227, + $.verification_BE = 0x83B36886, + $.hashfn_native = NMhashX, + $.hashfn_bswap = NMhashX + ); diff --git a/hashes/o1hash.cpp b/hashes/o1hash.cpp index 497e8086..2aa107c3 100644 --- a/hashes/o1hash.cpp +++ b/hashes/o1hash.cpp @@ -39,28 +39,29 @@ #include "Hashlib.h" /* - This is a quick and dirty hash function designed for O(1) speed. - It makes your hash table application fly in most cases. - It samples first, middle and last 4 bytes to produce the hash. - Do not use it in very serious applications as it's not secure. -*/ + * This is a quick and dirty hash function designed for O(1) speed. + * It makes your hash table application fly in most cases. + * It samples first, middle and last 4 bytes to produce the hash. + * Do not use it in very serious applications as it's not secure. 
+ */ //------------------------------------------------------------ // Includes homegrown seeding for SMHasher3 -template < bool bswap > -static void o1hash(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void o1hash( const void * in, const size_t len, const seed_t seed, void * out ) { const uint8_t * p = (const uint8_t *)in; - uint64_t h; + uint64_t h; + if (len >= 4) { - uint64_t first = GET_U32(p, 0); - uint64_t middle = GET_U32(p, ((len >> 1) - 2)); + uint64_t first = GET_U32(p, 0 ); + uint64_t middle = GET_U32(p, ((len >> 1) - 2)); uint64_t last = GET_U32(p, len - 4); h = (middle + (uint64_t)seed) * (first + last); } else if (len > 0) { uint64_t tail = seed + ( - (((uint64_t)p[ 0]) << 16) | + (((uint64_t)p[0 ]) << 16) | (((uint64_t)p[len >> 1]) << 8) | - (((uint64_t)p[ len - 1]))) ; + (((uint64_t)p[len - 1]))); h = tail * UINT64_C(0xa0761d6478bd642f); } else { h = 0; @@ -70,23 +71,23 @@ static void o1hash(const void * in, const size_t len, const seed_t seed, void * //------------------------------------------------------------ REGISTER_FAMILY(o1hash, - $.src_url = "https://github.com/wangyi-fudan/wyhash/blob/master/old_versions/o1hash.h", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/wangyi-fudan/wyhash/blob/master/old_versions/o1hash.h", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(o1hash, - $.desc = "o(1) hash, from wyhash", - $.sort_order = 45, - $.hash_flags = - FLAG_HASH_MOCK | - FLAG_HASH_NO_SEED , - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN , - $.bits = 64, - $.verification_LE = 0xAE049F09, - $.verification_BE = 0x299BD16A, - $.hashfn_native = o1hash, - $.hashfn_bswap = o1hash -); + $.desc = "o(1) hash, from wyhash", + $.sort_order = 45, + $.hash_flags = + FLAG_HASH_MOCK | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 
64, + $.verification_LE = 0xAE049F09, + $.verification_BE = 0x299BD16A, + $.hashfn_native = o1hash, + $.hashfn_bswap = o1hash + ); diff --git a/hashes/pearson.cpp b/hashes/pearson.cpp index f96c2f1c..de9b796e 100644 --- a/hashes/pearson.cpp +++ b/hashes/pearson.cpp @@ -32,506 +32,513 @@ #include "Hashlib.h" #if defined(HAVE_SSSE_3) -#include "Intrinsics.h" + #include "Intrinsics.h" #endif // AES S-Box table -- allows for eventually supported hardware accelerated look-up -static const uint8_t t[256] ={ - 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, - 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, - 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, - 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, - 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, - 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, - 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, - 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, - 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, - 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, - 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, - 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, - 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, - 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, - 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, - 0x8c, 0xa1, 0x89, 
0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 }; +static const uint8_t t[256] = { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +}; static uint16_t t16[65536]; -static bool pearson_hash_init (void) { +static bool pearson_hash_init( void ) { #if !defined(HAVE_SSSE_3) - size_t i; + size_t i; - for (i = 0; i < 65536; i++) - t16[i] = (t[i >> 8] << 8) + t[(uint8_t)i]; + for (i = 0; i < 65536; i++) { + t16[i] = (t[i >> 8] << 8) + t[(uint8_t)i]; + } 
#endif - return true; + return true; } -static void pearson_hash_256_portable(uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in) { - size_t i; - /* initial values - astonishingly, assembling using SHIFTs and ORs (in register) - * works faster on well pipelined CPUs than loading the 64-bit value from memory. - * however, there is one advantage to loading from memory: as we also store back to - * memory at the end, we do not need to care about endianess! */ - uint8_t upper[8] = { 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; - uint8_t lower[8] = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; - - uint64_t upper_hash_mask = GET_U64(upper, 0); - uint64_t lower_hash_mask = GET_U64(lower, 0); - uint64_t high_upper_hash_mask = upper_hash_mask + UINT64_C(0x1010101010101010); - uint64_t high_lower_hash_mask = lower_hash_mask + UINT64_C(0x1010101010101010); - - // The one nod to endianness is that the hash_in value needs be in - // little-endian format always, to match up with the byte ordering - // of upper[] and lower[] above. 
- hash_in = COND_BSWAP(hash_in, isBE()); - uint64_t upper_hash = hash_in; - uint64_t lower_hash = hash_in; - uint64_t high_upper_hash = hash_in; - uint64_t high_lower_hash = hash_in; - - for (i = 0; i < len; i++) { - // broadcast the character, xor into hash, make them different permutations - uint64_t c = (uint8_t)in[i]; - c |= c << 8; - c |= c << 16; - c |= c << 32; - upper_hash ^= c ^ upper_hash_mask; - lower_hash ^= c ^ lower_hash_mask; - high_upper_hash ^= c ^ high_upper_hash_mask; - high_lower_hash ^= c ^ high_lower_hash_mask; - - // table lookup - uint64_t h = 0; - uint16_t x; - x = upper_hash; x = t16[x]; upper_hash >>= 16; h = x; h = ROTR64 (h, 16); - x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h = ROTR64 (h, 16); - x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h = ROTR64 (h, 16); - x = upper_hash; x = t16[x]; ; h |= x; h = ROTR64 (h, 16); - upper_hash = h; - - h = 0; - x = lower_hash; x = t16[x]; lower_hash >>= 16; h = x; h = ROTR64 (h, 16); - x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h = ROTR64 (h, 16); - x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h = ROTR64 (h, 16); - x = lower_hash; x = t16[x]; ; h |= x; h = ROTR64 (h, 16); - lower_hash = h; - - h = 0; - x = high_upper_hash; x = t16[x]; high_upper_hash >>= 16; h = x; h = ROTR64 (h, 16); - x = high_upper_hash; x = t16[x]; high_upper_hash >>= 16; h |= x; h = ROTR64 (h, 16); - x = high_upper_hash; x = t16[x]; high_upper_hash >>= 16; h |= x; h = ROTR64 (h, 16); - x = high_upper_hash; x = t16[x]; ; h |= x; h = ROTR64 (h, 16); - high_upper_hash = h; - - h = 0; - x = high_lower_hash; x = t16[x]; high_lower_hash >>= 16; h = x; h = ROTR64 (h, 16); - x = high_lower_hash; x = t16[x]; high_lower_hash >>= 16; h |= x; h = ROTR64 (h, 16); - x = high_lower_hash; x = t16[x]; high_lower_hash >>= 16; h |= x; h = ROTR64 (h, 16); - x = high_lower_hash; x = t16[x]; ; h |= x; h = ROTR64 (h, 16); - high_lower_hash = h; - } - // store output - PUT_U64(high_upper_hash, out, 0); - 
PUT_U64(high_lower_hash, out, 8); - PUT_U64(upper_hash, out, 16); - PUT_U64(lower_hash, out, 24); +static void pearson_hash_256_portable( uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in ) { + size_t i; + /* + * initial values - astonishingly, assembling using SHIFTs and ORs (in register) + * works faster on well pipelined CPUs than loading the 64-bit value from memory. + * however, there is one advantage to loading from memory: as we also store back to + * memory at the end, we do not need to care about endianess! + */ + uint8_t upper[8] = { 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; + uint8_t lower[8] = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; + + uint64_t upper_hash_mask = GET_U64(upper, 0); + uint64_t lower_hash_mask = GET_U64(lower, 0); + uint64_t high_upper_hash_mask = upper_hash_mask + UINT64_C(0x1010101010101010); + uint64_t high_lower_hash_mask = lower_hash_mask + UINT64_C(0x1010101010101010); + + // The one nod to endianness is that the hash_in value needs be in + // little-endian format always, to match up with the byte ordering + // of upper[] and lower[] above. 
+ hash_in = COND_BSWAP(hash_in, isBE()); + uint64_t upper_hash = hash_in; + uint64_t lower_hash = hash_in; + uint64_t high_upper_hash = hash_in; + uint64_t high_lower_hash = hash_in; + + for (i = 0; i < len; i++) { + // broadcast the character, xor into hash, make them different permutations + uint64_t c = (uint8_t)in[i]; + c |= c << 8; + c |= c << 16; + c |= c << 32; + upper_hash ^= c ^ upper_hash_mask; + lower_hash ^= c ^ lower_hash_mask; + high_upper_hash ^= c ^ high_upper_hash_mask; + high_lower_hash ^= c ^ high_lower_hash_mask; + + // table lookup + uint64_t h = 0; + uint16_t x; + x = upper_hash; x = t16[x]; upper_hash >>= 16; h = x; h = ROTR64(h, 16); + x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h = ROTR64(h, 16); + x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h = ROTR64(h, 16); + x = upper_hash; x = t16[x]; h |= x; h = ROTR64(h, 16); + upper_hash = h; + + h = 0; + x = lower_hash; x = t16[x]; lower_hash >>= 16; h = x; h = ROTR64(h, 16); + x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h = ROTR64(h, 16); + x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h = ROTR64(h, 16); + x = lower_hash; x = t16[x]; h |= x; h = ROTR64(h, 16); + lower_hash = h; + + h = 0; + x = high_upper_hash; x = t16[x]; high_upper_hash >>= 16; h = x; h = ROTR64(h, 16); + x = high_upper_hash; x = t16[x]; high_upper_hash >>= 16; h |= x; h = ROTR64(h, 16); + x = high_upper_hash; x = t16[x]; high_upper_hash >>= 16; h |= x; h = ROTR64(h, 16); + x = high_upper_hash; x = t16[x]; h |= x; h = ROTR64(h, 16); + high_upper_hash = h; + + h = 0; + x = high_lower_hash; x = t16[x]; high_lower_hash >>= 16; h = x; h = ROTR64(h, 16); + x = high_lower_hash; x = t16[x]; high_lower_hash >>= 16; h |= x; h = ROTR64(h, 16); + x = high_lower_hash; x = t16[x]; high_lower_hash >>= 16; h |= x; h = ROTR64(h, 16); + x = high_lower_hash; x = t16[x]; h |= x; h = ROTR64(h, 16); + high_lower_hash = h; + } + // store output + PUT_U64(high_upper_hash, out, 0); + PUT_U64(high_lower_hash, 
out, 8); + PUT_U64(upper_hash , out, 16); + PUT_U64(lower_hash , out, 24); } -static void pearson_hash_128_portable(uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in) { - size_t i; - /* initial values - astonishingly, assembling using SHIFTs and ORs (in register) - * works faster on well pipelined CPUs than loading the 64-bit value from memory. - * however, there is one advantage to loading from memory: as we also store back to - * memory at the end, we do not need to care about endianess! */ - uint8_t upper[8] = { 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; - uint8_t lower[8] = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; - - uint64_t upper_hash_mask = GET_U64(upper, 0); - uint64_t lower_hash_mask = GET_U64(lower, 0); - - // The one nod to endianness is that the hash_in value needs be in - // little-endian format always, to match up with the byte ordering - // of upper[] and lower[] above. - hash_in = COND_BSWAP(hash_in, isBE()); - uint64_t upper_hash = hash_in; - uint64_t lower_hash = hash_in; - - for (i = 0; i < len; i++) { - // broadcast the character, xor into hash, make them different permutations - uint64_t c = (uint8_t)in[i]; - c |= c << 8; - c |= c << 16; - c |= c << 32; - upper_hash ^= c ^ upper_hash_mask; - lower_hash ^= c ^ lower_hash_mask; - - // table lookup - uint64_t h = 0; - uint16_t x; - x = upper_hash; x = t16[x]; upper_hash >>= 16; h = x; h = ROTR64 (h,16); - x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h = ROTR64 (h,16); - x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h = ROTR64 (h,16); - x = upper_hash; x = t16[x]; ; h |= x; h = ROTR64 (h,16); - upper_hash = h; - - h = 0; - x = lower_hash; x = t16[x]; lower_hash >>= 16; h = x; h = ROTR64 (h, 16); - x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h = ROTR64 (h, 16); - x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h = ROTR64 (h, 16); - x = lower_hash; x = t16[x]; ; h |= x; h = ROTR64 (h, 16); - lower_hash = h; - } - // store output - 
PUT_U64(upper_hash, out, 0); - PUT_U64(lower_hash, out, 8); +static void pearson_hash_128_portable( uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in ) { + size_t i; + /* + * initial values - astonishingly, assembling using SHIFTs and ORs (in register) + * works faster on well pipelined CPUs than loading the 64-bit value from memory. + * however, there is one advantage to loading from memory: as we also store back to + * memory at the end, we do not need to care about endianess! + */ + uint8_t upper[8] = { 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; + uint8_t lower[8] = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; + + uint64_t upper_hash_mask = GET_U64(upper, 0); + uint64_t lower_hash_mask = GET_U64(lower, 0); + + // The one nod to endianness is that the hash_in value needs be in + // little-endian format always, to match up with the byte ordering + // of upper[] and lower[] above. + hash_in = COND_BSWAP(hash_in, isBE()); + uint64_t upper_hash = hash_in; + uint64_t lower_hash = hash_in; + + for (i = 0; i < len; i++) { + // broadcast the character, xor into hash, make them different permutations + uint64_t c = (uint8_t)in[i]; + c |= c << 8; + c |= c << 16; + c |= c << 32; + upper_hash ^= c ^ upper_hash_mask; + lower_hash ^= c ^ lower_hash_mask; + + // table lookup + uint64_t h = 0; + uint16_t x; + x = upper_hash; x = t16[x]; upper_hash >>= 16; h = x; h = ROTR64(h, 16); + x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h = ROTR64(h, 16); + x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h = ROTR64(h, 16); + x = upper_hash; x = t16[x]; h |= x; h = ROTR64(h, 16); + upper_hash = h; + + h = 0; + x = lower_hash; x = t16[x]; lower_hash >>= 16; h = x; h = ROTR64(h, 16); + x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h = ROTR64(h, 16); + x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h = ROTR64(h, 16); + x = lower_hash; x = t16[x]; h |= x; h = ROTR64(h, 16); + lower_hash = h; + } + // store output + 
PUT_U64(upper_hash, out, 0); + PUT_U64(lower_hash, out, 8); } -static void pearson_hash_64_portable(uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in) { - size_t i; - uint64_t hash_mask = UINT64_C(0x0706050403020100); - uint64_t hash = hash_in; - - for (i = 0; i < len; i++) { - // broadcast the character, xor into hash, make them different permutations - uint64_t c = (uint8_t)in[i]; - c |= c << 8; - c |= c << 16; - c |= c << 32; - hash ^= c ^ hash_mask; - // table lookup - - uint64_t h = 0; - h = (t16[(uint16_t)(hash >> 16)] << 16) + t16[(uint16_t)hash]; - h <<= 32; - h |= (uint32_t)((t16[(uint16_t)(hash >> 48)] << 16)) + t16[(uint16_t)(hash >> 32)]; - hash = ROTR64(h, 32); - } - // store output - if (isBE()) { - PUT_U64(hash, out, 0); - } else { - PUT_U64(hash, out, 0); - } +static void pearson_hash_64_portable( uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in ) { + size_t i; + uint64_t hash_mask = UINT64_C(0x0706050403020100); + uint64_t hash = hash_in; + + for (i = 0; i < len; i++) { + // broadcast the character, xor into hash, make them different permutations + uint64_t c = (uint8_t)in[i]; + c |= c << 8; + c |= c << 16; + c |= c << 32; + hash ^= c ^ hash_mask; + // table lookup + + uint64_t h = 0; + h = (t16[(uint16_t)(hash >> 16)] << 16) + t16[(uint16_t)hash]; + h <<= 32; + h |= (uint32_t)((t16[(uint16_t)(hash >> 48)] << 16)) + t16[(uint16_t)(hash >> 32)]; + hash = ROTR64(h, 32); + } + // store output + if (isBE()) { + PUT_U64(hash, out, 0); + } else { + PUT_U64(hash, out, 0); + } } #if defined(HAVE_X86_64_AES) -static void pearson_hash_256_aesni(uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in) { - size_t i; +static void pearson_hash_256_aesni( uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in ) { + size_t i; - uint8_t upper[8] = { 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; - uint8_t lower[8] = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; + uint8_t upper[8] = { 0x0F, 0x0E, 0x0D, 0x0C, 
0x0B, 0x0A, 0x09, 0x08 }; + uint8_t lower[8] = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; - uint64_t upper_hash_mask = GET_U64(upper, 0); - uint64_t lower_hash_mask = GET_U64(lower, 0); + uint64_t upper_hash_mask = GET_U64(upper, 0); + uint64_t lower_hash_mask = GET_U64(lower, 0); - __m128i tmp = _mm_set1_epi8(0x10); + __m128i tmp = _mm_set1_epi8(0x10); - __m128i hash_mask = _mm_set_epi64x(lower_hash_mask, upper_hash_mask); - __m128i high_hash_mask = _mm_xor_si128 (tmp, hash_mask); - __m128i hash= _mm_set_epi64x(hash_in, hash_in); - __m128i high_hash= _mm_set_epi64x(hash_in, hash_in); + __m128i hash_mask = _mm_set_epi64x(lower_hash_mask, upper_hash_mask); + __m128i high_hash_mask = _mm_xor_si128(tmp, hash_mask); + __m128i hash = _mm_set_epi64x(hash_in, hash_in); + __m128i high_hash = _mm_set_epi64x(hash_in, hash_in); - // table lookup preparation - __m128i ZERO = _mm_setzero_si128(); - __m128i ISOLATE_SBOX_MASK = _mm_set_epi32(0x0306090C, 0x0F020508, 0x0B0E0104, 0x070A0D00); + // table lookup preparation + __m128i ZERO = _mm_setzero_si128(); + __m128i ISOLATE_SBOX_MASK = _mm_set_epi32(0x0306090C, 0x0F020508, 0x0B0E0104, 0x070A0D00); - for (i = 0; i < len; i++) { - // broadcast the character, xor into hash, make them different permutations - __m128i cc = _mm_set1_epi8 (in[i]); - hash = _mm_xor_si128 (hash, cc); - high_hash = _mm_xor_si128 (high_hash, cc); - hash = _mm_xor_si128 (hash, hash_mask); - high_hash = _mm_xor_si128 (high_hash, high_hash_mask); + for (i = 0; i < len; i++) { + // broadcast the character, xor into hash, make them different permutations + __m128i cc = _mm_set1_epi8(in[i]); + hash = _mm_xor_si128(hash , cc ); + high_hash = _mm_xor_si128(high_hash, cc ); + hash = _mm_xor_si128(hash , hash_mask); + high_hash = _mm_xor_si128(high_hash, high_hash_mask); - // table lookup - hash = _mm_shuffle_epi8(hash, ISOLATE_SBOX_MASK); // re-order along AES round - high_hash = _mm_shuffle_epi8(high_hash, ISOLATE_SBOX_MASK); // re-order along AES round - 
hash = _mm_aesenclast_si128(hash, ZERO); - high_hash = _mm_aesenclast_si128(high_hash, ZERO); - } + // table lookup + hash = _mm_shuffle_epi8(hash , ISOLATE_SBOX_MASK); // re-order along AES round + high_hash = _mm_shuffle_epi8(high_hash, ISOLATE_SBOX_MASK); // re-order along AES round + hash = _mm_aesenclast_si128(hash , ZERO); + high_hash = _mm_aesenclast_si128(high_hash, ZERO); + } - // store output - _mm_store_si128 ((__m128i*)out , high_hash); - _mm_store_si128 ((__m128i*)&out[16] , hash); + // store output + _mm_store_si128((__m128i *)out , high_hash); + _mm_store_si128((__m128i *)&out[16], hash ); } -static void pearson_hash_128_aesni(uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in) { - size_t i; +static void pearson_hash_128_aesni( uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in ) { + size_t i; - uint8_t upper[8] = { 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; - uint8_t lower[8] = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; + uint8_t upper[8] = { 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; + uint8_t lower[8] = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; - uint64_t upper_hash_mask = GET_U64(upper, 0); - uint64_t lower_hash_mask = GET_U64(lower, 0); + uint64_t upper_hash_mask = GET_U64(upper, 0); + uint64_t lower_hash_mask = GET_U64(lower, 0); - __m128i hash_mask = _mm_set_epi64x (lower_hash_mask, upper_hash_mask); - __m128i hash = _mm_set_epi64x(hash_in, hash_in); + __m128i hash_mask = _mm_set_epi64x(lower_hash_mask, upper_hash_mask); + __m128i hash = _mm_set_epi64x(hash_in, hash_in); - // table lookup preparation - __m128i ZERO = _mm_setzero_si128(); - __m128i ISOLATE_SBOX_MASK = _mm_set_epi32(0x0306090C, 0x0F020508, 0x0B0E0104, 0x070A0D00); + // table lookup preparation + __m128i ZERO = _mm_setzero_si128(); + __m128i ISOLATE_SBOX_MASK = _mm_set_epi32(0x0306090C, 0x0F020508, 0x0B0E0104, 0x070A0D00); - for (i = 0; i < len; i++) { - // broadcast the character, xor into hash, make them different 
permutations - __m128i cc = _mm_set1_epi8 (in[i]); - hash = _mm_xor_si128 (hash, cc); - hash = _mm_xor_si128 (hash, hash_mask); + for (i = 0; i < len; i++) { + // broadcast the character, xor into hash, make them different permutations + __m128i cc = _mm_set1_epi8(in[i]); + hash = _mm_xor_si128(hash, cc ); + hash = _mm_xor_si128(hash, hash_mask); - // table lookup - hash = _mm_shuffle_epi8(hash, ISOLATE_SBOX_MASK); // re-order along AES round - hash = _mm_aesenclast_si128(hash, ZERO); - } - // store output - _mm_store_si128 ((__m128i*)out , hash); + // table lookup + hash = _mm_shuffle_epi8(hash, ISOLATE_SBOX_MASK); // re-order along AES round + hash = _mm_aesenclast_si128(hash, ZERO); + } + // store output + _mm_store_si128((__m128i *)out, hash); } -static void pearson_hash_64_aesni(uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in) { - size_t i; - __m128i hash_mask = _mm_cvtsi64_si128(UINT64_C(0x0706050403020100)); - __m128i hash = _mm_cvtsi64_si128(hash_in); +static void pearson_hash_64_aesni( uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in ) { + size_t i; + __m128i hash_mask = _mm_cvtsi64_si128(UINT64_C(0x0706050403020100)); + __m128i hash = _mm_cvtsi64_si128(hash_in); - // table lookup preparation - __m128i ZERO = _mm_setzero_si128(); - __m128i ISOLATE_SBOX_MASK = _mm_set_epi32(0x0306090C, 0x0F020508, 0x0B0E0104, 0x070A0D00); + // table lookup preparation + __m128i ZERO = _mm_setzero_si128(); + __m128i ISOLATE_SBOX_MASK = _mm_set_epi32(0x0306090C, 0x0F020508, 0x0B0E0104, 0x070A0D00); - for (i = 0; i < len; i++) { - // broadcast the character, xor into hash, make them different permutations - __m128i cc = _mm_set1_epi8 (in[i]); - hash = _mm_xor_si128 (hash, cc); - hash = _mm_xor_si128 (hash, hash_mask); + for (i = 0; i < len; i++) { + // broadcast the character, xor into hash, make them different permutations + __m128i cc = _mm_set1_epi8(in[i]); + hash = _mm_xor_si128(hash, cc ); + hash = _mm_xor_si128(hash, hash_mask); - // table 
lookup - hash = _mm_shuffle_epi8(hash, ISOLATE_SBOX_MASK); // re-order along AES round - hash = _mm_aesenclast_si128(hash, ZERO); - } + // table lookup + hash = _mm_shuffle_epi8(hash, ISOLATE_SBOX_MASK); // re-order along AES round + hash = _mm_aesenclast_si128(hash, ZERO); + } - // store output - _mm_storel_epi64((__m128i*)out , hash); + // store output + _mm_storel_epi64((__m128i *)out, hash); } #elif defined(HAVE_SSSE_3) -static void pearson_hash_256_ssse3(uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in) { - size_t i; - - uint8_t upper[8] = { 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; - uint8_t lower[8] = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; - - uint64_t upper_hash_mask = GET_U64(upper, 0); - uint64_t lower_hash_mask = GET_U64(lower, 0); - - __m128i tmp = _mm_set1_epi8(0x10); - - __m128i hash_mask = _mm_set_epi64x (lower_hash_mask, upper_hash_mask); - __m128i high_hash_mask = _mm_xor_si128 (tmp, hash_mask); - __m128i hash= _mm_set_epi64x(hash_in, hash_in); - __m128i high_hash= _mm_set_epi64x(hash_in, hash_in); - - // table lookup preparation - __m128i const p16 = _mm_set1_epi8 (0x10); - __m128i lut_result = _mm_xor_si128 (lut_result, lut_result); - __m128i high_lut_result = _mm_xor_si128 (high_lut_result, high_lut_result); - __m128i selected_entries; - __m128i high_selected_entries; - __m128i table_line; - - for (i = 0; i < len; i++) { - // broadcast the character, xor into hash, make them different permutations - __m128i cc = _mm_set1_epi8 (in[i]); - hash = _mm_xor_si128 (hash, cc); - high_hash = _mm_xor_si128 (high_hash, cc); - hash = _mm_xor_si128 (hash, hash_mask); - high_hash = _mm_xor_si128 (high_hash, high_hash_mask); - - // table lookup - size_t j; - __m128i lut_index = hash; - __m128i high_lut_index = high_hash; - lut_result = _mm_xor_si128 (lut_result, lut_result); - high_lut_result = _mm_xor_si128 (lut_result, lut_result); - for (j = 0; j < 256; j += 16) { - table_line = _mm_load_si128 ((__m128i *)&t[j]); - 
selected_entries = _mm_min_epu8 (lut_index, p16); - selected_entries = _mm_cmpeq_epi8 (selected_entries, p16); - selected_entries = _mm_or_si128 (selected_entries, lut_index); - selected_entries = _mm_shuffle_epi8 (table_line, selected_entries); - high_selected_entries = _mm_min_epu8 (high_lut_index, p16); - high_selected_entries = _mm_cmpeq_epi8 (high_selected_entries, p16); - high_selected_entries = _mm_or_si128 (high_selected_entries, high_lut_index); - high_selected_entries = _mm_shuffle_epi8 (table_line, high_selected_entries); - lut_result = _mm_or_si128 (lut_result, selected_entries); - lut_index = _mm_sub_epi8 (lut_index, p16); - high_lut_result = _mm_or_si128 (high_lut_result, high_selected_entries); - high_lut_index = _mm_sub_epi8 (high_lut_index, p16); +static void pearson_hash_256_ssse3( uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in ) { + size_t i; + + uint8_t upper[8] = { 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; + uint8_t lower[8] = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; + + uint64_t upper_hash_mask = GET_U64(upper, 0); + uint64_t lower_hash_mask = GET_U64(lower, 0); + + __m128i tmp = _mm_set1_epi8(0x10); + + __m128i hash_mask = _mm_set_epi64x(lower_hash_mask, upper_hash_mask); + __m128i high_hash_mask = _mm_xor_si128(tmp, hash_mask); + __m128i hash = _mm_set_epi64x(hash_in, hash_in); + __m128i high_hash = _mm_set_epi64x(hash_in, hash_in); + + // table lookup preparation + __m128i const p16 = _mm_set1_epi8(0x10); + __m128i lut_result = _mm_xor_si128(lut_result , lut_result); + __m128i high_lut_result = _mm_xor_si128(high_lut_result, high_lut_result); + __m128i selected_entries; + __m128i high_selected_entries; + __m128i table_line; + + for (i = 0; i < len; i++) { + // broadcast the character, xor into hash, make them different permutations + __m128i cc = _mm_set1_epi8(in[i]); + hash = _mm_xor_si128(hash , cc ); + high_hash = _mm_xor_si128(high_hash, cc ); + hash = _mm_xor_si128(hash , hash_mask); + high_hash = 
_mm_xor_si128(high_hash, high_hash_mask); + + // table lookup + size_t j; + __m128i lut_index = hash; + __m128i high_lut_index = high_hash; + lut_result = _mm_xor_si128(lut_result, lut_result); + high_lut_result = _mm_xor_si128(lut_result, lut_result); + for (j = 0; j < 256; j += 16) { + table_line = _mm_load_si128((__m128i *)&t[j]); + selected_entries = _mm_min_epu8(lut_index, p16); + selected_entries = _mm_cmpeq_epi8(selected_entries, p16); + selected_entries = _mm_or_si128(selected_entries, lut_index); + selected_entries = _mm_shuffle_epi8(table_line, selected_entries); + high_selected_entries = _mm_min_epu8(high_lut_index, p16); + high_selected_entries = _mm_cmpeq_epi8(high_selected_entries, p16); + high_selected_entries = _mm_or_si128(high_selected_entries, high_lut_index); + high_selected_entries = _mm_shuffle_epi8(table_line, high_selected_entries); + lut_result = _mm_or_si128(lut_result, selected_entries); + lut_index = _mm_sub_epi8(lut_index, p16); + high_lut_result = _mm_or_si128(high_lut_result, high_selected_entries); + high_lut_index = _mm_sub_epi8(high_lut_index, p16); + } + hash = lut_result; + high_hash = high_lut_result; } - hash = lut_result; - high_hash = high_lut_result; - } - // store output - _mm_store_si128 ((__m128i*)out , high_hash); - _mm_store_si128 ((__m128i*)&out[16] , hash); + // store output + _mm_store_si128((__m128i *)out , high_hash); + _mm_store_si128((__m128i *)&out[16], hash ); } -static void pearson_hash_128_ssse3(uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in) { - size_t i; - - uint8_t upper[8] = { 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; - uint8_t lower[8] = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; - - uint64_t upper_hash_mask = GET_U64(upper, 0); - uint64_t lower_hash_mask = GET_U64(lower, 0); - - __m128i hash_mask = _mm_set_epi64x (lower_hash_mask, upper_hash_mask); - __m128i hash = _mm_set_epi64x(hash_in, hash_in); - - // table lookup preparation - __m128i const p16 = _mm_set1_epi8 
(0x10); - __m128i lut_result = _mm_xor_si128 (lut_result, lut_result); - __m128i selected_entries; - __m128i table_line; - - for (i = 0; i < len; i++) { - // broadcast the character, xor into hash, make them different permutations - __m128i cc = _mm_set1_epi8 (in[i]); - hash = _mm_xor_si128 (hash, cc); - hash = _mm_xor_si128 (hash, hash_mask); - - // table lookup - size_t j; - __m128i lut_index = hash; - lut_result = _mm_xor_si128 (lut_result, lut_result); - for (j = 0; j < 256; j += 16) { - table_line = _mm_load_si128 ((__m128i *)&t[j]); - selected_entries = _mm_min_epu8 (lut_index, p16); - selected_entries = _mm_cmpeq_epi8 (selected_entries, p16); - selected_entries = _mm_or_si128 (selected_entries, lut_index); - selected_entries = _mm_shuffle_epi8 (table_line, selected_entries); - lut_result = _mm_or_si128 (lut_result, selected_entries); - lut_index = _mm_sub_epi8 (lut_index, p16); +static void pearson_hash_128_ssse3( uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in ) { + size_t i; + + uint8_t upper[8] = { 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; + uint8_t lower[8] = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; + + uint64_t upper_hash_mask = GET_U64(upper, 0); + uint64_t lower_hash_mask = GET_U64(lower, 0); + + __m128i hash_mask = _mm_set_epi64x(lower_hash_mask, upper_hash_mask); + __m128i hash = _mm_set_epi64x(hash_in, hash_in); + + // table lookup preparation + __m128i const p16 = _mm_set1_epi8(0x10); + __m128i lut_result = _mm_xor_si128(lut_result, lut_result); + __m128i selected_entries; + __m128i table_line; + + for (i = 0; i < len; i++) { + // broadcast the character, xor into hash, make them different permutations + __m128i cc = _mm_set1_epi8(in[i]); + hash = _mm_xor_si128(hash, cc ); + hash = _mm_xor_si128(hash, hash_mask); + + // table lookup + size_t j; + __m128i lut_index = hash; + lut_result = _mm_xor_si128(lut_result, lut_result); + for (j = 0; j < 256; j += 16) { + table_line = _mm_load_si128((__m128i *)&t[j]); + 
selected_entries = _mm_min_epu8(lut_index, p16); + selected_entries = _mm_cmpeq_epi8(selected_entries, p16); + selected_entries = _mm_or_si128(selected_entries, lut_index); + selected_entries = _mm_shuffle_epi8(table_line, selected_entries); + lut_result = _mm_or_si128(lut_result, selected_entries); + lut_index = _mm_sub_epi8(lut_index, p16); + } + hash = lut_result; } - hash = lut_result; - } - // store output - _mm_store_si128 ((__m128i*)out , hash); + // store output + _mm_store_si128((__m128i *)out, hash); } -static void pearson_hash_64_ssse3(uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in) { - size_t i; - __m128i hash_mask = _mm_cvtsi64_si128(UINT64_C(0x0706050403020100)); - __m128i hash = _mm_cvtsi64_si128 (hash_in); - - // table lookup preparation - __m128i const p16 = _mm_set1_epi8 (0x10); - __m128i lut_result = _mm_xor_si128 (lut_result, lut_result); - - for (i = 0; i < len; i++) { - // broadcast the character, xor into hash, make them different permutations - __m128i cc = _mm_set1_epi8 (in[i]); - hash = _mm_xor_si128 (hash, cc); - hash = _mm_xor_si128 (hash, hash_mask); - - // table lookup - size_t j; - __m128i lut_index = hash; - lut_result = _mm_xor_si128 (lut_result, lut_result); - for (j = 0; j < 256; j += 16) { - __m128i table_line = _mm_load_si128 ((__m128i *)&t[j]); - __m128i selected_entries = _mm_min_epu8 (lut_index, p16); - selected_entries = _mm_cmpeq_epi8 (selected_entries, p16); - selected_entries = _mm_or_si128 (selected_entries, lut_index); - selected_entries = _mm_shuffle_epi8 (table_line, selected_entries); - lut_result = _mm_or_si128 (lut_result, selected_entries); - lut_index = _mm_sub_epi8 (lut_index, p16); +static void pearson_hash_64_ssse3( uint8_t * out, const uint8_t * in, size_t len, uint64_t hash_in ) { + size_t i; + __m128i hash_mask = _mm_cvtsi64_si128(UINT64_C(0x0706050403020100)); + __m128i hash = _mm_cvtsi64_si128(hash_in); + + // table lookup preparation + __m128i const p16 = _mm_set1_epi8(0x10); + __m128i 
lut_result = _mm_xor_si128(lut_result, lut_result); + + for (i = 0; i < len; i++) { + // broadcast the character, xor into hash, make them different permutations + __m128i cc = _mm_set1_epi8(in[i]); + hash = _mm_xor_si128(hash, cc ); + hash = _mm_xor_si128(hash, hash_mask); + + // table lookup + size_t j; + __m128i lut_index = hash; + lut_result = _mm_xor_si128(lut_result, lut_result); + for (j = 0; j < 256; j += 16) { + __m128i table_line = _mm_load_si128((__m128i *)&t[j]); + __m128i selected_entries = _mm_min_epu8(lut_index, p16); + selected_entries = _mm_cmpeq_epi8(selected_entries, p16); + selected_entries = _mm_or_si128(selected_entries, lut_index); + selected_entries = _mm_shuffle_epi8(table_line, selected_entries); + lut_result = _mm_or_si128(lut_result, selected_entries); + lut_index = _mm_sub_epi8(lut_index, p16); + } + hash = lut_result; } - hash = lut_result; - } - // store output - _mm_storel_epi64((__m128i*)out , hash); + // store output + _mm_storel_epi64((__m128i *)out, hash); } + #endif -static void pearson64(const void * in, const size_t len, const seed_t seed, void * out) { +static void pearson64( const void * in, const size_t len, const seed_t seed, void * out ) { #if defined(HAVE_X86_64_AES) - pearson_hash_64_aesni((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); + pearson_hash_64_aesni((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); #elif defined(HAVE_SSSE_3) - pearson_hash_64_ssse3((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); + pearson_hash_64_ssse3((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); #else - pearson_hash_64_portable((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); + pearson_hash_64_portable((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); #endif } -static void pearson128(const void * in, const size_t len, const seed_t seed, void * out) { +static void pearson128( const void * in, const size_t len, const seed_t seed, void * out ) { #if defined(HAVE_X86_64_AES) - 
pearson_hash_128_aesni((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); + pearson_hash_128_aesni((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); #elif defined(HAVE_SSSE_3) - pearson_hash_128_ssse3((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); + pearson_hash_128_ssse3((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); #else - pearson_hash_128_portable((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); + pearson_hash_128_portable((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); #endif } -static void pearson256(const void * in, const size_t len, const seed_t seed, void * out) { +static void pearson256( const void * in, const size_t len, const seed_t seed, void * out ) { #if defined(HAVE_X86_64_AES) - pearson_hash_256_aesni((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); + pearson_hash_256_aesni((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); #elif defined(HAVE_SSSE_3) - pearson_hash_256_ssse3((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); + pearson_hash_256_ssse3((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); #else - pearson_hash_256_portable((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); + pearson_hash_256_portable((uint8_t *)out, (const uint8_t *)in, len, (uint64_t)seed); #endif } REGISTER_FAMILY(pearson, - $.src_url = "https://github.com/Logan007/pearson", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://github.com/Logan007/pearson", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); REGISTER_HASH(Pearson_64, - $.desc = "Pearson hash, 8 lanes using AES sbox", - $.hash_flags = - FLAG_HASH_AES_BASED | - FLAG_HASH_LOOKUP_TABLE, - $.impl_flags = - FLAG_IMPL_SLOW | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 64, - $.verification_LE = 0x12E4C8CD, - $.verification_BE = 0x12E4C8CD, - $.hashfn_native = pearson64, - $.hashfn_bswap = pearson64, - $.initfn = pearson_hash_init -); + $.desc = "Pearson hash, 8 lanes using AES sbox", + 
$.hash_flags = + FLAG_HASH_AES_BASED | + FLAG_HASH_LOOKUP_TABLE, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 64, + $.verification_LE = 0x12E4C8CD, + $.verification_BE = 0x12E4C8CD, + $.hashfn_native = pearson64, + $.hashfn_bswap = pearson64, + $.initfn = pearson_hash_init + ); REGISTER_HASH(Pearson_128, - $.desc = "Pearson hash, 16 lanes using AES sbox", - $.hash_flags = - FLAG_HASH_AES_BASED | - FLAG_HASH_LOOKUP_TABLE, - $.impl_flags = - FLAG_IMPL_SLOW | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 128, - $.verification_LE = 0xDC5048A3, - $.verification_BE = 0xDC5048A3, - $.hashfn_native = pearson128, - $.hashfn_bswap = pearson128, - $.initfn = pearson_hash_init -); + $.desc = "Pearson hash, 16 lanes using AES sbox", + $.hash_flags = + FLAG_HASH_AES_BASED | + FLAG_HASH_LOOKUP_TABLE, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 128, + $.verification_LE = 0xDC5048A3, + $.verification_BE = 0xDC5048A3, + $.hashfn_native = pearson128, + $.hashfn_bswap = pearson128, + $.initfn = pearson_hash_init + ); REGISTER_HASH(Pearson_256, - $.desc = "Pearson hash, 32 lanes using AES sbox", - $.hash_flags = - FLAG_HASH_AES_BASED | - FLAG_HASH_LOOKUP_TABLE, - $.impl_flags = - FLAG_IMPL_SLOW | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 256, - $.verification_LE = 0xA9B1DE02, - $.verification_BE = 0xA9B1DE02, - $.hashfn_native = pearson256, - $.hashfn_bswap = pearson256, - $.initfn = pearson_hash_init -); + $.desc = "Pearson hash, 32 lanes using AES sbox", + $.hash_flags = + FLAG_HASH_AES_BASED | + FLAG_HASH_LOOKUP_TABLE, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 256, + $.verification_LE = 0xA9B1DE02, + $.verification_BE = 0xA9B1DE02, + $.hashfn_native = pearson256, + $.hashfn_bswap = pearson256, + $.initfn = pearson_hash_init + ); diff --git a/hashes/pengyhash.cpp b/hashes/pengyhash.cpp index 53330052..a982c04e 100644 --- a/hashes/pengyhash.cpp +++ 
b/hashes/pengyhash.cpp @@ -33,13 +33,13 @@ #include "Hashlib.h" //------------------------------------------------------------ -template < bool bswap > -static uint64_t pengyhash(const uint8_t * p, size_t size, uint64_t seed) { +template +static uint64_t pengyhash( const uint8_t * p, size_t size, uint64_t seed ) { uint64_t b[4] = { 0 }; uint64_t s[4] = { 0, 0, 0, size }; - int i; + int i; - for(; size >= 32; size -= 32, p += 32) { + for (; size >= 32; size -= 32, p += 32) { memcpy(b, p, 32); s[1] = (s[0] += s[1] + GET_U64((uint8_t *)&b[3], 0)) + (s[1] << 14 | s[1] >> 50); @@ -50,7 +50,7 @@ static uint64_t pengyhash(const uint8_t * p, size_t size, uint64_t seed) { memcpy(b, p, size); - for(i = 0; i < 6; i++) { + for (i = 0; i < 6; i++) { s[1] = (s[0] += s[1] + GET_U64((uint8_t *)&b[3], 0)) + (s[1] << 14 | s[1] >> 50) + seed; s[3] = (s[2] += s[3] + GET_U64((uint8_t *)&b[2], 0)) + (s[3] << 23 | s[3] >> 41); s[3] = (s[0] += s[3] + GET_U64((uint8_t *)&b[1], 0)) ^ (s[3] << 16 | s[3] >> 48); @@ -61,28 +61,29 @@ static uint64_t pengyhash(const uint8_t * p, size_t size, uint64_t seed) { } //------------------------------------------------------------ -template < bool bswap > -static void pengy(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void pengy( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h = pengyhash((const uint8_t *)in, len, (uint64_t)seed); + PUT_U64(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(pengyhash, - $.src_url = "https://github.com/tinypeng/pengyhash", - $.src_status = HashFamilyInfo::SRC_STABLEISH -); + $.src_url = "https://github.com/tinypeng/pengyhash", + $.src_status = HashFamilyInfo::SRC_STABLEISH + ); REGISTER_HASH(pengyhash, - $.desc = "pengyhash v0.2", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_BSD, - $.bits = 64, - $.verification_LE = 0x1FC2217B, - $.verification_BE = 0x774D23AB, - 
$.hashfn_native = pengy, - $.hashfn_bswap = pengy -); + $.desc = "pengyhash v0.2", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_BSD, + $.bits = 64, + $.verification_LE = 0x1FC2217B, + $.verification_BE = 0x774D23AB, + $.hashfn_native = pengy, + $.hashfn_bswap = pengy + ); diff --git a/hashes/perlhashes.cpp b/hashes/perlhashes.cpp index 6a2b9a4f..032808fa 100644 --- a/hashes/perlhashes.cpp +++ b/hashes/perlhashes.cpp @@ -25,185 +25,195 @@ // hash value, as the perl code does. The old verification codes can // be obtained by removing "+ (uint32_t)len" from the "hash =" lines. -static uint32_t djb2(const uint8_t * str, const size_t len, const uint32_t seed) { - const uint8_t * end = str + len; - uint32_t hash = seed + (uint32_t)len; +static uint32_t djb2( const uint8_t * str, const size_t len, const uint32_t seed ) { + const uint8_t * end = str + len; + uint32_t hash = seed + (uint32_t)len; + while (str < end) { hash = ((hash << 5) + hash) + *str++; } return hash; } -static uint32_t sdbm(const uint8_t * str, const size_t len, const uint32_t seed) { - const uint8_t * end = str + len; - uint32_t hash = seed + (uint32_t)len; +static uint32_t sdbm( const uint8_t * str, const size_t len, const uint32_t seed ) { + const uint8_t * end = str + len; + uint32_t hash = seed + (uint32_t)len; + while (str < end) { hash = (hash << 6) + (hash << 16) - hash + *str++; } return hash; } -static uint32_t jenkinsOAAT(const uint8_t * str, const size_t len, const uint32_t seed) { - const uint8_t * end = str + len; - uint32_t hash = seed + (uint32_t)len; +static uint32_t jenkinsOAAT( const uint8_t * str, const size_t len, const uint32_t seed ) { + const uint8_t * end = str + len; + uint32_t hash = seed + (uint32_t)len; + while (str < end) { hash += *str++; hash += (hash << 10); - hash ^= (hash >> 6); + hash ^= (hash >> 6); } - hash += (hash << 3); + hash += (hash << 3); hash ^= (hash >> 11); hash += (hash << 15); return hash; } -static uint32_t 
jenkinsOAAT_old(const uint8_t * str, const size_t len, const uint32_t seed) { - const uint8_t * end = str + len; - uint32_t hash = seed; +static uint32_t jenkinsOAAT_old( const uint8_t * str, const size_t len, const uint32_t seed ) { + const uint8_t * end = str + len; + uint32_t hash = seed; + while (str < end) { hash += *str++; hash += (hash << 10); - hash ^= (hash >> 6); + hash ^= (hash >> 6); } - hash += (hash << 3); + hash += (hash << 3); hash ^= (hash >> 11); hash += (hash << 15); return hash; } -static uint32_t jenkinsOAAT_hard(const uint8_t * str, const size_t len, const uint64_t seed64) { - const uint8_t * end = str + len; - uint32_t hash = (uint32_t)seed64 + (uint32_t)len; +static uint32_t jenkinsOAAT_hard( const uint8_t * str, const size_t len, const uint64_t seed64 ) { + const uint8_t * end = str + len; + uint32_t hash = (uint32_t)seed64 + (uint32_t)len; + while (str < end) { hash += (hash << 10); - hash ^= (hash >> 6); + hash ^= (hash >> 6); hash += *str++; } - hash += (hash << 10); - hash ^= (hash >> 6); + hash += (hash << 10); + hash ^= (hash >> 6); hash += (seed64 >> 32) & 0xFF; - hash += (hash << 10); - hash ^= (hash >> 6); + hash += (hash << 10); + hash ^= (hash >> 6); hash += (seed64 >> 40) & 0xFF; - hash += (hash << 10); - hash ^= (hash >> 6); + hash += (hash << 10); + hash ^= (hash >> 6); hash += (seed64 >> 48) & 0xFF; - hash += (hash << 10); - hash ^= (hash >> 6); + hash += (hash << 10); + hash ^= (hash >> 6); hash += (seed64 >> 56) & 0xFF; - hash += (hash << 10); - hash ^= (hash >> 6); - hash += (hash << 3); - hash ^= (hash >> 11); - hash += (hash << 15); + hash += (hash << 10); + hash ^= (hash >> 6); + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); return hash; } //------------------------------------------------------------ -template < bool bswap > -static void perl_djb2(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void perl_djb2( const void * in, const size_t len, const seed_t 
seed, void * out ) { uint32_t h = djb2((const uint8_t *)in, len, (uint32_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void perl_sdbm(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void perl_sdbm( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = sdbm((const uint8_t *)in, len, (uint32_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void perl_jenkins(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void perl_jenkins( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = jenkinsOAAT((const uint8_t *)in, len, (uint32_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void perl_jenkins_old(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void perl_jenkins_old( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = jenkinsOAAT_old((const uint8_t *)in, len, (uint32_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void perl_jenkins_hard(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void perl_jenkins_hard( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = jenkinsOAAT_hard((const uint8_t *)in, len, (uint64_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(perloldhashes, - $.src_url = "https://github.com/Perl/perl5/blob/6b0260474df579e9412f57249519747ab8bb5c2b/hv_func.h", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/Perl/perl5/blob/6b0260474df579e9412f57249519747ab8bb5c2b/hv_func.h", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(perl_djb2, - $.desc = "djb2 OAAT hash (from old perl5 code)", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_GPL3, 
- $.bits = 32, - $.verification_LE = 0x4962CBAB, - $.verification_BE = 0xCBC1BFB3, - $.hashfn_native = perl_djb2, - $.hashfn_bswap = perl_djb2 -); + $.desc = "djb2 OAAT hash (from old perl5 code)", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_GPL3, + $.bits = 32, + $.verification_LE = 0x4962CBAB, + $.verification_BE = 0xCBC1BFB3, + $.hashfn_native = perl_djb2, + $.hashfn_bswap = perl_djb2 + ); REGISTER_HASH(perl_sdbm, - $.desc = "sdbm OAAT hash (from old perl5 code)", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_GPL3, - $.bits = 32, - $.verification_LE = 0xD973311D, - $.verification_BE = 0xA3228EF6, - $.hashfn_native = perl_sdbm, - $.hashfn_bswap = perl_sdbm -); + $.desc = "sdbm OAAT hash (from old perl5 code)", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_GPL3, + $.bits = 32, + $.verification_LE = 0xD973311D, + $.verification_BE = 0xA3228EF6, + $.hashfn_native = perl_sdbm, + $.hashfn_bswap = perl_sdbm + ); REGISTER_HASH(perl_jenkins, - $.desc = "Bob Jenkins' OAAT hash (from old perl5 code)", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_GPL3, - $.bits = 32, - $.verification_LE = 0xE3ED0E54, - $.verification_BE = 0xA83E99BF, - $.hashfn_native = perl_jenkins, - $.hashfn_bswap = perl_jenkins -); + $.desc = "Bob Jenkins' OAAT hash (from old perl5 code)", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_GPL3, + $.bits = 32, + $.verification_LE = 0xE3ED0E54, + $.verification_BE = 0xA83E99BF, + $.hashfn_native = perl_jenkins, + $.hashfn_bswap = perl_jenkins + ); REGISTER_HASH(perl_jenkins_old, - $.desc = "Bob Jenkins' OAAT hash (\"old\" version from old perl5 code)", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | - FLAG_IMPL_LICENSE_GPL3, - $.bits = 32, - $.verification_LE = 0xEE05869B, - $.verification_BE = 0x691105C0, - $.hashfn_native = perl_jenkins_old, - $.hashfn_bswap = 
perl_jenkins_old -); + $.desc = "Bob Jenkins' OAAT hash (\"old\" version from old perl5 code)", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS | + FLAG_IMPL_LICENSE_GPL3, + $.bits = 32, + $.verification_LE = 0xEE05869B, + $.verification_BE = 0x691105C0, + $.hashfn_native = perl_jenkins_old, + $.hashfn_bswap = perl_jenkins_old + ); REGISTER_HASH(perl_jenkins_hard, - $.desc = "Bob Jenkins' OAAT hash (\"hard\" version from old perl5 code)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_SLOW | - FLAG_IMPL_LICENSE_GPL3, - $.bits = 32, - $.verification_LE = 0x1C216B25, - $.verification_BE = 0x3B326068, - $.hashfn_native = perl_jenkins_hard, - $.hashfn_bswap = perl_jenkins_hard -); + $.desc = "Bob Jenkins' OAAT hash (\"hard\" version from old perl5 code)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_LICENSE_GPL3, + $.bits = 32, + $.verification_LE = 0x1C216B25, + $.verification_BE = 0x3B326068, + $.hashfn_native = perl_jenkins_hard, + $.hashfn_bswap = perl_jenkins_hard + ); diff --git a/hashes/pmp_multilinear.cpp b/hashes/pmp_multilinear.cpp index ffbe2e96..04b76b91 100644 --- a/hashes/pmp_multilinear.cpp +++ b/hashes/pmp_multilinear.cpp @@ -29,10 +29,10 @@ #include "Hashlib.h" #if defined(HAVE_AVX2) || defined(HAVE_SSE_4_1) || defined(HAVE_SSE_2) -#undef HAVE_AVX2 -#undef HAVE_SSE_4_1 -#undef HAVE_SSE_2 -//#include "Intrinsics.h" + #undef HAVE_AVX2 + #undef HAVE_SSE_4_1 + #undef HAVE_SSE_2 +// #include "Intrinsics.h" #endif #include "Mathmult.h" @@ -43,69 +43,66 @@ using namespace std; //------------------------------------------------------------- // Common typedefs #if __BYTE_ORDER == __LITTLE_ENDIAN -typedef union _ULARGE_INTEGER__XX -{ - struct { - uint32_t LowPart; - uint32_t HighPart; - }; - struct { - uint32_t LowPart; - uint32_t HighPart; - } u; - uint64_t QuadPart; +typedef union _ULARGE_INTEGER__XX { + struct { + uint32_t LowPart; + uint32_t HighPart; + }; + struct { + uint32_t LowPart; + 
uint32_t HighPart; + } u; + uint64_t QuadPart; } ULARGE_INTEGER__XX; typedef union _LARGE_INTEGER__XX { struct { - uint32_t LowPart; - int32_t HighPart; + uint32_t LowPart; + int32_t HighPart; }; struct { - uint32_t LowPart; - int32_t HighPart; - } u; - int64_t QuadPart; + uint32_t LowPart; + int32_t HighPart; + } u; + int64_t QuadPart; } LARGE_INTEGER__XX; #else -typedef union _ULARGE_INTEGER__XX -{ - struct { - uint32_t HighPart; - uint32_t LowPart; - }; - struct { - uint32_t HighPart; - uint32_t LowPart; - } u; - uint64_t QuadPart; +typedef union _ULARGE_INTEGER__XX { + struct { + uint32_t HighPart; + uint32_t LowPart; + }; + struct { + uint32_t HighPart; + uint32_t LowPart; + } u; + uint64_t QuadPart; } ULARGE_INTEGER__XX; typedef union _LARGE_INTEGER__XX { struct { - int32_t HighPart; - uint32_t LowPart; + int32_t HighPart; + uint32_t LowPart; }; struct { - int32_t HighPart; - uint32_t LowPart; - } u; - int64_t QuadPart; + int32_t HighPart; + uint32_t LowPart; + } u; + int64_t QuadPart; } LARGE_INTEGER__XX; #endif -typedef struct _ULARGELARGE_INTEGER__XX -{ - uint64_t LowPart; - uint64_t HighPart; +typedef struct _ULARGELARGE_INTEGER__XX { + uint64_t LowPart; + uint64_t HighPart; } ULARGELARGE_INTEGER__XX; #if defined(__arm__) -typedef struct { uint32_t value __attribute__((__packed__)); } unaligned_uint32; -typedef struct { uint64_t value __attribute__((__packed__)); } unaligned_uint64; +typedef struct { uint32_t value __attribute__((__packed__)); } unaligned_uint32; +typedef struct { uint64_t value __attribute__((__packed__)); } unaligned_uint64; #else -typedef struct { uint32_t value; } unaligned_uint32; -typedef struct { uint64_t value; } unaligned_uint64; +typedef struct { uint32_t value; } unaligned_uint32; +typedef struct { uint64_t value; } unaligned_uint64; #endif // __arm__ //------------------------------------------------------------- @@ -116,13 +113,13 @@ typedef struct { uint64_t value; } unaligned_uint64; #define PMPML_32_WORD_SIZE_BYTES_LOG2 2 
#define PMPML_32_LEVELS 8 // Derived constants -static const uint32_t PMPML_32_CHUNK_SIZE = (1 << PMPML_32_CHUNK_SIZE_LOG2); -static const uint32_t PMPML_32_WORD_SIZE_BYTES = (1 << PMPML_32_WORD_SIZE_BYTES_LOG2); -static const uint32_t PMPML_32_CHUNK_SIZE_BYTES = PMPML_32_CHUNK_SIZE * PMPML_32_WORD_SIZE_BYTES; +static const uint32_t PMPML_32_CHUNK_SIZE = (1 << PMPML_32_CHUNK_SIZE_LOG2 ); +static const uint32_t PMPML_32_WORD_SIZE_BYTES = (1 << PMPML_32_WORD_SIZE_BYTES_LOG2); +static const uint32_t PMPML_32_CHUNK_SIZE_BYTES = PMPML_32_CHUNK_SIZE * PMPML_32_WORD_SIZE_BYTES; static const uint32_t PMPML_32_CHUNK_SIZE_BYTES_LOG2 = PMPML_32_CHUNK_SIZE_LOG2 + PMPML_32_WORD_SIZE_BYTES_LOG2; // container for coefficients -typedef struct alignas(32) _random_data_for_PMPML_32 { +typedef struct alignas( 32 ) _random_data_for_PMPML_32 { uint64_t const_term; uint64_t cachedSum; uint64_t dummy[2]; @@ -130,184 +127,184 @@ typedef struct alignas(32) _random_data_for_PMPML_32 { } random_data_for_PMPML_32; static thread_local random_data_for_PMPML_32 rd_for_PMPML_32[PMPML_32_LEVELS] = { - // Level 0 - { - UINT64_C(0xb5ae35fa), UINT64_C(0x45dfdab824), {UINT64_C(0), UINT64_C(0)}, // dummy + // Level 0 { - 0x801841bb, 0x5ef2b6fc, 0xcc5a24e2, 0x1b6c5dd5, 0xeb07483b, 0xef894c5b, 0x02213973, 0x2d34d946, - 0x11af1a4d, 0xd0a96734, 0xf39454a6, 0x58574f85, 0x08bc3780, 0x3d5e4d6e, 0x72302724, 0x89d2f7d4, - 0x97d9459e, 0xba75d6d3, 0x69efa09d, 0x56f8f06a, 0x7345e990, 0x8ac230e9, 0xd21f3d0c, 0x3fffba8a, - 0xd6dd6772, 0xd8c69c6b, 0x77a68e52, 0xde17020d, 0xf969ac45, 0x4ec4e3fb, 0x66e1eaae, 0x8c3e2c33, - 0xd031a884, 0x5942d1f7, 0x355157a1, 0x79e517ce, 0x6f6e67c9, 0xdbeb2ce9, 0xaf4c5195, 0x1d72b4ce, - 0x2214d9f3, 0xdab836c3, 0x94a54c8d, 0xa259587e, 0x8e5a6bd6, 0x75d23672, 0xf08fcd74, 0x59297837, - 0xc1f093c7, 0xb1e14572, 0x84e25787, 0xfa18cbdd, 0xc0a8efe1, 0x8f746f29, 0xd1dfea17, 0xd17d1d65, - 0x99c0334e, 0xc200ce59, 0xbac039b7, 0xaa8da145, 0x91787415, 0x7478d0e6, 0xd4fcb135, 0x76c4ce66, - 0xdf1d9e9b, 
0xe6a6640f, 0x94dd9b8e, 0x7f530896, 0xd5a76dff, 0xda99ae01, 0x2830dcad, 0x18421917, - 0xc98aeb4f, 0x0048fdda, 0xd5ae8cba, 0xe9d27a3f, 0xc51ba04d, 0x8f1403e7, 0x2cbc94bd, 0x2c47c847, - 0xbf127785, 0x54d2a15b, 0x6a818544, 0x993ca700, 0x31f529ed, 0x4cf30c4c, 0x386af44a, 0x1378d4c0, - 0x3c40ac83, 0x3d27aaa4, 0x9b1c685e, 0x61dbbba6, 0xe5fbbd87, 0x800c57fd, 0xccd49830, 0x1ee12d69, - 0x84868385, 0xbaf5679f, 0xd0417045, 0x4f5c30f0, 0x70558f08, 0x7c1e281d, 0xfe17014e, 0x56404d7c, - 0x77dcfdd3, 0xf0d53161, 0xf9914927, 0x69bc0362, 0x609759cb, 0xfc9afc53, 0xc5f28ba8, 0x9cbe677d, - 0x8b8311e5, 0x40a1fbde, 0x500ef7fc, 0xd51ceaa4, 0x2c666e8f, 0xbf81662b, 0xa0922fe9, 0x65a75374, - 0xc744184e, 0x1fad7a1a, 0xbc3678c2, 0xde23fbbc, 0x0403fd45, 0x69cd23ae, 0xf3dc2f19, 0x31416e93, + UINT64_C(0xb5ae35fa), UINT64_C(0x45dfdab824), { UINT64_C(0), UINT64_C(0) }, // dummy + { + 0x801841bb, 0x5ef2b6fc, 0xcc5a24e2, 0x1b6c5dd5, 0xeb07483b, 0xef894c5b, 0x02213973, 0x2d34d946, + 0x11af1a4d, 0xd0a96734, 0xf39454a6, 0x58574f85, 0x08bc3780, 0x3d5e4d6e, 0x72302724, 0x89d2f7d4, + 0x97d9459e, 0xba75d6d3, 0x69efa09d, 0x56f8f06a, 0x7345e990, 0x8ac230e9, 0xd21f3d0c, 0x3fffba8a, + 0xd6dd6772, 0xd8c69c6b, 0x77a68e52, 0xde17020d, 0xf969ac45, 0x4ec4e3fb, 0x66e1eaae, 0x8c3e2c33, + 0xd031a884, 0x5942d1f7, 0x355157a1, 0x79e517ce, 0x6f6e67c9, 0xdbeb2ce9, 0xaf4c5195, 0x1d72b4ce, + 0x2214d9f3, 0xdab836c3, 0x94a54c8d, 0xa259587e, 0x8e5a6bd6, 0x75d23672, 0xf08fcd74, 0x59297837, + 0xc1f093c7, 0xb1e14572, 0x84e25787, 0xfa18cbdd, 0xc0a8efe1, 0x8f746f29, 0xd1dfea17, 0xd17d1d65, + 0x99c0334e, 0xc200ce59, 0xbac039b7, 0xaa8da145, 0x91787415, 0x7478d0e6, 0xd4fcb135, 0x76c4ce66, + 0xdf1d9e9b, 0xe6a6640f, 0x94dd9b8e, 0x7f530896, 0xd5a76dff, 0xda99ae01, 0x2830dcad, 0x18421917, + 0xc98aeb4f, 0x0048fdda, 0xd5ae8cba, 0xe9d27a3f, 0xc51ba04d, 0x8f1403e7, 0x2cbc94bd, 0x2c47c847, + 0xbf127785, 0x54d2a15b, 0x6a818544, 0x993ca700, 0x31f529ed, 0x4cf30c4c, 0x386af44a, 0x1378d4c0, + 0x3c40ac83, 0x3d27aaa4, 0x9b1c685e, 0x61dbbba6, 
0xe5fbbd87, 0x800c57fd, 0xccd49830, 0x1ee12d69, + 0x84868385, 0xbaf5679f, 0xd0417045, 0x4f5c30f0, 0x70558f08, 0x7c1e281d, 0xfe17014e, 0x56404d7c, + 0x77dcfdd3, 0xf0d53161, 0xf9914927, 0x69bc0362, 0x609759cb, 0xfc9afc53, 0xc5f28ba8, 0x9cbe677d, + 0x8b8311e5, 0x40a1fbde, 0x500ef7fc, 0xd51ceaa4, 0x2c666e8f, 0xbf81662b, 0xa0922fe9, 0x65a75374, + 0xc744184e, 0x1fad7a1a, 0xbc3678c2, 0xde23fbbc, 0x0403fd45, 0x69cd23ae, 0xf3dc2f19, 0x31416e93, + }, }, - }, - // Level 1 - { - UINT64_C(0xc3dbb82), UINT64_C(0x3c33d12213), {UINT64_C(0), UINT64_C(0)}, // dummy + // Level 1 { - 0xd233467b, 0x72a70d41, 0x8bd6cb67, 0x2e954d02, 0x08142b46, 0xb9613249, 0x8136a81d, 0x3cdab6cf, - 0x70433dfc, 0x984d385b, 0x66f13c63, 0x392a028c, 0x84b10a87, 0xb54b7873, 0x7af58609, 0xbe835997, - 0x09878350, 0x2702ed23, 0x940ffe4b, 0x073982e4, 0x4b565486, 0xc1872a1b, 0xcb9af7a0, 0xd8a84f81, - 0xd8234048, 0x3d9a44b4, 0xfcecd1d5, 0x114fe193, 0x7e848584, 0x0082760d, 0x0ede3da7, 0x0040762c, - 0xe522397a, 0x44ec8715, 0x422bc161, 0x0764c174, 0x3c511482, 0xd7dea424, 0xa12ec3c0, 0x66d33ec0, - 0x0aaa55ce, 0x65f93ec0, 0xadaaaf7f, 0x647e772d, 0xa6b0a4fa, 0x88a72a0d, 0x1cfa03b4, 0x4f28c0c6, - 0xa7c64b56, 0xedd8af5e, 0xa47e7242, 0x99f8d210, 0x8ad70f5f, 0xa8e3cdfb, 0x0a1db865, 0x56b2e1b0, - 0x0dd7b307, 0x564a191f, 0xca38b54f, 0x61567b67, 0xd50c9644, 0x7671637e, 0x92d511cc, 0x25057afc, - 0xd286cba4, 0x71f8dda9, 0x2ad9996c, 0x75ad65f0, 0x9418c0e9, 0xe6d0066b, 0xf1d15419, 0x264afe8b, - 0x98c932e2, 0x3a6d5f8d, 0x289a7d0c, 0x3d18290d, 0xb9ecee8d, 0xdff7a79b, 0x7ecc3cde, 0x583e06a0, - 0x8e29d297, 0xdc8650cb, 0x30f7861d, 0xf2de5cf9, 0x924dc8bc, 0x5afb46e9, 0xb997b1d9, 0x463d84a2, - 0xfb8e2e7e, 0x043418b8, 0xa94e6a05, 0xae5c1efa, 0x7c7e4583, 0xcb6755ac, 0xf3359dba, 0xf05fdf94, - 0x79db25ea, 0xed490569, 0x993d8da0, 0x6593ce5a, 0x03e3ed39, 0x044f74a3, 0x84777814, 0xcb2848d7, - 0x41881b64, 0xf52d206e, 0x1fb1ebaf, 0x07a3d4b3, 0x63a5924f, 0x35c21005, 0xc981c63c, 0x9e3fdbaa, - 0x89b64b0d, 0x0f2aba74, 0x512f3cfe, 0xb053e5d0, 
0x59a69c4a, 0x400c442f, 0x28afebd0, 0x4540c190, - 0xc7f5e757, 0x7d40152b, 0x321fa235, 0xb6309529, 0x021c71e1, 0x7474f524, 0xc4f2e22e, 0x778b9371, + UINT64_C(0xc3dbb82), UINT64_C(0x3c33d12213), { UINT64_C(0), UINT64_C(0) }, // dummy + { + 0xd233467b, 0x72a70d41, 0x8bd6cb67, 0x2e954d02, 0x08142b46, 0xb9613249, 0x8136a81d, 0x3cdab6cf, + 0x70433dfc, 0x984d385b, 0x66f13c63, 0x392a028c, 0x84b10a87, 0xb54b7873, 0x7af58609, 0xbe835997, + 0x09878350, 0x2702ed23, 0x940ffe4b, 0x073982e4, 0x4b565486, 0xc1872a1b, 0xcb9af7a0, 0xd8a84f81, + 0xd8234048, 0x3d9a44b4, 0xfcecd1d5, 0x114fe193, 0x7e848584, 0x0082760d, 0x0ede3da7, 0x0040762c, + 0xe522397a, 0x44ec8715, 0x422bc161, 0x0764c174, 0x3c511482, 0xd7dea424, 0xa12ec3c0, 0x66d33ec0, + 0x0aaa55ce, 0x65f93ec0, 0xadaaaf7f, 0x647e772d, 0xa6b0a4fa, 0x88a72a0d, 0x1cfa03b4, 0x4f28c0c6, + 0xa7c64b56, 0xedd8af5e, 0xa47e7242, 0x99f8d210, 0x8ad70f5f, 0xa8e3cdfb, 0x0a1db865, 0x56b2e1b0, + 0x0dd7b307, 0x564a191f, 0xca38b54f, 0x61567b67, 0xd50c9644, 0x7671637e, 0x92d511cc, 0x25057afc, + 0xd286cba4, 0x71f8dda9, 0x2ad9996c, 0x75ad65f0, 0x9418c0e9, 0xe6d0066b, 0xf1d15419, 0x264afe8b, + 0x98c932e2, 0x3a6d5f8d, 0x289a7d0c, 0x3d18290d, 0xb9ecee8d, 0xdff7a79b, 0x7ecc3cde, 0x583e06a0, + 0x8e29d297, 0xdc8650cb, 0x30f7861d, 0xf2de5cf9, 0x924dc8bc, 0x5afb46e9, 0xb997b1d9, 0x463d84a2, + 0xfb8e2e7e, 0x043418b8, 0xa94e6a05, 0xae5c1efa, 0x7c7e4583, 0xcb6755ac, 0xf3359dba, 0xf05fdf94, + 0x79db25ea, 0xed490569, 0x993d8da0, 0x6593ce5a, 0x03e3ed39, 0x044f74a3, 0x84777814, 0xcb2848d7, + 0x41881b64, 0xf52d206e, 0x1fb1ebaf, 0x07a3d4b3, 0x63a5924f, 0x35c21005, 0xc981c63c, 0x9e3fdbaa, + 0x89b64b0d, 0x0f2aba74, 0x512f3cfe, 0xb053e5d0, 0x59a69c4a, 0x400c442f, 0x28afebd0, 0x4540c190, + 0xc7f5e757, 0x7d40152b, 0x321fa235, 0xb6309529, 0x021c71e1, 0x7474f524, 0xc4f2e22e, 0x778b9371, + }, }, - }, - // Level 2 - { - UINT64_C(0x4ae2b467), UINT64_C(0x41b6700d41), {UINT64_C(0), UINT64_C(0)}, // dummy + // Level 2 { - 0xf8898c22, 0x863868bc, 0xd35470e9, 0x58d21ad6, 0xa2fce702, 
0xe4f58530, 0x0225c8a9, 0x9b29b401, - 0xf4f6d3eb, 0xf751b2ce, 0x2afa3d7a, 0xc1edf3e9, 0x4c57e2d1, 0xc2ef970d, 0x8a70aa25, 0x887d0102, - 0xcc09e169, 0xeb5b75e2, 0x760b047e, 0xa2d21874, 0xc2bf310a, 0x8f030e02, 0x4b97fa22, 0x6a413ddb, - 0x708062b4, 0x58cc67d3, 0x52459895, 0x78d345e3, 0x2b7a9415, 0xbaf4d1fe, 0x83462969, 0x923fa257, - 0x91617494, 0xedf8d2f5, 0xc3d41302, 0xdf1934ff, 0x78a27863, 0xe7bf06a2, 0xc21b996d, 0x1e72411e, - 0x98da3053, 0x0c2195ad, 0xf984dd09, 0x4b30dac8, 0xf3a03a7a, 0xee6540ec, 0x966dffb7, 0xb463fdbe, - 0xbec26037, 0xcc9adad0, 0xdb71b8ef, 0x57341ca0, 0xa742ec7b, 0xe86321e9, 0x7a9d9f15, 0x7809e2a6, - 0x2cb6a0a0, 0x344756d0, 0x6e8e8c88, 0x7ecf3ff7, 0x129d18a0, 0x0965dc6a, 0xf6a2cad1, 0xd938681b, - 0xa1d07081, 0x4253df74, 0x774a5200, 0x59e1356d, 0x7aad36b5, 0x7dd6414a, 0x4700a70e, 0xd0da811c, - 0x1fd2a8b8, 0x1dee15ad, 0x7f15ae5a, 0xc1f74f27, 0xfd8bfb7f, 0x16815bb9, 0x64d29007, 0xc8919e9f, - 0x0b8c7e82, 0xfd5e92c2, 0x6e073fb7, 0xd52df9c2, 0x0c5c519d, 0x3ad86cb4, 0xfde300c8, 0x674c4dac, - 0x54899a0a, 0xbf9a9be5, 0xe198c073, 0x6025af27, 0x433bac50, 0x669d3281, 0xee3838b3, 0x0df3a048, - 0x2d0de6cd, 0xd289c8eb, 0x6b1c9eb1, 0x1634922b, 0x61917d41, 0x8b8bdeec, 0x12b73dcf, 0x96353517, - 0x20e29858, 0xecc04cb9, 0x0074a2ca, 0x58a0f1ba, 0x6ed4e71f, 0x063fec8e, 0xc5bc30c2, 0x77af6d46, - 0x078a6a93, 0x8c8da7a2, 0x1d02b1cc, 0x96b659f9, 0x8d8b4fbd, 0x521b2964, 0x990235f7, 0x55c63419, - 0x1ad869a5, 0x51987dbd, 0x99e7a3ff, 0xf584d99a, 0xc11c3506, 0xb1adca80, 0x55007e41, 0x09efa72b, + UINT64_C(0x4ae2b467), UINT64_C(0x41b6700d41), { UINT64_C(0), UINT64_C(0) }, // dummy + { + 0xf8898c22, 0x863868bc, 0xd35470e9, 0x58d21ad6, 0xa2fce702, 0xe4f58530, 0x0225c8a9, 0x9b29b401, + 0xf4f6d3eb, 0xf751b2ce, 0x2afa3d7a, 0xc1edf3e9, 0x4c57e2d1, 0xc2ef970d, 0x8a70aa25, 0x887d0102, + 0xcc09e169, 0xeb5b75e2, 0x760b047e, 0xa2d21874, 0xc2bf310a, 0x8f030e02, 0x4b97fa22, 0x6a413ddb, + 0x708062b4, 0x58cc67d3, 0x52459895, 0x78d345e3, 0x2b7a9415, 0xbaf4d1fe, 0x83462969, 0x923fa257, + 
0x91617494, 0xedf8d2f5, 0xc3d41302, 0xdf1934ff, 0x78a27863, 0xe7bf06a2, 0xc21b996d, 0x1e72411e, + 0x98da3053, 0x0c2195ad, 0xf984dd09, 0x4b30dac8, 0xf3a03a7a, 0xee6540ec, 0x966dffb7, 0xb463fdbe, + 0xbec26037, 0xcc9adad0, 0xdb71b8ef, 0x57341ca0, 0xa742ec7b, 0xe86321e9, 0x7a9d9f15, 0x7809e2a6, + 0x2cb6a0a0, 0x344756d0, 0x6e8e8c88, 0x7ecf3ff7, 0x129d18a0, 0x0965dc6a, 0xf6a2cad1, 0xd938681b, + 0xa1d07081, 0x4253df74, 0x774a5200, 0x59e1356d, 0x7aad36b5, 0x7dd6414a, 0x4700a70e, 0xd0da811c, + 0x1fd2a8b8, 0x1dee15ad, 0x7f15ae5a, 0xc1f74f27, 0xfd8bfb7f, 0x16815bb9, 0x64d29007, 0xc8919e9f, + 0x0b8c7e82, 0xfd5e92c2, 0x6e073fb7, 0xd52df9c2, 0x0c5c519d, 0x3ad86cb4, 0xfde300c8, 0x674c4dac, + 0x54899a0a, 0xbf9a9be5, 0xe198c073, 0x6025af27, 0x433bac50, 0x669d3281, 0xee3838b3, 0x0df3a048, + 0x2d0de6cd, 0xd289c8eb, 0x6b1c9eb1, 0x1634922b, 0x61917d41, 0x8b8bdeec, 0x12b73dcf, 0x96353517, + 0x20e29858, 0xecc04cb9, 0x0074a2ca, 0x58a0f1ba, 0x6ed4e71f, 0x063fec8e, 0xc5bc30c2, 0x77af6d46, + 0x078a6a93, 0x8c8da7a2, 0x1d02b1cc, 0x96b659f9, 0x8d8b4fbd, 0x521b2964, 0x990235f7, 0x55c63419, + 0x1ad869a5, 0x51987dbd, 0x99e7a3ff, 0xf584d99a, 0xc11c3506, 0xb1adca80, 0x55007e41, 0x09efa72b, + }, }, - }, - // Level 3 - { - UINT64_C(0xae82fd43), UINT64_C(0x4358e7ef21), {UINT64_C(0), UINT64_C(0)}, // dummy + // Level 3 { - 0x9e6c8a0f, 0x9107b963, 0xdc39a0eb, 0x9fb2328d, 0xd4f03812, 0xce7ff238, 0x99710f09, 0x90b5a0ba, - 0x53cb9654, 0xdca51386, 0x5a03c91d, 0x542e4280, 0x92d368ff, 0x6769cd0b, 0xacad27d0, 0x3947f94b, - 0xf33a3265, 0x2f298054, 0x5094d047, 0x962591a6, 0x89c1de39, 0x0ef43de4, 0xe87f5576, 0xb342b1dc, - 0xffb893e3, 0x08a96d7d, 0xe1023f0d, 0x054ac7ea, 0xeb0a8934, 0xe1558e68, 0xce76025c, 0x47c0a61f, - 0x9d476622, 0xee83acc6, 0x5fb7a3fd, 0xa1798b06, 0x97cfbc96, 0x341dc4f8, 0x079d4d68, 0x85811d0d, - 0xe81cd930, 0x83f55707, 0x7cd3da51, 0xe504fcf6, 0x5afed439, 0x35677002, 0x40d755aa, 0xcea876c6, - 0x1c8a9953, 0x9a7d47c1, 0x9343c019, 0x60ffafe4, 0x7c12e1c5, 0xa64b2499, 0x9e13587f, 0x6e690d98, - 
0x24a0dcfe, 0xfc4c35a6, 0x66eca52a, 0xe9e0315f, 0xa208fe48, 0x16d7bd81, 0xd5c9b0fb, 0xe7337bf9, - 0x2d3ad9dc, 0x6924c3f3, 0x8e7174f8, 0x01f7e499, 0x2e3edfb8, 0x8dfe2b6a, 0x40f43c09, 0xcf51dafc, - 0xafe98c70, 0x31b3d859, 0x07f28e34, 0x6527d100, 0x5274484e, 0x92fa82fe, 0xf059d18a, 0x55e4c67c, - 0x51e5d061, 0xaa4408e9, 0xbd7463cc, 0xb587505f, 0xfc88d42e, 0x70b3e921, 0xeabb6770, 0xfb3a060b, - 0xd675527a, 0xb8d6153f, 0xbd1763ad, 0x6f1a2573, 0xf96490be, 0xce99095f, 0x966d1090, 0x65e2a371, - 0x3a81e7f8, 0x769315db, 0xaa973861, 0x8d6d798c, 0xa935a7ae, 0x194de67a, 0x402f5da2, 0x58a7f932, - 0xa1eb519c, 0x65125c5b, 0x961b4b6c, 0x518c8dab, 0x47233e7f, 0x1b19109b, 0x46a1b3c1, 0x5dc3dd6c, - 0x709b63af, 0x3e43e71c, 0x7b997703, 0xa2259145, 0x81f87a1c, 0xa6c8a082, 0xa12ef053, 0x412e7f0e, - 0x29bef6e8, 0xcc8fca68, 0xf521167a, 0x203c0e84, 0xe92d5cd7, 0x9589c2d1, 0x208e2f28, 0x906bd537, + UINT64_C(0xae82fd43), UINT64_C(0x4358e7ef21), { UINT64_C(0), UINT64_C(0) }, // dummy + { + 0x9e6c8a0f, 0x9107b963, 0xdc39a0eb, 0x9fb2328d, 0xd4f03812, 0xce7ff238, 0x99710f09, 0x90b5a0ba, + 0x53cb9654, 0xdca51386, 0x5a03c91d, 0x542e4280, 0x92d368ff, 0x6769cd0b, 0xacad27d0, 0x3947f94b, + 0xf33a3265, 0x2f298054, 0x5094d047, 0x962591a6, 0x89c1de39, 0x0ef43de4, 0xe87f5576, 0xb342b1dc, + 0xffb893e3, 0x08a96d7d, 0xe1023f0d, 0x054ac7ea, 0xeb0a8934, 0xe1558e68, 0xce76025c, 0x47c0a61f, + 0x9d476622, 0xee83acc6, 0x5fb7a3fd, 0xa1798b06, 0x97cfbc96, 0x341dc4f8, 0x079d4d68, 0x85811d0d, + 0xe81cd930, 0x83f55707, 0x7cd3da51, 0xe504fcf6, 0x5afed439, 0x35677002, 0x40d755aa, 0xcea876c6, + 0x1c8a9953, 0x9a7d47c1, 0x9343c019, 0x60ffafe4, 0x7c12e1c5, 0xa64b2499, 0x9e13587f, 0x6e690d98, + 0x24a0dcfe, 0xfc4c35a6, 0x66eca52a, 0xe9e0315f, 0xa208fe48, 0x16d7bd81, 0xd5c9b0fb, 0xe7337bf9, + 0x2d3ad9dc, 0x6924c3f3, 0x8e7174f8, 0x01f7e499, 0x2e3edfb8, 0x8dfe2b6a, 0x40f43c09, 0xcf51dafc, + 0xafe98c70, 0x31b3d859, 0x07f28e34, 0x6527d100, 0x5274484e, 0x92fa82fe, 0xf059d18a, 0x55e4c67c, + 0x51e5d061, 0xaa4408e9, 0xbd7463cc, 
0xb587505f, 0xfc88d42e, 0x70b3e921, 0xeabb6770, 0xfb3a060b, + 0xd675527a, 0xb8d6153f, 0xbd1763ad, 0x6f1a2573, 0xf96490be, 0xce99095f, 0x966d1090, 0x65e2a371, + 0x3a81e7f8, 0x769315db, 0xaa973861, 0x8d6d798c, 0xa935a7ae, 0x194de67a, 0x402f5da2, 0x58a7f932, + 0xa1eb519c, 0x65125c5b, 0x961b4b6c, 0x518c8dab, 0x47233e7f, 0x1b19109b, 0x46a1b3c1, 0x5dc3dd6c, + 0x709b63af, 0x3e43e71c, 0x7b997703, 0xa2259145, 0x81f87a1c, 0xa6c8a082, 0xa12ef053, 0x412e7f0e, + 0x29bef6e8, 0xcc8fca68, 0xf521167a, 0x203c0e84, 0xe92d5cd7, 0x9589c2d1, 0x208e2f28, 0x906bd537, + }, }, - }, - // Level 4 - { - UINT64_C(0xc3b9656e), UINT64_C(0x3f969c7ed3), {UINT64_C(0), UINT64_C(0)}, // dummy + // Level 4 { - 0x60731d8f, 0x2e17b1b7, 0xb808f3c7, 0xf20f223c, 0xb964bc3c, 0xaa61a231, 0x3d84cd54, 0x94f006d6, - 0x684e8f60, 0xb64adf58, 0x7033ff6c, 0x01ea1b40, 0xbcaf2776, 0x70250562, 0x342ec517, 0x1e280438, - 0xaeaa96ba, 0x802391c2, 0x35a7f213, 0x8d0f57aa, 0xf8a1153b, 0x917a692a, 0xbac0385c, 0x6dc2f7dd, - 0xc573a21b, 0x0469558c, 0xf206c551, 0xfe683c17, 0x54d0c3bc, 0x80734381, 0xc4eef75c, 0x22648b9e, - 0xede23e78, 0x8823f123, 0xd687c6a7, 0x85b6752b, 0xb8cf5160, 0x8109a1c8, 0x1b4c7ceb, 0xaa8b17a6, - 0xeda3fcbf, 0xb6d65214, 0xe6171214, 0x98f4ee28, 0xc1ac9d91, 0x0810d22e, 0x1ccec281, 0xd1911b8a, - 0x272b7696, 0x860fc01d, 0x903c0029, 0xf3308e35, 0x8c2021ef, 0x52ebae93, 0x6ece3f90, 0x2d01f59f, - 0x15cf87c9, 0x79c113fd, 0xcee953e9, 0x6152456a, 0x82d25ea1, 0x743316c4, 0x351f50d1, 0x06e3708f, - 0x45060a80, 0x4c13c59a, 0x0a737387, 0x3eaa3672, 0xe5176942, 0x8431098a, 0x0cd55f05, 0x9d5c2eda, - 0x6df6d514, 0x41a412ea, 0x67606dd0, 0xdec02567, 0xaebddaad, 0xf48d85d8, 0x7f41af4b, 0xbb8b03b7, - 0x29bb612f, 0xc96546c9, 0xb04dfcc9, 0x2ee6c830, 0xafb0bc9e, 0x08e0ef18, 0xea81d1fc, 0xa58be897, - 0xee996482, 0xb7ee4493, 0x0c561cd5, 0x7695207b, 0x763a34f3, 0x7093196a, 0xecf527bd, 0xb3037632, - 0x40fdbc46, 0x72a3f33d, 0xb09e2e73, 0x1b41ab32, 0x32c280f4, 0x865d6444, 0xa998ef38, 0xe1f097de, - 0x5f6c5d4f, 0xfebdf03d, 0xc569ef53, 
0xec6decf1, 0x03de6003, 0x0e3063d7, 0x8dd9c0a0, 0x062c97a4, - 0xa45c835e, 0xd167187d, 0xfe55e66e, 0x6b24b6df, 0x572c5189, 0x30c18b20, 0x3c0346f8, 0x5982a13e, - 0xbf491b0f, 0x248df32c, 0x6f572546, 0x51296aff, 0x1a8c0702, 0x94a21284, 0x371e69c8, 0x2298720e, + UINT64_C(0xc3b9656e), UINT64_C(0x3f969c7ed3), { UINT64_C(0), UINT64_C(0) }, // dummy + { + 0x60731d8f, 0x2e17b1b7, 0xb808f3c7, 0xf20f223c, 0xb964bc3c, 0xaa61a231, 0x3d84cd54, 0x94f006d6, + 0x684e8f60, 0xb64adf58, 0x7033ff6c, 0x01ea1b40, 0xbcaf2776, 0x70250562, 0x342ec517, 0x1e280438, + 0xaeaa96ba, 0x802391c2, 0x35a7f213, 0x8d0f57aa, 0xf8a1153b, 0x917a692a, 0xbac0385c, 0x6dc2f7dd, + 0xc573a21b, 0x0469558c, 0xf206c551, 0xfe683c17, 0x54d0c3bc, 0x80734381, 0xc4eef75c, 0x22648b9e, + 0xede23e78, 0x8823f123, 0xd687c6a7, 0x85b6752b, 0xb8cf5160, 0x8109a1c8, 0x1b4c7ceb, 0xaa8b17a6, + 0xeda3fcbf, 0xb6d65214, 0xe6171214, 0x98f4ee28, 0xc1ac9d91, 0x0810d22e, 0x1ccec281, 0xd1911b8a, + 0x272b7696, 0x860fc01d, 0x903c0029, 0xf3308e35, 0x8c2021ef, 0x52ebae93, 0x6ece3f90, 0x2d01f59f, + 0x15cf87c9, 0x79c113fd, 0xcee953e9, 0x6152456a, 0x82d25ea1, 0x743316c4, 0x351f50d1, 0x06e3708f, + 0x45060a80, 0x4c13c59a, 0x0a737387, 0x3eaa3672, 0xe5176942, 0x8431098a, 0x0cd55f05, 0x9d5c2eda, + 0x6df6d514, 0x41a412ea, 0x67606dd0, 0xdec02567, 0xaebddaad, 0xf48d85d8, 0x7f41af4b, 0xbb8b03b7, + 0x29bb612f, 0xc96546c9, 0xb04dfcc9, 0x2ee6c830, 0xafb0bc9e, 0x08e0ef18, 0xea81d1fc, 0xa58be897, + 0xee996482, 0xb7ee4493, 0x0c561cd5, 0x7695207b, 0x763a34f3, 0x7093196a, 0xecf527bd, 0xb3037632, + 0x40fdbc46, 0x72a3f33d, 0xb09e2e73, 0x1b41ab32, 0x32c280f4, 0x865d6444, 0xa998ef38, 0xe1f097de, + 0x5f6c5d4f, 0xfebdf03d, 0xc569ef53, 0xec6decf1, 0x03de6003, 0x0e3063d7, 0x8dd9c0a0, 0x062c97a4, + 0xa45c835e, 0xd167187d, 0xfe55e66e, 0x6b24b6df, 0x572c5189, 0x30c18b20, 0x3c0346f8, 0x5982a13e, + 0xbf491b0f, 0x248df32c, 0x6f572546, 0x51296aff, 0x1a8c0702, 0x94a21284, 0x371e69c8, 0x2298720e, + }, }, - }, - // Level 5 - { - UINT64_C(0xe3c9939c), UINT64_C(0x3d848fecbb), 
{UINT64_C(0), UINT64_C(0)}, // dummy + // Level 5 { - 0x78bb7f84, 0xc6a18ac7, 0xeb321f90, 0x35d4f871, 0x61a5f4a7, 0x6d591ba2, 0x7f93ad57, 0x96841919, - 0xea7890a9, 0x0fa2f69c, 0x1866af58, 0x7f257346, 0xdcc51cd9, 0x92e78656, 0xc4628292, 0x42e01b49, - 0x40541662, 0x37af7888, 0x4faa39af, 0xa3207d98, 0x63750fda, 0x2767c143, 0xf11a2916, 0x618ceb9b, - 0x9d684ce0, 0x69088033, 0x1ab5a1c7, 0x0f0a4f86, 0x4e49f893, 0x0ca32464, 0x90a7c38e, 0x5a0aded0, - 0x2dae1926, 0x0d935a0e, 0xde592a69, 0x085299b2, 0x4977a3a0, 0x7e82d9bc, 0x399e6a95, 0xdb9f1b90, - 0xe1dfe431, 0xbac5a72d, 0x168fe9ef, 0x9727301e, 0x76cd1ddb, 0x2bcd89e0, 0x45b7de13, 0xf239f2ad, - 0xae66187d, 0xb92a6f32, 0xf0fb1c7f, 0xb77384f2, 0x6e405312, 0x6616a82e, 0x9bdca728, 0x1b5e6782, - 0xdd243a3f, 0xf148d161, 0xfe0e7b47, 0x0fdadcf7, 0x9f21d59d, 0x5057328f, 0x22f944b9, 0x7e68d807, - 0x46de914d, 0x2d351dad, 0x6b0f3436, 0x6d6a8943, 0xcd18923c, 0x2e8fa891, 0x33f1ed84, 0x30e3a20a, - 0xa15f52a0, 0x3162fa56, 0xa60d4a72, 0x3e9fab64, 0x0a584673, 0x99d08542, 0x5ce99b5a, 0xcf1be8b0, - 0xe83225e3, 0xad522e70, 0xb17e0c87, 0x5b081b14, 0xc4c71a48, 0xb430a70b, 0xf38673cd, 0x1aad3b26, - 0x0e50ca70, 0xa1aeb568, 0x4140ea0c, 0xdabeee2d, 0x2779c11b, 0x5e06c86e, 0x12803b8f, 0xa46fd322, - 0x7de67db9, 0x7d1ee355, 0xbea94742, 0xf529e572, 0x5374fffc, 0xf9037c7a, 0x1010523f, 0xb1a96f9c, - 0x89b49bfc, 0xf2469dc2, 0x1692f9e1, 0x95ec9a68, 0x09426ab7, 0x0bc30953, 0x8628bd58, 0xa28375f2, - 0xd9d4c2bf, 0xaae40027, 0x2b56df1b, 0x9d9fbc50, 0x14bf937d, 0xe7b0fb0a, 0xa5e40995, 0xfae90145, - 0x1ea68371, 0x671f2f40, 0xc654778c, 0x477cf3fd, 0x6aa5cbda, 0x8f9960c8, 0xc08542ef, 0x88bbddc8, + UINT64_C(0xe3c9939c), UINT64_C(0x3d848fecbb), { UINT64_C(0), UINT64_C(0) }, // dummy + { + 0x78bb7f84, 0xc6a18ac7, 0xeb321f90, 0x35d4f871, 0x61a5f4a7, 0x6d591ba2, 0x7f93ad57, 0x96841919, + 0xea7890a9, 0x0fa2f69c, 0x1866af58, 0x7f257346, 0xdcc51cd9, 0x92e78656, 0xc4628292, 0x42e01b49, + 0x40541662, 0x37af7888, 0x4faa39af, 0xa3207d98, 0x63750fda, 0x2767c143, 0xf11a2916, 
0x618ceb9b, + 0x9d684ce0, 0x69088033, 0x1ab5a1c7, 0x0f0a4f86, 0x4e49f893, 0x0ca32464, 0x90a7c38e, 0x5a0aded0, + 0x2dae1926, 0x0d935a0e, 0xde592a69, 0x085299b2, 0x4977a3a0, 0x7e82d9bc, 0x399e6a95, 0xdb9f1b90, + 0xe1dfe431, 0xbac5a72d, 0x168fe9ef, 0x9727301e, 0x76cd1ddb, 0x2bcd89e0, 0x45b7de13, 0xf239f2ad, + 0xae66187d, 0xb92a6f32, 0xf0fb1c7f, 0xb77384f2, 0x6e405312, 0x6616a82e, 0x9bdca728, 0x1b5e6782, + 0xdd243a3f, 0xf148d161, 0xfe0e7b47, 0x0fdadcf7, 0x9f21d59d, 0x5057328f, 0x22f944b9, 0x7e68d807, + 0x46de914d, 0x2d351dad, 0x6b0f3436, 0x6d6a8943, 0xcd18923c, 0x2e8fa891, 0x33f1ed84, 0x30e3a20a, + 0xa15f52a0, 0x3162fa56, 0xa60d4a72, 0x3e9fab64, 0x0a584673, 0x99d08542, 0x5ce99b5a, 0xcf1be8b0, + 0xe83225e3, 0xad522e70, 0xb17e0c87, 0x5b081b14, 0xc4c71a48, 0xb430a70b, 0xf38673cd, 0x1aad3b26, + 0x0e50ca70, 0xa1aeb568, 0x4140ea0c, 0xdabeee2d, 0x2779c11b, 0x5e06c86e, 0x12803b8f, 0xa46fd322, + 0x7de67db9, 0x7d1ee355, 0xbea94742, 0xf529e572, 0x5374fffc, 0xf9037c7a, 0x1010523f, 0xb1a96f9c, + 0x89b49bfc, 0xf2469dc2, 0x1692f9e1, 0x95ec9a68, 0x09426ab7, 0x0bc30953, 0x8628bd58, 0xa28375f2, + 0xd9d4c2bf, 0xaae40027, 0x2b56df1b, 0x9d9fbc50, 0x14bf937d, 0xe7b0fb0a, 0xa5e40995, 0xfae90145, + 0x1ea68371, 0x671f2f40, 0xc654778c, 0x477cf3fd, 0x6aa5cbda, 0x8f9960c8, 0xc08542ef, 0x88bbddc8, + }, }, - }, - // Level 6 - { - UINT64_C(0xf33fe2d4), UINT64_C(0x3be3330adb), {UINT64_C(0), UINT64_C(0)}, // dummy + // Level 6 { - 0x413faa9b, 0x1a3a2814, 0x957ff066, 0xfc5c55ec, 0x7898f40d, 0x30d71b62, 0xab1f1b9a, 0x5c93c31a, - 0x27e1bf84, 0x277fd4f4, 0xc8de8b61, 0x619ec0a3, 0xcc3106c9, 0x7e07e8c7, 0xadbbff04, 0x986f8050, - 0x26cd3f0a, 0xe7dcfd5a, 0xed3be524, 0x4a1e0f2b, 0xe0888023, 0x24d0c5eb, 0x476e89ae, 0x1a222b82, - 0xb3d0cd98, 0x8856e275, 0x95ac5c19, 0xbbf334b5, 0x1a346ac4, 0x9f9ed27d, 0xe64567c6, 0xfc52f176, - 0x98c8223c, 0xc09233fb, 0x078e98a4, 0xa36a369a, 0x89dfd3f0, 0x10a40ad1, 0xd14f4f1f, 0xe8ec2908, - 0xb9af0bd3, 0x4d55c288, 0xc235e430, 0x77564268, 0x42c4877e, 0x00baab49, 0xd79bda2b, 
0x490fcfc2, - 0x225bfa4b, 0x216af042, 0xac221547, 0x6d8d84e0, 0x17dc383c, 0x49dcb049, 0x46d29882, 0x6661b4ed, - 0x77b0becd, 0xf7a52591, 0x70c7256d, 0x0872d1fd, 0x2940fad9, 0x2c857e39, 0x358bf808, 0x0081180c, - 0x01ec2a40, 0x3b7e716d, 0x2e0da024, 0xb77c9d9f, 0x725b6a35, 0x42d22b0c, 0x30fe2079, 0x8b72db40, - 0xba80de6a, 0x03fb3689, 0x0557ad42, 0x7237cc5d, 0x792b74ae, 0x3bd5a870, 0x136749ef, 0x81c9ddf5, - 0x95b80aa7, 0x7e885861, 0xc797839c, 0x667083b5, 0xe8e9b2d7, 0x9b282b8e, 0x8e7a7db0, 0x79d39fea, - 0x1f9cea00, 0xf7c5c4f1, 0x9e669399, 0x136a5889, 0x680d40a6, 0xea6ba4fa, 0xf7660f4b, 0xfd9af075, - 0xf242ad0c, 0xcf89799a, 0x1173b431, 0x8b3b0aa0, 0xd8e862ff, 0x6ee0e93e, 0x482772e0, 0x6f382985, - 0x995506f1, 0x5f1c3b7f, 0xc54d0f78, 0x5ba663aa, 0x91e7cc43, 0x07295028, 0xe1f9640d, 0x5e0d49cb, - 0xd1d6d96a, 0x7e602d59, 0xc8a376ac, 0x15ddcff4, 0x90481328, 0x543e0eb7, 0x07d297e4, 0xddfb2d18, - 0x94a578aa, 0x9a39368e, 0x6aab286e, 0x0a39debd, 0x8ee5e818, 0x5c30655e, 0x661772e5, 0x527b25c1, + UINT64_C(0xf33fe2d4), UINT64_C(0x3be3330adb), { UINT64_C(0), UINT64_C(0) }, // dummy + { + 0x413faa9b, 0x1a3a2814, 0x957ff066, 0xfc5c55ec, 0x7898f40d, 0x30d71b62, 0xab1f1b9a, 0x5c93c31a, + 0x27e1bf84, 0x277fd4f4, 0xc8de8b61, 0x619ec0a3, 0xcc3106c9, 0x7e07e8c7, 0xadbbff04, 0x986f8050, + 0x26cd3f0a, 0xe7dcfd5a, 0xed3be524, 0x4a1e0f2b, 0xe0888023, 0x24d0c5eb, 0x476e89ae, 0x1a222b82, + 0xb3d0cd98, 0x8856e275, 0x95ac5c19, 0xbbf334b5, 0x1a346ac4, 0x9f9ed27d, 0xe64567c6, 0xfc52f176, + 0x98c8223c, 0xc09233fb, 0x078e98a4, 0xa36a369a, 0x89dfd3f0, 0x10a40ad1, 0xd14f4f1f, 0xe8ec2908, + 0xb9af0bd3, 0x4d55c288, 0xc235e430, 0x77564268, 0x42c4877e, 0x00baab49, 0xd79bda2b, 0x490fcfc2, + 0x225bfa4b, 0x216af042, 0xac221547, 0x6d8d84e0, 0x17dc383c, 0x49dcb049, 0x46d29882, 0x6661b4ed, + 0x77b0becd, 0xf7a52591, 0x70c7256d, 0x0872d1fd, 0x2940fad9, 0x2c857e39, 0x358bf808, 0x0081180c, + 0x01ec2a40, 0x3b7e716d, 0x2e0da024, 0xb77c9d9f, 0x725b6a35, 0x42d22b0c, 0x30fe2079, 0x8b72db40, + 0xba80de6a, 0x03fb3689, 
0x0557ad42, 0x7237cc5d, 0x792b74ae, 0x3bd5a870, 0x136749ef, 0x81c9ddf5, + 0x95b80aa7, 0x7e885861, 0xc797839c, 0x667083b5, 0xe8e9b2d7, 0x9b282b8e, 0x8e7a7db0, 0x79d39fea, + 0x1f9cea00, 0xf7c5c4f1, 0x9e669399, 0x136a5889, 0x680d40a6, 0xea6ba4fa, 0xf7660f4b, 0xfd9af075, + 0xf242ad0c, 0xcf89799a, 0x1173b431, 0x8b3b0aa0, 0xd8e862ff, 0x6ee0e93e, 0x482772e0, 0x6f382985, + 0x995506f1, 0x5f1c3b7f, 0xc54d0f78, 0x5ba663aa, 0x91e7cc43, 0x07295028, 0xe1f9640d, 0x5e0d49cb, + 0xd1d6d96a, 0x7e602d59, 0xc8a376ac, 0x15ddcff4, 0x90481328, 0x543e0eb7, 0x07d297e4, 0xddfb2d18, + 0x94a578aa, 0x9a39368e, 0x6aab286e, 0x0a39debd, 0x8ee5e818, 0x5c30655e, 0x661772e5, 0x527b25c1, + }, }, - }, - // Level 7 - { - UINT64_C(0x6d983dad), UINT64_C(0x3e435b56e5), {UINT64_C(0), UINT64_C(0)}, // dummy + // Level 7 { - 0x4014ee95, 0xfdbe07f6, 0x27a2c5d7, 0x497ae9f0, 0x18a372d5, 0x375c55ae, 0x4aab4110, 0x2d554d43, - 0x9504cbcd, 0xfbaedcce, 0x758c4326, 0xfafbba66, 0x9bda2b02, 0x1d955954, 0xe4bb3e12, 0xd558ed02, - 0x770c3bec, 0x6fcf284d, 0x7142cbb0, 0xefe84369, 0x9516d833, 0x097022c9, 0x8572785a, 0xcc866071, - 0x11084cac, 0x15707ce6, 0xc8a05f69, 0xf15c7b38, 0x3607b067, 0xa8f646b2, 0x62949620, 0x0e013130, - 0xe73a8f37, 0x853e3bd2, 0x4ad40839, 0x961fff58, 0x5b9a291e, 0x4df678ae, 0x9e49ab57, 0x12c0823b, - 0x804a15b9, 0xedbe4a7f, 0x3f65fe91, 0x0aca6940, 0xa14a7dc6, 0xd9a78895, 0x4c90b7fa, 0x90443c6a, - 0xc1325ada, 0x48876a7b, 0x091df649, 0x7ae46bc8, 0xdcfdc695, 0xc398dd91, 0xe6a24f20, 0x333f496b, - 0xe08413da, 0xbd197fa0, 0x55abc5e6, 0xa1abe124, 0x1cfdeee2, 0x48732fff, 0xdb2f1a4a, 0x192de0ae, - 0x87a288b7, 0x406f0062, 0xc4358b22, 0x19ccdeba, 0xa30cd0c5, 0x848d1e9a, 0x2fd31932, 0x7b78238e, - 0x9e9a208e, 0x517f5394, 0x8b689859, 0xe2202a00, 0x7d82aa8d, 0x736d2f4c, 0x8a5c630a, 0xaf1857bf, - 0xd56d5b1f, 0x3416feea, 0x6b16d737, 0xf61f0747, 0x359f0963, 0x6044d7c6, 0xedcdcafd, 0xa53ff8c5, - 0x09c7732a, 0x7f1b4137, 0x9d63e5c0, 0x776c5120, 0x0b0d231e, 0x57e54da1, 0x3b5e1e5e, 0x63069af7, - 0xa44a600c, 0x3d5a02fb, 
0x2387039e, 0xf32214b4, 0x95707014, 0x65ae19ab, 0xa906bfd3, 0x41083458, - 0x106bdfd4, 0x41a3efe8, 0xb58bee3f, 0xaa70953c, 0x01cf2485, 0x40e5bdb9, 0xc94b2765, 0xc79cd151, - 0xad2d9daa, 0x62b40b60, 0x02800b32, 0x97d69686, 0xa9f0efdb, 0x24952809, 0x48694c4f, 0x630104fe, - 0x24f26b53, 0xc94d2a0f, 0x8635b8db, 0xb6822421, 0xe53c26dd, 0x9286330f, 0xf5a431ec, 0xacbb86b4, + UINT64_C(0x6d983dad), UINT64_C(0x3e435b56e5), { UINT64_C(0), UINT64_C(0) }, // dummy + { + 0x4014ee95, 0xfdbe07f6, 0x27a2c5d7, 0x497ae9f0, 0x18a372d5, 0x375c55ae, 0x4aab4110, 0x2d554d43, + 0x9504cbcd, 0xfbaedcce, 0x758c4326, 0xfafbba66, 0x9bda2b02, 0x1d955954, 0xe4bb3e12, 0xd558ed02, + 0x770c3bec, 0x6fcf284d, 0x7142cbb0, 0xefe84369, 0x9516d833, 0x097022c9, 0x8572785a, 0xcc866071, + 0x11084cac, 0x15707ce6, 0xc8a05f69, 0xf15c7b38, 0x3607b067, 0xa8f646b2, 0x62949620, 0x0e013130, + 0xe73a8f37, 0x853e3bd2, 0x4ad40839, 0x961fff58, 0x5b9a291e, 0x4df678ae, 0x9e49ab57, 0x12c0823b, + 0x804a15b9, 0xedbe4a7f, 0x3f65fe91, 0x0aca6940, 0xa14a7dc6, 0xd9a78895, 0x4c90b7fa, 0x90443c6a, + 0xc1325ada, 0x48876a7b, 0x091df649, 0x7ae46bc8, 0xdcfdc695, 0xc398dd91, 0xe6a24f20, 0x333f496b, + 0xe08413da, 0xbd197fa0, 0x55abc5e6, 0xa1abe124, 0x1cfdeee2, 0x48732fff, 0xdb2f1a4a, 0x192de0ae, + 0x87a288b7, 0x406f0062, 0xc4358b22, 0x19ccdeba, 0xa30cd0c5, 0x848d1e9a, 0x2fd31932, 0x7b78238e, + 0x9e9a208e, 0x517f5394, 0x8b689859, 0xe2202a00, 0x7d82aa8d, 0x736d2f4c, 0x8a5c630a, 0xaf1857bf, + 0xd56d5b1f, 0x3416feea, 0x6b16d737, 0xf61f0747, 0x359f0963, 0x6044d7c6, 0xedcdcafd, 0xa53ff8c5, + 0x09c7732a, 0x7f1b4137, 0x9d63e5c0, 0x776c5120, 0x0b0d231e, 0x57e54da1, 0x3b5e1e5e, 0x63069af7, + 0xa44a600c, 0x3d5a02fb, 0x2387039e, 0xf32214b4, 0x95707014, 0x65ae19ab, 0xa906bfd3, 0x41083458, + 0x106bdfd4, 0x41a3efe8, 0xb58bee3f, 0xaa70953c, 0x01cf2485, 0x40e5bdb9, 0xc94b2765, 0xc79cd151, + 0xad2d9daa, 0x62b40b60, 0x02800b32, 0x97d69686, 0xa9f0efdb, 0x24952809, 0x48694c4f, 0x630104fe, + 0x24f26b53, 0xc94d2a0f, 0x8635b8db, 0xb6822421, 0xe53c26dd, 
0x9286330f, 0xf5a431ec, 0xacbb86b4, + }, }, - }, }; -//STATIC_ASSERT(PMPML_32_LEVELS <= 8, "Only 8 levels of data currently exist"); +// STATIC_ASSERT(PMPML_32_LEVELS <= 8, "Only 8 levels of data currently exist"); //------------------------------------------------------------- // 64-bit constants @@ -317,345 +314,345 @@ static thread_local random_data_for_PMPML_32 rd_for_PMPML_32[PMPML_32_LEVELS] = #define PMPML_64_WORD_SIZE_BYTES_LOG2 3 #define PMPML_64_LEVELS 8 // Derived constants -static const uint32_t PMPML_64_CHUNK_SIZE = (1 << PMPML_64_CHUNK_SIZE_LOG2); -static const uint32_t PMPML_64_WORD_SIZE_BYTES = (1 << PMPML_64_WORD_SIZE_BYTES_LOG2); -static const uint32_t PMPML_64_CHUNK_SIZE_BYTES = PMPML_64_CHUNK_SIZE * PMPML_64_WORD_SIZE_BYTES; +static const uint32_t PMPML_64_CHUNK_SIZE = (1 << PMPML_64_CHUNK_SIZE_LOG2 ); +static const uint32_t PMPML_64_WORD_SIZE_BYTES = (1 << PMPML_64_WORD_SIZE_BYTES_LOG2); +static const uint32_t PMPML_64_CHUNK_SIZE_BYTES = PMPML_64_CHUNK_SIZE * PMPML_64_WORD_SIZE_BYTES; static const uint32_t PMPML_64_CHUNK_SIZE_BYTES_LOG2 = PMPML_64_CHUNK_SIZE_LOG2 + PMPML_64_WORD_SIZE_BYTES_LOG2; // container for coefficients -typedef struct alignas(32) _random_data_for_PMPML_64 { - uint64_t const_term; - uint64_t cachedSumLow; - uint64_t cachedSumHigh; - uint64_t dummy; - uint64_t random_coeff[1 << PMPML_64_CHUNK_SIZE_LOG2]; +typedef struct alignas( 32 ) _random_data_for_PMPML_64 { + uint64_t const_term; + uint64_t cachedSumLow; + uint64_t cachedSumHigh; + uint64_t dummy; + uint64_t random_coeff[1 << PMPML_64_CHUNK_SIZE_LOG2]; } random_data_for_PMPML_64; static thread_local random_data_for_PMPML_64 rd_for_PMPML_64[PMPML_64_LEVELS] = { - // Level 0 - { - UINT64_C(0x4a29bfabe82f3abe), UINT64_C(0x2ccb0e578cfa99b), UINT64_C(0x000000041), 0, // sum of coeff and dummy + // Level 0 { - UINT64_C(0x2f129e0f017dff36), UINT64_C(0xb42c52ed219ac8ce), UINT64_C(0xd3324e2b5efdfa21), UINT64_C(0xc830746c5019f1de), - UINT64_C(0x57b1306026904f72), 
UINT64_C(0x0ec3ffd84539cf3d), UINT64_C(0x95664d4564b54986), UINT64_C(0xe0ee74349c002680), - UINT64_C(0x5a365b98971ff939), UINT64_C(0xf6bcac95513c540e), UINT64_C(0x49567d345ab6b3cf), UINT64_C(0x526ab3f6dee0def3), - UINT64_C(0x1d6fb9cf7dc2f089), UINT64_C(0xaeff1dbeb93f0749), UINT64_C(0xd4e05404a7eecac8), UINT64_C(0x5175e11e90cf1a69), - UINT64_C(0x29aac3810d90cf44), UINT64_C(0xe9930a671d8aab37), UINT64_C(0x00eded5ac8eeb924), UINT64_C(0xdb4820639e005b34), - UINT64_C(0x12debc35a3054ea7), UINT64_C(0x5a9dccd55b94986f), UINT64_C(0x666773be4be48027), UINT64_C(0xf9a45b94c9c5ce42), - UINT64_C(0xf3f018ccd958cf92), UINT64_C(0x473c23beeb584939), UINT64_C(0xc5e4f821ec00cd5b), UINT64_C(0x1d61cf5079c28b1c), - UINT64_C(0xf46643c7b0c9427b), UINT64_C(0x34d7177b30a2a078), UINT64_C(0x5279d153b2ab790a), UINT64_C(0xeaf18c48a1791f4c), - UINT64_C(0x90a13cb0c7ccb5b1), UINT64_C(0x2900f5242f23c3e6), UINT64_C(0x0975f1f8a1f6800f), UINT64_C(0xa53f1a9605cce7f2), - UINT64_C(0x0b396087cda51e60), UINT64_C(0x842e287b1fc29d36), UINT64_C(0x4556b0258878e52d), UINT64_C(0x546c60312887a3f0), - UINT64_C(0xdc13b1bb35399672), UINT64_C(0x32f18c1aa7a4697c), UINT64_C(0xc9223ebe2ebe5810), UINT64_C(0xeb845691d3f028e8), - UINT64_C(0xa21337280cc34732), UINT64_C(0x94d78e46776a29e2), UINT64_C(0x6cba9535a7c4c9a8), UINT64_C(0x9758fe18e1fb3d08), - UINT64_C(0x92478227db728e63), UINT64_C(0xa782477118744c90), UINT64_C(0xb1e0b74044f53769), UINT64_C(0x7b3a58b416f2474f), - UINT64_C(0xea041c911fc2991f), UINT64_C(0x4515562dfb118051), UINT64_C(0x36133ab6715ff0bd), UINT64_C(0xb0d107f4c74bcfc7), - UINT64_C(0xef47885bb62db5b8), UINT64_C(0xb2060330e33f5951), UINT64_C(0x96758e992ce56ba6), UINT64_C(0xe6ca7568b7f6a8ec), - UINT64_C(0xd6fd9b1a7b29fb71), UINT64_C(0x2e95d6aaa1593907), UINT64_C(0xf1abe303bdda6758), UINT64_C(0x1eb12f0ed0f91332), - UINT64_C(0xf593589b9ff39cbb), UINT64_C(0x110e67013362cf26), UINT64_C(0x671ca6801c7f9d57), UINT64_C(0x0aa55c338ed83b64), - UINT64_C(0x627d00690f3f465d), UINT64_C(0xff97bfbba48e8524), 
UINT64_C(0x9c3f5a0387919b50), UINT64_C(0x25f1e1efb7f91c48), - UINT64_C(0x7114cada956a53ae), UINT64_C(0x626a4e2ff89c39af), UINT64_C(0x86540186b2e391cc), UINT64_C(0x82d5f935e9a90bcd), - UINT64_C(0xe2d4d3059b6f5dc1), UINT64_C(0xbb3cc83e6478dd2e), UINT64_C(0x59b9b400b166ed62), UINT64_C(0xf04b9b209bb113b1), - UINT64_C(0xb27be3c3397ac130), UINT64_C(0xf619002cc54ac417), UINT64_C(0x46a8c23f12907210), UINT64_C(0x54fc42e7d99aa54f), - UINT64_C(0x2b264e8ea68323e7), UINT64_C(0x0e0b0f627257dfb9), UINT64_C(0xadc098de597949e8), UINT64_C(0xe2ba17b10bd5401a), - UINT64_C(0x7fa49be97f34ca1a), UINT64_C(0x8817b0a7e7d981cf), UINT64_C(0x3bede65042860a1f), UINT64_C(0xae569b2aafd241eb), - UINT64_C(0x5f1cc5a3059aa744), UINT64_C(0x762409219323dae9), UINT64_C(0x64d5aac875461b4e), UINT64_C(0x62147c9101655025), - UINT64_C(0xbde2c420826c8ddd), UINT64_C(0xde6d7e2be12d0797), UINT64_C(0x8338ac734c823357), UINT64_C(0x419b2aa58f1b985a), - UINT64_C(0x39ed88775355ae2d), UINT64_C(0x7a2e8cc72c7f3bce), UINT64_C(0x97935746814fa944), UINT64_C(0x828331abf2018ef4), - UINT64_C(0xd6b9060cd1d0ba56), UINT64_C(0x5548e64ac7626ff2), UINT64_C(0xe4635461f9175d23), UINT64_C(0x566d5d69d40cd206), - UINT64_C(0x65ffaf0c83ae838f), UINT64_C(0x5a585c800a52de9e), UINT64_C(0x64a121bc55d0b7a2), UINT64_C(0x661ef9d5b90d6e53), - UINT64_C(0xb298bfcff8afba20), UINT64_C(0x2a60665850d1a5e8), UINT64_C(0x61aba7a90d9ae6eb), UINT64_C(0x083667e22ffdf423), - UINT64_C(0xd5efe61f9bd9a79c), UINT64_C(0x582a3cf851cafad0), UINT64_C(0x1989365a301ef819), UINT64_C(0xe2778e8aee7b917e), - UINT64_C(0x4bd139ea2fc74066), UINT64_C(0x2716bfaa4b18912a), UINT64_C(0x1a477a7687dbbe34), UINT64_C(0x90127b1d8835c6e1), - UINT64_C(0x44651dc23bfac77d), UINT64_C(0xb030740966562609), UINT64_C(0xb295d4733127a190), UINT64_C(0xf022c66dc7b74382), + UINT64_C(0x4a29bfabe82f3abe), UINT64_C(0x2ccb0e578cfa99b), UINT64_C(0x000000041), 0, // sum of coeff and dummy + { + UINT64_C(0x2f129e0f017dff36), UINT64_C(0xb42c52ed219ac8ce), UINT64_C(0xd3324e2b5efdfa21), 
UINT64_C(0xc830746c5019f1de), + UINT64_C(0x57b1306026904f72), UINT64_C(0x0ec3ffd84539cf3d), UINT64_C(0x95664d4564b54986), UINT64_C(0xe0ee74349c002680), + UINT64_C(0x5a365b98971ff939), UINT64_C(0xf6bcac95513c540e), UINT64_C(0x49567d345ab6b3cf), UINT64_C(0x526ab3f6dee0def3), + UINT64_C(0x1d6fb9cf7dc2f089), UINT64_C(0xaeff1dbeb93f0749), UINT64_C(0xd4e05404a7eecac8), UINT64_C(0x5175e11e90cf1a69), + UINT64_C(0x29aac3810d90cf44), UINT64_C(0xe9930a671d8aab37), UINT64_C(0x00eded5ac8eeb924), UINT64_C(0xdb4820639e005b34), + UINT64_C(0x12debc35a3054ea7), UINT64_C(0x5a9dccd55b94986f), UINT64_C(0x666773be4be48027), UINT64_C(0xf9a45b94c9c5ce42), + UINT64_C(0xf3f018ccd958cf92), UINT64_C(0x473c23beeb584939), UINT64_C(0xc5e4f821ec00cd5b), UINT64_C(0x1d61cf5079c28b1c), + UINT64_C(0xf46643c7b0c9427b), UINT64_C(0x34d7177b30a2a078), UINT64_C(0x5279d153b2ab790a), UINT64_C(0xeaf18c48a1791f4c), + UINT64_C(0x90a13cb0c7ccb5b1), UINT64_C(0x2900f5242f23c3e6), UINT64_C(0x0975f1f8a1f6800f), UINT64_C(0xa53f1a9605cce7f2), + UINT64_C(0x0b396087cda51e60), UINT64_C(0x842e287b1fc29d36), UINT64_C(0x4556b0258878e52d), UINT64_C(0x546c60312887a3f0), + UINT64_C(0xdc13b1bb35399672), UINT64_C(0x32f18c1aa7a4697c), UINT64_C(0xc9223ebe2ebe5810), UINT64_C(0xeb845691d3f028e8), + UINT64_C(0xa21337280cc34732), UINT64_C(0x94d78e46776a29e2), UINT64_C(0x6cba9535a7c4c9a8), UINT64_C(0x9758fe18e1fb3d08), + UINT64_C(0x92478227db728e63), UINT64_C(0xa782477118744c90), UINT64_C(0xb1e0b74044f53769), UINT64_C(0x7b3a58b416f2474f), + UINT64_C(0xea041c911fc2991f), UINT64_C(0x4515562dfb118051), UINT64_C(0x36133ab6715ff0bd), UINT64_C(0xb0d107f4c74bcfc7), + UINT64_C(0xef47885bb62db5b8), UINT64_C(0xb2060330e33f5951), UINT64_C(0x96758e992ce56ba6), UINT64_C(0xe6ca7568b7f6a8ec), + UINT64_C(0xd6fd9b1a7b29fb71), UINT64_C(0x2e95d6aaa1593907), UINT64_C(0xf1abe303bdda6758), UINT64_C(0x1eb12f0ed0f91332), + UINT64_C(0xf593589b9ff39cbb), UINT64_C(0x110e67013362cf26), UINT64_C(0x671ca6801c7f9d57), UINT64_C(0x0aa55c338ed83b64), + 
UINT64_C(0x627d00690f3f465d), UINT64_C(0xff97bfbba48e8524), UINT64_C(0x9c3f5a0387919b50), UINT64_C(0x25f1e1efb7f91c48), + UINT64_C(0x7114cada956a53ae), UINT64_C(0x626a4e2ff89c39af), UINT64_C(0x86540186b2e391cc), UINT64_C(0x82d5f935e9a90bcd), + UINT64_C(0xe2d4d3059b6f5dc1), UINT64_C(0xbb3cc83e6478dd2e), UINT64_C(0x59b9b400b166ed62), UINT64_C(0xf04b9b209bb113b1), + UINT64_C(0xb27be3c3397ac130), UINT64_C(0xf619002cc54ac417), UINT64_C(0x46a8c23f12907210), UINT64_C(0x54fc42e7d99aa54f), + UINT64_C(0x2b264e8ea68323e7), UINT64_C(0x0e0b0f627257dfb9), UINT64_C(0xadc098de597949e8), UINT64_C(0xe2ba17b10bd5401a), + UINT64_C(0x7fa49be97f34ca1a), UINT64_C(0x8817b0a7e7d981cf), UINT64_C(0x3bede65042860a1f), UINT64_C(0xae569b2aafd241eb), + UINT64_C(0x5f1cc5a3059aa744), UINT64_C(0x762409219323dae9), UINT64_C(0x64d5aac875461b4e), UINT64_C(0x62147c9101655025), + UINT64_C(0xbde2c420826c8ddd), UINT64_C(0xde6d7e2be12d0797), UINT64_C(0x8338ac734c823357), UINT64_C(0x419b2aa58f1b985a), + UINT64_C(0x39ed88775355ae2d), UINT64_C(0x7a2e8cc72c7f3bce), UINT64_C(0x97935746814fa944), UINT64_C(0x828331abf2018ef4), + UINT64_C(0xd6b9060cd1d0ba56), UINT64_C(0x5548e64ac7626ff2), UINT64_C(0xe4635461f9175d23), UINT64_C(0x566d5d69d40cd206), + UINT64_C(0x65ffaf0c83ae838f), UINT64_C(0x5a585c800a52de9e), UINT64_C(0x64a121bc55d0b7a2), UINT64_C(0x661ef9d5b90d6e53), + UINT64_C(0xb298bfcff8afba20), UINT64_C(0x2a60665850d1a5e8), UINT64_C(0x61aba7a90d9ae6eb), UINT64_C(0x083667e22ffdf423), + UINT64_C(0xd5efe61f9bd9a79c), UINT64_C(0x582a3cf851cafad0), UINT64_C(0x1989365a301ef819), UINT64_C(0xe2778e8aee7b917e), + UINT64_C(0x4bd139ea2fc74066), UINT64_C(0x2716bfaa4b18912a), UINT64_C(0x1a477a7687dbbe34), UINT64_C(0x90127b1d8835c6e1), + UINT64_C(0x44651dc23bfac77d), UINT64_C(0xb030740966562609), UINT64_C(0xb295d4733127a190), UINT64_C(0xf022c66dc7b74382), + }, }, - }, // Level 1 - { - UINT64_C(0x39cd7650ff4f752a), UINT64_C(0xe9b49347770073e9), UINT64_C(0x00000003f), 0, // sum of coeff and dummy { - 
UINT64_C(0x6a22166c40f87e99), UINT64_C(0xff7e13387c337404), UINT64_C(0xd15f0f4dd5de05be), UINT64_C(0x825bb897d6ad1ef4), - UINT64_C(0x77b045691a63a8ec), UINT64_C(0x0a49df4370eb4048), UINT64_C(0xf6c80d9827e7043b), UINT64_C(0x1628979784f8c50d), - UINT64_C(0xd1a3e1f52402e01b), UINT64_C(0x6cfa2849efd5bc7f), UINT64_C(0xc6416ba240b063ec), UINT64_C(0x772d9ac4e43b2707), - UINT64_C(0x8cc9c4735bea20c5), UINT64_C(0xede4a423d10791b3), UINT64_C(0xc75eb6c16dbb96eb), UINT64_C(0x2df99f5f3ac91794), - UINT64_C(0x31be65ba10763ed5), UINT64_C(0xe89ce26b47440bc2), UINT64_C(0xe537526e59ddafdf), UINT64_C(0x16ae378ed0ef349c), - UINT64_C(0x747c11f0403b290e), UINT64_C(0xc1ada5226937ff10), UINT64_C(0x91886c173226bd6f), UINT64_C(0x7e0002e3c3aaeee3), - UINT64_C(0x65c329b5ce3ffac3), UINT64_C(0xd01f1343a37cc2f7), UINT64_C(0x366e7896927020e8), UINT64_C(0x84327c9993246a19), - UINT64_C(0x2c08dcf57f5487d1), UINT64_C(0x9981f7143c3f09bf), UINT64_C(0xe413c704e8ac8b14), UINT64_C(0x6c1354b6a416b3fb), - UINT64_C(0xaf14a970a5db32a3), UINT64_C(0x37428eb1cbdf20a8), UINT64_C(0x9b3a2f48a45999fc), UINT64_C(0x894d39e47aad1efa), - UINT64_C(0x662abdc6b0bb17e8), UINT64_C(0xd449820255e4bc4a), UINT64_C(0x5fc5d5a18389fa01), UINT64_C(0xf76102aa2484326e), - UINT64_C(0x08c4308c96b8ef43), UINT64_C(0x5c3a562402cee74c), UINT64_C(0xcf896705837e6c8c), UINT64_C(0xe069655ea3c1a067), - UINT64_C(0x3478c1c88ef76c15), UINT64_C(0x8f97330dff9ff33b), UINT64_C(0xba8c150f3fa32e41), UINT64_C(0x1f9be6e624480693), - UINT64_C(0x65d39bd613016d2c), UINT64_C(0x8d4504cb5be46d10), UINT64_C(0xf8b9f2f1685ce679), UINT64_C(0x023c59373ff7edc6), - UINT64_C(0x86283f83c707e5fa), UINT64_C(0xd7c3eebedd1a109b), UINT64_C(0x942b2786ea139167), UINT64_C(0xf54a2b229a268134), - UINT64_C(0x85d175f335d21fa1), UINT64_C(0xce39abb9d7e787e0), UINT64_C(0x3290b3797c71b62d), UINT64_C(0x954aebd35bc2d445), - UINT64_C(0xfb24c9a40287bbea), UINT64_C(0x7c50d2bef8066d38), UINT64_C(0xf8614d3fa751b1d1), UINT64_C(0x0ed6bd1b203b43b9), - UINT64_C(0x7444a688119fc803), 
UINT64_C(0xaafc0cf7a8f588a3), UINT64_C(0x86790f357d28efc6), UINT64_C(0xbc6d006ea2a48c65), - UINT64_C(0x192cd81c89e62897), UINT64_C(0x144a15fa87c09aa8), UINT64_C(0xc9466727de209085), UINT64_C(0xeaf453256eda97d1), - UINT64_C(0x2f0baafb5017bc8e), UINT64_C(0x1871e4808c0438bd), UINT64_C(0x1e78e125290b3e64), UINT64_C(0xb85bef6ba39ebc7d), - UINT64_C(0xc4487e3cabd4bf9e), UINT64_C(0x2ec0963510ce4901), UINT64_C(0x3b760a55c2ffc8aa), UINT64_C(0x0538bff351c74590), - UINT64_C(0xa2720fb707bf396d), UINT64_C(0xbca7ae2418758cc9), UINT64_C(0x6080c33057e68c8d), UINT64_C(0x0ce8e54cf677833c), - UINT64_C(0xc08644e5a40fa1ec), UINT64_C(0x143ce206cebb6352), UINT64_C(0x9842eb597773bb9a), UINT64_C(0xf9a01484a87d6b12), - UINT64_C(0x734da10581a35732), UINT64_C(0x1c5817613ea17f8d), UINT64_C(0xfbeb5bf815f12eb3), UINT64_C(0x0879175b1d28ed23), - UINT64_C(0xc470ffc0a1ce0cfd), UINT64_C(0x0b4b4e44b3d0b5d8), UINT64_C(0x2cd5a8501f56ac9a), UINT64_C(0xf2dfcf44a1689892), - UINT64_C(0x3bf38a66c6b001a2), UINT64_C(0xabfe0c1ce71d4829), UINT64_C(0xde1916f0d7565ad1), UINT64_C(0x97d66cfacf3df802), - UINT64_C(0x0e28348769858002), UINT64_C(0xefed65d521df30e9), UINT64_C(0x33abb8c0116b7721), UINT64_C(0xb21b1751d4a13405), - UINT64_C(0x3c445b844cb809e8), UINT64_C(0x48fe0d52ba18de8c), UINT64_C(0x88206dc4b93a7829), UINT64_C(0x2543fca442fe076b), - UINT64_C(0x4c6b6b567a3571d3), UINT64_C(0x47d9c2f551c39ba7), UINT64_C(0x2c6e0a4ebba24ac4), UINT64_C(0xb0a1c2f16942e728), - UINT64_C(0x536ca9a81adc2f15), UINT64_C(0xd84840af846d8115), UINT64_C(0x6a85aa0fa3159219), UINT64_C(0x4c167b95be156d20), - UINT64_C(0xcd3f7f07382d52cb), UINT64_C(0x000020e3a8604961), UINT64_C(0x0889912d52e797ba), UINT64_C(0x19eca83144939b12), - UINT64_C(0xb746c4bc57d2b80d), UINT64_C(0x5f19680e72e9ae82), UINT64_C(0xc8d7c655d341f90e), UINT64_C(0xd5d17f24f8e76882), - UINT64_C(0x111bc49d022a5575), UINT64_C(0xd6c434f7739424b9), UINT64_C(0x5d56d36b4ded16fe), UINT64_C(0x910276b4a008443f), + UINT64_C(0x39cd7650ff4f752a), UINT64_C(0xe9b49347770073e9), 
UINT64_C(0x00000003f), 0, // sum of coeff and dummy + { + UINT64_C(0x6a22166c40f87e99), UINT64_C(0xff7e13387c337404), UINT64_C(0xd15f0f4dd5de05be), UINT64_C(0x825bb897d6ad1ef4), + UINT64_C(0x77b045691a63a8ec), UINT64_C(0x0a49df4370eb4048), UINT64_C(0xf6c80d9827e7043b), UINT64_C(0x1628979784f8c50d), + UINT64_C(0xd1a3e1f52402e01b), UINT64_C(0x6cfa2849efd5bc7f), UINT64_C(0xc6416ba240b063ec), UINT64_C(0x772d9ac4e43b2707), + UINT64_C(0x8cc9c4735bea20c5), UINT64_C(0xede4a423d10791b3), UINT64_C(0xc75eb6c16dbb96eb), UINT64_C(0x2df99f5f3ac91794), + UINT64_C(0x31be65ba10763ed5), UINT64_C(0xe89ce26b47440bc2), UINT64_C(0xe537526e59ddafdf), UINT64_C(0x16ae378ed0ef349c), + UINT64_C(0x747c11f0403b290e), UINT64_C(0xc1ada5226937ff10), UINT64_C(0x91886c173226bd6f), UINT64_C(0x7e0002e3c3aaeee3), + UINT64_C(0x65c329b5ce3ffac3), UINT64_C(0xd01f1343a37cc2f7), UINT64_C(0x366e7896927020e8), UINT64_C(0x84327c9993246a19), + UINT64_C(0x2c08dcf57f5487d1), UINT64_C(0x9981f7143c3f09bf), UINT64_C(0xe413c704e8ac8b14), UINT64_C(0x6c1354b6a416b3fb), + UINT64_C(0xaf14a970a5db32a3), UINT64_C(0x37428eb1cbdf20a8), UINT64_C(0x9b3a2f48a45999fc), UINT64_C(0x894d39e47aad1efa), + UINT64_C(0x662abdc6b0bb17e8), UINT64_C(0xd449820255e4bc4a), UINT64_C(0x5fc5d5a18389fa01), UINT64_C(0xf76102aa2484326e), + UINT64_C(0x08c4308c96b8ef43), UINT64_C(0x5c3a562402cee74c), UINT64_C(0xcf896705837e6c8c), UINT64_C(0xe069655ea3c1a067), + UINT64_C(0x3478c1c88ef76c15), UINT64_C(0x8f97330dff9ff33b), UINT64_C(0xba8c150f3fa32e41), UINT64_C(0x1f9be6e624480693), + UINT64_C(0x65d39bd613016d2c), UINT64_C(0x8d4504cb5be46d10), UINT64_C(0xf8b9f2f1685ce679), UINT64_C(0x023c59373ff7edc6), + UINT64_C(0x86283f83c707e5fa), UINT64_C(0xd7c3eebedd1a109b), UINT64_C(0x942b2786ea139167), UINT64_C(0xf54a2b229a268134), + UINT64_C(0x85d175f335d21fa1), UINT64_C(0xce39abb9d7e787e0), UINT64_C(0x3290b3797c71b62d), UINT64_C(0x954aebd35bc2d445), + UINT64_C(0xfb24c9a40287bbea), UINT64_C(0x7c50d2bef8066d38), UINT64_C(0xf8614d3fa751b1d1), 
UINT64_C(0x0ed6bd1b203b43b9), + UINT64_C(0x7444a688119fc803), UINT64_C(0xaafc0cf7a8f588a3), UINT64_C(0x86790f357d28efc6), UINT64_C(0xbc6d006ea2a48c65), + UINT64_C(0x192cd81c89e62897), UINT64_C(0x144a15fa87c09aa8), UINT64_C(0xc9466727de209085), UINT64_C(0xeaf453256eda97d1), + UINT64_C(0x2f0baafb5017bc8e), UINT64_C(0x1871e4808c0438bd), UINT64_C(0x1e78e125290b3e64), UINT64_C(0xb85bef6ba39ebc7d), + UINT64_C(0xc4487e3cabd4bf9e), UINT64_C(0x2ec0963510ce4901), UINT64_C(0x3b760a55c2ffc8aa), UINT64_C(0x0538bff351c74590), + UINT64_C(0xa2720fb707bf396d), UINT64_C(0xbca7ae2418758cc9), UINT64_C(0x6080c33057e68c8d), UINT64_C(0x0ce8e54cf677833c), + UINT64_C(0xc08644e5a40fa1ec), UINT64_C(0x143ce206cebb6352), UINT64_C(0x9842eb597773bb9a), UINT64_C(0xf9a01484a87d6b12), + UINT64_C(0x734da10581a35732), UINT64_C(0x1c5817613ea17f8d), UINT64_C(0xfbeb5bf815f12eb3), UINT64_C(0x0879175b1d28ed23), + UINT64_C(0xc470ffc0a1ce0cfd), UINT64_C(0x0b4b4e44b3d0b5d8), UINT64_C(0x2cd5a8501f56ac9a), UINT64_C(0xf2dfcf44a1689892), + UINT64_C(0x3bf38a66c6b001a2), UINT64_C(0xabfe0c1ce71d4829), UINT64_C(0xde1916f0d7565ad1), UINT64_C(0x97d66cfacf3df802), + UINT64_C(0x0e28348769858002), UINT64_C(0xefed65d521df30e9), UINT64_C(0x33abb8c0116b7721), UINT64_C(0xb21b1751d4a13405), + UINT64_C(0x3c445b844cb809e8), UINT64_C(0x48fe0d52ba18de8c), UINT64_C(0x88206dc4b93a7829), UINT64_C(0x2543fca442fe076b), + UINT64_C(0x4c6b6b567a3571d3), UINT64_C(0x47d9c2f551c39ba7), UINT64_C(0x2c6e0a4ebba24ac4), UINT64_C(0xb0a1c2f16942e728), + UINT64_C(0x536ca9a81adc2f15), UINT64_C(0xd84840af846d8115), UINT64_C(0x6a85aa0fa3159219), UINT64_C(0x4c167b95be156d20), + UINT64_C(0xcd3f7f07382d52cb), UINT64_C(0x000020e3a8604961), UINT64_C(0x0889912d52e797ba), UINT64_C(0x19eca83144939b12), + UINT64_C(0xb746c4bc57d2b80d), UINT64_C(0x5f19680e72e9ae82), UINT64_C(0xc8d7c655d341f90e), UINT64_C(0xd5d17f24f8e76882), + UINT64_C(0x111bc49d022a5575), UINT64_C(0xd6c434f7739424b9), UINT64_C(0x5d56d36b4ded16fe), UINT64_C(0x910276b4a008443f), + }, }, - }, // 
Level 2 - { - UINT64_C(0x8d88b6de8694f9bd), UINT64_C(0xab3746b512cf0a0e), UINT64_C(0x00000003d), 0, // sum of coeff and dummy { - UINT64_C(0x8c35afea7008c707), UINT64_C(0x41ead554cfccdc94), UINT64_C(0x2efb2ec168e3bffc), UINT64_C(0xe7c3a0bbddc63920), - UINT64_C(0x4dce9e2b34302387), UINT64_C(0xfaf035fd5624990c), UINT64_C(0xccd919a786ba8213), UINT64_C(0x9a18857bdb2be4c1), - UINT64_C(0x001d03ba509647b6), UINT64_C(0x7e331694b4f66982), UINT64_C(0xb478c5a41317d762), UINT64_C(0xe717e226317c1144), - UINT64_C(0x022ffa0a2f15f66e), UINT64_C(0x6519929c261c063c), UINT64_C(0xff2060eae017d4e0), UINT64_C(0xefff6af725b87556), - UINT64_C(0x5d4d573a24be5312), UINT64_C(0xc07e9f4f495eb740), UINT64_C(0x5257032ed4c0e657), UINT64_C(0x2841f8526903c4ce), - UINT64_C(0xa5deee0ffb84873b), UINT64_C(0x45ce5d741491bbb2), UINT64_C(0x9c2b70601078ed64), UINT64_C(0x43837fdef168a0b0), - UINT64_C(0xf2ac139bf0bef9e8), UINT64_C(0x31f63ea0f89c8f29), UINT64_C(0x566268e5d7e2b1a7), UINT64_C(0x90a1dcf90070c039), - UINT64_C(0xb656b46da32098f3), UINT64_C(0x932e618f2bf02ff5), UINT64_C(0x6567346814e558c3), UINT64_C(0x6fee0aa9bbcd1aab), - UINT64_C(0x55a497a53ecf775d), UINT64_C(0xcce903fab3ead90d), UINT64_C(0x7fe3e530e9d3eaa0), UINT64_C(0x4dde47c8e75c1597), - UINT64_C(0x9d487b4725819ca5), UINT64_C(0x5893db2002678a18), UINT64_C(0x75f4da89918d8bff), UINT64_C(0x46736d07b2f80ed6), - UINT64_C(0x2b6e79c066e45341), UINT64_C(0xce708ef399b937cb), UINT64_C(0xa63749ae5d4f1767), UINT64_C(0x635d830a136e0563), - UINT64_C(0x55eea54f48f48df6), UINT64_C(0x68a076896b939688), UINT64_C(0x6e980d43ce7b11e9), UINT64_C(0x199065b551f0a7da), - UINT64_C(0x5d42faee0cb91d94), UINT64_C(0xa1770f53043c2107), UINT64_C(0x35c1ac46c4e4a748), UINT64_C(0xff43f86b0cd6ab3b), - UINT64_C(0x279dbad410c06a67), UINT64_C(0x40017b35ed84446a), UINT64_C(0xa73172134f9c5e8f), UINT64_C(0xfcff1de2975b0043), - UINT64_C(0xae0dd9ae2cfa364f), UINT64_C(0x52129c7818987b00), UINT64_C(0xaa0e91dae1a89606), UINT64_C(0x91dc4cbfdbb14973), - UINT64_C(0xb0ab9a3a7281965c), 
UINT64_C(0x9a8e2941fc1696a4), UINT64_C(0x6c76a89ed0a78b2c), UINT64_C(0xaa2539208db7d79a), - UINT64_C(0xcd5a73ca1b8ad462), UINT64_C(0xd2844afcfff68b7a), UINT64_C(0x808b81ab58a3c11e), UINT64_C(0x2003a1d79ee96e7e), - UINT64_C(0x87b236e5742b42d7), UINT64_C(0x3a3610e8bad3b373), UINT64_C(0xb481ca092e54fd87), UINT64_C(0xaf8adee08b5326e7), - UINT64_C(0x3ee2e6130ab53ef6), UINT64_C(0xbf7427af75a7c2d1), UINT64_C(0x4d7a6067dbeed20f), UINT64_C(0xcbdb5568d804ef3f), - UINT64_C(0x508ff58236e7a6f9), UINT64_C(0xacf7eac3c3037dab), UINT64_C(0x482b277d6928bddc), UINT64_C(0x538974760ddc6f83), - UINT64_C(0x6c3b990a1194ebe4), UINT64_C(0xeb3dfeda259aae19), UINT64_C(0x1043b1e32e6a609c), UINT64_C(0xe29853f3b731712a), - UINT64_C(0x725474cd1469a035), UINT64_C(0x08cc37d08547e287), UINT64_C(0x0de8c6d9ae66fe36), UINT64_C(0xaaef7eb47eb75f52), - UINT64_C(0xa29a69722b3bf66b), UINT64_C(0xd44d96ca50981b64), UINT64_C(0x0952a0827ec5b006), UINT64_C(0xaeced6c30c1fff4a), - UINT64_C(0xcf8551b4584c0c46), UINT64_C(0x2611b04aafedc71c), UINT64_C(0xd927dc8e6de6164f), UINT64_C(0x1fd5e2029d572551), - UINT64_C(0x45ad5bcd4bf72122), UINT64_C(0x54a3c4b12c343b21), UINT64_C(0x96156949c3f32a47), UINT64_C(0xa81023ef8e94e51b), - UINT64_C(0x26d335efc1d4efde), UINT64_C(0x669c4846e9284067), UINT64_C(0xcabd41a53335f6e1), UINT64_C(0x4f517812e06a917f), - UINT64_C(0xcdd989ce6aa55626), UINT64_C(0x5ca882c756fe4999), UINT64_C(0x639d8b99c6477c42), UINT64_C(0x2716a772911dca49), - UINT64_C(0x4374400157dc3d13), UINT64_C(0x1d0a512182a280f5), UINT64_C(0xd822a4f87a0ad77c), UINT64_C(0x0a0ab212f142db2b), - UINT64_C(0xe80fb8a935595883), UINT64_C(0x7568eec35a490b83), UINT64_C(0x09abdb9e114df5fc), UINT64_C(0x55137c447d1bca41), - UINT64_C(0x0de593a7acafcc85), UINT64_C(0xb975febcee3ca728), UINT64_C(0x63bef68e44fea1d5), UINT64_C(0xb013be7092b2a894), - UINT64_C(0xeba8c75d166e19d9), UINT64_C(0x224ad7936de628b9), UINT64_C(0x42b55663e6da91c0), UINT64_C(0x68f73c834d3b02a8), - UINT64_C(0x0bd2a1b0f697dc42), UINT64_C(0x89fc577d065f571a), 
UINT64_C(0xdc714c2c16925d8d), UINT64_C(0x5f94692fe9a6b2eb), + UINT64_C(0x8d88b6de8694f9bd), UINT64_C(0xab3746b512cf0a0e), UINT64_C(0x00000003d), 0, // sum of coeff and dummy + { + UINT64_C(0x8c35afea7008c707), UINT64_C(0x41ead554cfccdc94), UINT64_C(0x2efb2ec168e3bffc), UINT64_C(0xe7c3a0bbddc63920), + UINT64_C(0x4dce9e2b34302387), UINT64_C(0xfaf035fd5624990c), UINT64_C(0xccd919a786ba8213), UINT64_C(0x9a18857bdb2be4c1), + UINT64_C(0x001d03ba509647b6), UINT64_C(0x7e331694b4f66982), UINT64_C(0xb478c5a41317d762), UINT64_C(0xe717e226317c1144), + UINT64_C(0x022ffa0a2f15f66e), UINT64_C(0x6519929c261c063c), UINT64_C(0xff2060eae017d4e0), UINT64_C(0xefff6af725b87556), + UINT64_C(0x5d4d573a24be5312), UINT64_C(0xc07e9f4f495eb740), UINT64_C(0x5257032ed4c0e657), UINT64_C(0x2841f8526903c4ce), + UINT64_C(0xa5deee0ffb84873b), UINT64_C(0x45ce5d741491bbb2), UINT64_C(0x9c2b70601078ed64), UINT64_C(0x43837fdef168a0b0), + UINT64_C(0xf2ac139bf0bef9e8), UINT64_C(0x31f63ea0f89c8f29), UINT64_C(0x566268e5d7e2b1a7), UINT64_C(0x90a1dcf90070c039), + UINT64_C(0xb656b46da32098f3), UINT64_C(0x932e618f2bf02ff5), UINT64_C(0x6567346814e558c3), UINT64_C(0x6fee0aa9bbcd1aab), + UINT64_C(0x55a497a53ecf775d), UINT64_C(0xcce903fab3ead90d), UINT64_C(0x7fe3e530e9d3eaa0), UINT64_C(0x4dde47c8e75c1597), + UINT64_C(0x9d487b4725819ca5), UINT64_C(0x5893db2002678a18), UINT64_C(0x75f4da89918d8bff), UINT64_C(0x46736d07b2f80ed6), + UINT64_C(0x2b6e79c066e45341), UINT64_C(0xce708ef399b937cb), UINT64_C(0xa63749ae5d4f1767), UINT64_C(0x635d830a136e0563), + UINT64_C(0x55eea54f48f48df6), UINT64_C(0x68a076896b939688), UINT64_C(0x6e980d43ce7b11e9), UINT64_C(0x199065b551f0a7da), + UINT64_C(0x5d42faee0cb91d94), UINT64_C(0xa1770f53043c2107), UINT64_C(0x35c1ac46c4e4a748), UINT64_C(0xff43f86b0cd6ab3b), + UINT64_C(0x279dbad410c06a67), UINT64_C(0x40017b35ed84446a), UINT64_C(0xa73172134f9c5e8f), UINT64_C(0xfcff1de2975b0043), + UINT64_C(0xae0dd9ae2cfa364f), UINT64_C(0x52129c7818987b00), UINT64_C(0xaa0e91dae1a89606), 
UINT64_C(0x91dc4cbfdbb14973), + UINT64_C(0xb0ab9a3a7281965c), UINT64_C(0x9a8e2941fc1696a4), UINT64_C(0x6c76a89ed0a78b2c), UINT64_C(0xaa2539208db7d79a), + UINT64_C(0xcd5a73ca1b8ad462), UINT64_C(0xd2844afcfff68b7a), UINT64_C(0x808b81ab58a3c11e), UINT64_C(0x2003a1d79ee96e7e), + UINT64_C(0x87b236e5742b42d7), UINT64_C(0x3a3610e8bad3b373), UINT64_C(0xb481ca092e54fd87), UINT64_C(0xaf8adee08b5326e7), + UINT64_C(0x3ee2e6130ab53ef6), UINT64_C(0xbf7427af75a7c2d1), UINT64_C(0x4d7a6067dbeed20f), UINT64_C(0xcbdb5568d804ef3f), + UINT64_C(0x508ff58236e7a6f9), UINT64_C(0xacf7eac3c3037dab), UINT64_C(0x482b277d6928bddc), UINT64_C(0x538974760ddc6f83), + UINT64_C(0x6c3b990a1194ebe4), UINT64_C(0xeb3dfeda259aae19), UINT64_C(0x1043b1e32e6a609c), UINT64_C(0xe29853f3b731712a), + UINT64_C(0x725474cd1469a035), UINT64_C(0x08cc37d08547e287), UINT64_C(0x0de8c6d9ae66fe36), UINT64_C(0xaaef7eb47eb75f52), + UINT64_C(0xa29a69722b3bf66b), UINT64_C(0xd44d96ca50981b64), UINT64_C(0x0952a0827ec5b006), UINT64_C(0xaeced6c30c1fff4a), + UINT64_C(0xcf8551b4584c0c46), UINT64_C(0x2611b04aafedc71c), UINT64_C(0xd927dc8e6de6164f), UINT64_C(0x1fd5e2029d572551), + UINT64_C(0x45ad5bcd4bf72122), UINT64_C(0x54a3c4b12c343b21), UINT64_C(0x96156949c3f32a47), UINT64_C(0xa81023ef8e94e51b), + UINT64_C(0x26d335efc1d4efde), UINT64_C(0x669c4846e9284067), UINT64_C(0xcabd41a53335f6e1), UINT64_C(0x4f517812e06a917f), + UINT64_C(0xcdd989ce6aa55626), UINT64_C(0x5ca882c756fe4999), UINT64_C(0x639d8b99c6477c42), UINT64_C(0x2716a772911dca49), + UINT64_C(0x4374400157dc3d13), UINT64_C(0x1d0a512182a280f5), UINT64_C(0xd822a4f87a0ad77c), UINT64_C(0x0a0ab212f142db2b), + UINT64_C(0xe80fb8a935595883), UINT64_C(0x7568eec35a490b83), UINT64_C(0x09abdb9e114df5fc), UINT64_C(0x55137c447d1bca41), + UINT64_C(0x0de593a7acafcc85), UINT64_C(0xb975febcee3ca728), UINT64_C(0x63bef68e44fea1d5), UINT64_C(0xb013be7092b2a894), + UINT64_C(0xeba8c75d166e19d9), UINT64_C(0x224ad7936de628b9), UINT64_C(0x42b55663e6da91c0), UINT64_C(0x68f73c834d3b02a8), + 
UINT64_C(0x0bd2a1b0f697dc42), UINT64_C(0x89fc577d065f571a), UINT64_C(0xdc714c2c16925d8d), UINT64_C(0x5f94692fe9a6b2eb), + }, }, - }, // Level 3 - { - UINT64_C(0x8370e3dd2dd7e740), UINT64_C(0x4ac7a23650afaa5d), UINT64_C(0x00000003c), 0, // sum of coeff and dummy { - UINT64_C(0x141a416e635e3008), UINT64_C(0xe59e5696300fc54e), UINT64_C(0x3ac6afaf368cd3a6), UINT64_C(0x1c4d7641d7192768), - UINT64_C(0xaae556230b19cb19), UINT64_C(0x09fe3e074ade9f7e), UINT64_C(0xcc11adbd55ed21af), UINT64_C(0x862d3632edce6066), - UINT64_C(0x83200725a18ecf18), UINT64_C(0xef8a88f410ebfffa), UINT64_C(0x8f32ade56cc5cd11), UINT64_C(0x68601c8acb3b697b), - UINT64_C(0x3f7bc460e435c5be), UINT64_C(0xead87aaff097bf77), UINT64_C(0x5d35b160f1047863), UINT64_C(0x3c7c707d1decebe3), - UINT64_C(0xffab7fcb4b288977), UINT64_C(0xbb30bf67ea8078d4), UINT64_C(0x08c14f33079c0375), UINT64_C(0xc34be6df85f4e084), - UINT64_C(0xc5d61545239490a8), UINT64_C(0xc206111b5df05780), UINT64_C(0xb40b9d277b5eb1a6), UINT64_C(0x61f772ed20991bd7), - UINT64_C(0xa423cf9ee644f9b9), UINT64_C(0x63a281c7fb30afbe), UINT64_C(0x33dd3deb21ee47f3), UINT64_C(0x3d882a465f6520e0), - UINT64_C(0xd8f44673c67ff2c6), UINT64_C(0x159cafea157a4f90), UINT64_C(0x38a18e681a48e2a0), UINT64_C(0xb9ebf2a06fe035b4), - UINT64_C(0xdd504b49fd3e67bb), UINT64_C(0xae67fb542747c488), UINT64_C(0x7416c312f3387e02), UINT64_C(0xa5bebc6a0bc34dd0), - UINT64_C(0x89a98f212c21c94a), UINT64_C(0xd377d8c55c6c78c8), UINT64_C(0x23f194d2e59b81d0), UINT64_C(0xc0efd26a5d0ed051), - UINT64_C(0x0112146515113ef8), UINT64_C(0x2031a3cd82ce8702), UINT64_C(0x7ec8e3c87ce50a07), UINT64_C(0x47a142fc6fcd89c7), - UINT64_C(0x2bcb63e57f0cae2f), UINT64_C(0x8664c6f962a87b24), UINT64_C(0xe6d174ff007b2c34), UINT64_C(0x87e09c902d073b32), - UINT64_C(0xb543d64ed7dfb009), UINT64_C(0x7c31c340b3dae313), UINT64_C(0x562ba6cf0b4713cc), UINT64_C(0x957f23822221316e), - UINT64_C(0x9612164e43a7d75e), UINT64_C(0x66088836498298a7), UINT64_C(0x2277a69befc583cd), UINT64_C(0xc6a74c6baecd220d), - 
UINT64_C(0xc3df4a454eaf882f), UINT64_C(0x4c70af7cee8f0bbc), UINT64_C(0x2ba3590fd97517d4), UINT64_C(0xbb00a28e752d346c), - UINT64_C(0xebfa174a39681974), UINT64_C(0x033d8678eca2890b), UINT64_C(0xede2c5142f49827c), UINT64_C(0x614d56f55dde9f8b), - UINT64_C(0x72e2e9d5582a0a08), UINT64_C(0x9d1f6238ddac882b), UINT64_C(0xfcd3682c3bd70286), UINT64_C(0x8958816740699ee2), - UINT64_C(0xa5c7a3559d07b917), UINT64_C(0x4d8e82254c5a70e4), UINT64_C(0x291f69d4c89e5c45), UINT64_C(0x9c94a14902c4b249), - UINT64_C(0xd9bcf68e0f055258), UINT64_C(0x3a0cc6dcfffd05b7), UINT64_C(0xf0a22a2d6b06d03a), UINT64_C(0xeb9a2918852926aa), - UINT64_C(0x37915f797a6675f7), UINT64_C(0x98cdbb4e1686b742), UINT64_C(0x7007270bff4fcbe1), UINT64_C(0xc458d4068dc6c70f), - UINT64_C(0x073bbe0965ce93f3), UINT64_C(0xe7f2df0297e091e6), UINT64_C(0x3bf1a925fb9e6d1c), UINT64_C(0x48af31eef7b34f4b), - UINT64_C(0x00e92e127962fa5e), UINT64_C(0x0f8fc920466f3cd3), UINT64_C(0x25a21a02222a64b5), UINT64_C(0xb9853aa495decb46), - UINT64_C(0x262dc131bb0c35bb), UINT64_C(0xaf519c96fb0e9f68), UINT64_C(0x755849eedbb94ff2), UINT64_C(0x13a3d660e45f77b0), - UINT64_C(0x9f5d4268c5d69a64), UINT64_C(0x8c8a5e806938377c), UINT64_C(0x5bd34bfb54b64524), UINT64_C(0x6b5f1db574ecfaa9), - UINT64_C(0x37f725e56c1e9dc3), UINT64_C(0xc7fe10ac9904f90f), UINT64_C(0x879ae4eff04c0ab8), UINT64_C(0x76aea0675622e495), - UINT64_C(0xe29e3a0ebbe40dba), UINT64_C(0x157ffad6ff36b56f), UINT64_C(0x5466d89bca624434), UINT64_C(0x5449470d65bc5b35), - UINT64_C(0x7f6c99db52e6348a), UINT64_C(0x776d4dff2abd85c7), UINT64_C(0xb010a7f1beffcc1a), UINT64_C(0xad74603f4c6d9ab6), - UINT64_C(0x0599c30e3b018f16), UINT64_C(0x127a45fdeef28abd), UINT64_C(0x4cf790e8928575a0), UINT64_C(0x58fa1edd4caa9a51), - UINT64_C(0x5f3e8dd37e04eb51), UINT64_C(0xac131e1aea11807f), UINT64_C(0xf46fd7f990fb8cca), UINT64_C(0x73963b93ad4b9bb2), - UINT64_C(0x004c15e2478e8c36), UINT64_C(0xc79d966848c52c68), UINT64_C(0x827091c5d5309f35), UINT64_C(0x8e6290b4ecb7be34), - UINT64_C(0x4a2a701831915090), 
UINT64_C(0xb9ed682c26ae8721), UINT64_C(0x06c94a32c3f063b5), UINT64_C(0x11946415f289d8b4), - UINT64_C(0x4e6d4a3b505cd181), UINT64_C(0x7ad8e06beddabbeb), UINT64_C(0x272e050758ccfa94), UINT64_C(0x1a38a7703463de87), + UINT64_C(0x8370e3dd2dd7e740), UINT64_C(0x4ac7a23650afaa5d), UINT64_C(0x00000003c), 0, // sum of coeff and dummy + { + UINT64_C(0x141a416e635e3008), UINT64_C(0xe59e5696300fc54e), UINT64_C(0x3ac6afaf368cd3a6), UINT64_C(0x1c4d7641d7192768), + UINT64_C(0xaae556230b19cb19), UINT64_C(0x09fe3e074ade9f7e), UINT64_C(0xcc11adbd55ed21af), UINT64_C(0x862d3632edce6066), + UINT64_C(0x83200725a18ecf18), UINT64_C(0xef8a88f410ebfffa), UINT64_C(0x8f32ade56cc5cd11), UINT64_C(0x68601c8acb3b697b), + UINT64_C(0x3f7bc460e435c5be), UINT64_C(0xead87aaff097bf77), UINT64_C(0x5d35b160f1047863), UINT64_C(0x3c7c707d1decebe3), + UINT64_C(0xffab7fcb4b288977), UINT64_C(0xbb30bf67ea8078d4), UINT64_C(0x08c14f33079c0375), UINT64_C(0xc34be6df85f4e084), + UINT64_C(0xc5d61545239490a8), UINT64_C(0xc206111b5df05780), UINT64_C(0xb40b9d277b5eb1a6), UINT64_C(0x61f772ed20991bd7), + UINT64_C(0xa423cf9ee644f9b9), UINT64_C(0x63a281c7fb30afbe), UINT64_C(0x33dd3deb21ee47f3), UINT64_C(0x3d882a465f6520e0), + UINT64_C(0xd8f44673c67ff2c6), UINT64_C(0x159cafea157a4f90), UINT64_C(0x38a18e681a48e2a0), UINT64_C(0xb9ebf2a06fe035b4), + UINT64_C(0xdd504b49fd3e67bb), UINT64_C(0xae67fb542747c488), UINT64_C(0x7416c312f3387e02), UINT64_C(0xa5bebc6a0bc34dd0), + UINT64_C(0x89a98f212c21c94a), UINT64_C(0xd377d8c55c6c78c8), UINT64_C(0x23f194d2e59b81d0), UINT64_C(0xc0efd26a5d0ed051), + UINT64_C(0x0112146515113ef8), UINT64_C(0x2031a3cd82ce8702), UINT64_C(0x7ec8e3c87ce50a07), UINT64_C(0x47a142fc6fcd89c7), + UINT64_C(0x2bcb63e57f0cae2f), UINT64_C(0x8664c6f962a87b24), UINT64_C(0xe6d174ff007b2c34), UINT64_C(0x87e09c902d073b32), + UINT64_C(0xb543d64ed7dfb009), UINT64_C(0x7c31c340b3dae313), UINT64_C(0x562ba6cf0b4713cc), UINT64_C(0x957f23822221316e), + UINT64_C(0x9612164e43a7d75e), UINT64_C(0x66088836498298a7), 
UINT64_C(0x2277a69befc583cd), UINT64_C(0xc6a74c6baecd220d), + UINT64_C(0xc3df4a454eaf882f), UINT64_C(0x4c70af7cee8f0bbc), UINT64_C(0x2ba3590fd97517d4), UINT64_C(0xbb00a28e752d346c), + UINT64_C(0xebfa174a39681974), UINT64_C(0x033d8678eca2890b), UINT64_C(0xede2c5142f49827c), UINT64_C(0x614d56f55dde9f8b), + UINT64_C(0x72e2e9d5582a0a08), UINT64_C(0x9d1f6238ddac882b), UINT64_C(0xfcd3682c3bd70286), UINT64_C(0x8958816740699ee2), + UINT64_C(0xa5c7a3559d07b917), UINT64_C(0x4d8e82254c5a70e4), UINT64_C(0x291f69d4c89e5c45), UINT64_C(0x9c94a14902c4b249), + UINT64_C(0xd9bcf68e0f055258), UINT64_C(0x3a0cc6dcfffd05b7), UINT64_C(0xf0a22a2d6b06d03a), UINT64_C(0xeb9a2918852926aa), + UINT64_C(0x37915f797a6675f7), UINT64_C(0x98cdbb4e1686b742), UINT64_C(0x7007270bff4fcbe1), UINT64_C(0xc458d4068dc6c70f), + UINT64_C(0x073bbe0965ce93f3), UINT64_C(0xe7f2df0297e091e6), UINT64_C(0x3bf1a925fb9e6d1c), UINT64_C(0x48af31eef7b34f4b), + UINT64_C(0x00e92e127962fa5e), UINT64_C(0x0f8fc920466f3cd3), UINT64_C(0x25a21a02222a64b5), UINT64_C(0xb9853aa495decb46), + UINT64_C(0x262dc131bb0c35bb), UINT64_C(0xaf519c96fb0e9f68), UINT64_C(0x755849eedbb94ff2), UINT64_C(0x13a3d660e45f77b0), + UINT64_C(0x9f5d4268c5d69a64), UINT64_C(0x8c8a5e806938377c), UINT64_C(0x5bd34bfb54b64524), UINT64_C(0x6b5f1db574ecfaa9), + UINT64_C(0x37f725e56c1e9dc3), UINT64_C(0xc7fe10ac9904f90f), UINT64_C(0x879ae4eff04c0ab8), UINT64_C(0x76aea0675622e495), + UINT64_C(0xe29e3a0ebbe40dba), UINT64_C(0x157ffad6ff36b56f), UINT64_C(0x5466d89bca624434), UINT64_C(0x5449470d65bc5b35), + UINT64_C(0x7f6c99db52e6348a), UINT64_C(0x776d4dff2abd85c7), UINT64_C(0xb010a7f1beffcc1a), UINT64_C(0xad74603f4c6d9ab6), + UINT64_C(0x0599c30e3b018f16), UINT64_C(0x127a45fdeef28abd), UINT64_C(0x4cf790e8928575a0), UINT64_C(0x58fa1edd4caa9a51), + UINT64_C(0x5f3e8dd37e04eb51), UINT64_C(0xac131e1aea11807f), UINT64_C(0xf46fd7f990fb8cca), UINT64_C(0x73963b93ad4b9bb2), + UINT64_C(0x004c15e2478e8c36), UINT64_C(0xc79d966848c52c68), UINT64_C(0x827091c5d5309f35), 
UINT64_C(0x8e6290b4ecb7be34), + UINT64_C(0x4a2a701831915090), UINT64_C(0xb9ed682c26ae8721), UINT64_C(0x06c94a32c3f063b5), UINT64_C(0x11946415f289d8b4), + UINT64_C(0x4e6d4a3b505cd181), UINT64_C(0x7ad8e06beddabbeb), UINT64_C(0x272e050758ccfa94), UINT64_C(0x1a38a7703463de87), + }, }, - }, // Level 4 - { - UINT64_C(0x7c024d493240fd81), UINT64_C(0xcbedce790be4d6b), UINT64_C(0x000000041), 0, // sum of coeff and dummy { - UINT64_C(0xc385e890cdafa370), UINT64_C(0x72af2ae52cda3c0c), UINT64_C(0x377cc48ad117edce), UINT64_C(0xf3724d905f5cdc46), - UINT64_C(0xf51e0db646e04641), UINT64_C(0xb3ef041173b95e50), UINT64_C(0x483d8f190412d741), UINT64_C(0x9565fe70636fe7d1), - UINT64_C(0x7b5497f93bca30f2), UINT64_C(0xf7aa697c1f31e835), UINT64_C(0x26b9b332c5097919), UINT64_C(0x609c027c0e94be94), - UINT64_C(0xa4a77bf651dff968), UINT64_C(0xd3e952f9477aa964), UINT64_C(0xb6eb6ba84eafa8c3), UINT64_C(0xecc3cb66b4f9e264), - UINT64_C(0x6f7de149b48c42d2), UINT64_C(0xef38e08b77c94c8b), UINT64_C(0xd6a178affe73a087), UINT64_C(0xba01cfe6a8b0bfaf), - UINT64_C(0x771821ab27b1d361), UINT64_C(0x7b5e6b3e68a80c08), UINT64_C(0xd53c33bab8faf82f), UINT64_C(0x81e128821c9b5835), - UINT64_C(0x6968851cd767ecb8), UINT64_C(0x539510f090361d02), UINT64_C(0xee243a481fed197e), UINT64_C(0x57a7a6f5c2d4a423), - UINT64_C(0x7afc981eebfd0da8), UINT64_C(0xca100d08037f88e1), UINT64_C(0x7caf7e30e051e2f3), UINT64_C(0x09c6f692bb7e0c5e), - UINT64_C(0xff97c9f9213491a7), UINT64_C(0x3c7f06f4da8b68a8), UINT64_C(0xcc22969e12b0c521), UINT64_C(0xd3c246d637dc486c), - UINT64_C(0x645c098f230c482c), UINT64_C(0x7be14df33d02c990), UINT64_C(0xea99f1bc32cc189f), UINT64_C(0x8b776c2437b66a29), - UINT64_C(0xb6975830b26d1bcb), UINT64_C(0x3c24c07fb12dedfb), UINT64_C(0x939403d4624cb460), UINT64_C(0x0b4f454217f1f947), - UINT64_C(0x1ba0c284e2ac36c2), UINT64_C(0x25cfdc661fa02193), UINT64_C(0x661dc556bc51ede9), UINT64_C(0x8e4e8f1996c5b04f), - UINT64_C(0x6196e065ebbfc052), UINT64_C(0xbc1f2b573fcaf323), UINT64_C(0x74b0be15966126bc), 
UINT64_C(0xb61922dc3648b491), - UINT64_C(0x7528e5507af25415), UINT64_C(0xa03fee7cecbf5a92), UINT64_C(0x28f080a17abcdbf4), UINT64_C(0xf558e58265b50247), - UINT64_C(0x48946bc6b781b231), UINT64_C(0x1d3f9268ece51d01), UINT64_C(0x64cfd592583cd6d1), UINT64_C(0x33227252dde03dcc), - UINT64_C(0xfe487eba451edd0e), UINT64_C(0x1554136d4e0da4f8), UINT64_C(0x5446eb38aa369ed4), UINT64_C(0x5b46c4ce910d2ab6), - UINT64_C(0x5ca4f4ee4346e6f3), UINT64_C(0xb8a0111cf306801f), UINT64_C(0x4f96aae6581da78e), UINT64_C(0x6245d9523980b137), - UINT64_C(0x5e6efad77dd317ba), UINT64_C(0x7eb8de8eb617c7f4), UINT64_C(0x84e4d9ed06dce648), UINT64_C(0x24ed663bd6ce99fd), - UINT64_C(0xdf0ba8713d3bd076), UINT64_C(0xc11063b88172e67a), UINT64_C(0xb173e8e756868535), UINT64_C(0x6f9b72467e93008f), - UINT64_C(0x0c7ab90fa88aa8b2), UINT64_C(0x3deb22d963a56bcf), UINT64_C(0xa56348ee35314bb8), UINT64_C(0x9881a7a2129cebdb), - UINT64_C(0xc160ec1b18ecaeb6), UINT64_C(0x358f2bd362310528), UINT64_C(0xa92ccae5ed750d12), UINT64_C(0xdce6d5d94a23845d), - UINT64_C(0xf50e3e4e30ac79f4), UINT64_C(0x308e35ff0a5c199f), UINT64_C(0x9843f1db5c0f0066), UINT64_C(0x21e31f7ea490ff33), - UINT64_C(0x180b0bd32ae3dc81), UINT64_C(0x64067fc5626d1cd9), UINT64_C(0x10803e502f4b4eef), UINT64_C(0x64f3d35137338ceb), - UINT64_C(0x12f3445e0c9d7641), UINT64_C(0x7be6720939744b5c), UINT64_C(0xe85e4cc174c166e2), UINT64_C(0x9468eb4ab9946aed), - UINT64_C(0xa8bb2b2d4df63a32), UINT64_C(0xb2f95c382e934037), UINT64_C(0x3e902ed369fbbb44), UINT64_C(0x185a9eade1869dd0), - UINT64_C(0xd240a5734d051bf1), UINT64_C(0x92faec8652bea745), UINT64_C(0x8996ab0aec688aba), UINT64_C(0xbcac5f2824c8daef), - UINT64_C(0x5881daacfc329969), UINT64_C(0x55364eaf990b3b21), UINT64_C(0xe5de0bd0d06f1120), UINT64_C(0xd6a6fb94a44fbf1a), - UINT64_C(0x4e10e2dcf9e9aa49), UINT64_C(0xfe401a3e5cdb41ae), UINT64_C(0x81a4db50e11a295f), UINT64_C(0xfcc87dd6a04da032), - UINT64_C(0x6c5f6fa90c36ccb6), UINT64_C(0xf7fa702ef53bd5bd), UINT64_C(0x37345651f635ded5), UINT64_C(0x9650ac0acc8b0f11), - 
UINT64_C(0xfb1fc5e6a46f6c48), UINT64_C(0x75fbd67a4f588024), UINT64_C(0xbcf48525891fbf4e), UINT64_C(0x076fdfe68cb57efc), - UINT64_C(0x9ff4fdeb562abe4d), UINT64_C(0x363686dcec66ee6f), UINT64_C(0x3ed3c65e6660e857), UINT64_C(0x555629fb07677f9c), - UINT64_C(0x0b9e59e5e2dc63f0), UINT64_C(0x3dd204d3c272f8e8), UINT64_C(0x0a5e2bc12753cc6f), UINT64_C(0x261571527dae8627), + UINT64_C(0x7c024d493240fd81), UINT64_C(0xcbedce790be4d6b), UINT64_C(0x000000041), 0, // sum of coeff and dummy + { + UINT64_C(0xc385e890cdafa370), UINT64_C(0x72af2ae52cda3c0c), UINT64_C(0x377cc48ad117edce), UINT64_C(0xf3724d905f5cdc46), + UINT64_C(0xf51e0db646e04641), UINT64_C(0xb3ef041173b95e50), UINT64_C(0x483d8f190412d741), UINT64_C(0x9565fe70636fe7d1), + UINT64_C(0x7b5497f93bca30f2), UINT64_C(0xf7aa697c1f31e835), UINT64_C(0x26b9b332c5097919), UINT64_C(0x609c027c0e94be94), + UINT64_C(0xa4a77bf651dff968), UINT64_C(0xd3e952f9477aa964), UINT64_C(0xb6eb6ba84eafa8c3), UINT64_C(0xecc3cb66b4f9e264), + UINT64_C(0x6f7de149b48c42d2), UINT64_C(0xef38e08b77c94c8b), UINT64_C(0xd6a178affe73a087), UINT64_C(0xba01cfe6a8b0bfaf), + UINT64_C(0x771821ab27b1d361), UINT64_C(0x7b5e6b3e68a80c08), UINT64_C(0xd53c33bab8faf82f), UINT64_C(0x81e128821c9b5835), + UINT64_C(0x6968851cd767ecb8), UINT64_C(0x539510f090361d02), UINT64_C(0xee243a481fed197e), UINT64_C(0x57a7a6f5c2d4a423), + UINT64_C(0x7afc981eebfd0da8), UINT64_C(0xca100d08037f88e1), UINT64_C(0x7caf7e30e051e2f3), UINT64_C(0x09c6f692bb7e0c5e), + UINT64_C(0xff97c9f9213491a7), UINT64_C(0x3c7f06f4da8b68a8), UINT64_C(0xcc22969e12b0c521), UINT64_C(0xd3c246d637dc486c), + UINT64_C(0x645c098f230c482c), UINT64_C(0x7be14df33d02c990), UINT64_C(0xea99f1bc32cc189f), UINT64_C(0x8b776c2437b66a29), + UINT64_C(0xb6975830b26d1bcb), UINT64_C(0x3c24c07fb12dedfb), UINT64_C(0x939403d4624cb460), UINT64_C(0x0b4f454217f1f947), + UINT64_C(0x1ba0c284e2ac36c2), UINT64_C(0x25cfdc661fa02193), UINT64_C(0x661dc556bc51ede9), UINT64_C(0x8e4e8f1996c5b04f), + UINT64_C(0x6196e065ebbfc052), 
UINT64_C(0xbc1f2b573fcaf323), UINT64_C(0x74b0be15966126bc), UINT64_C(0xb61922dc3648b491), + UINT64_C(0x7528e5507af25415), UINT64_C(0xa03fee7cecbf5a92), UINT64_C(0x28f080a17abcdbf4), UINT64_C(0xf558e58265b50247), + UINT64_C(0x48946bc6b781b231), UINT64_C(0x1d3f9268ece51d01), UINT64_C(0x64cfd592583cd6d1), UINT64_C(0x33227252dde03dcc), + UINT64_C(0xfe487eba451edd0e), UINT64_C(0x1554136d4e0da4f8), UINT64_C(0x5446eb38aa369ed4), UINT64_C(0x5b46c4ce910d2ab6), + UINT64_C(0x5ca4f4ee4346e6f3), UINT64_C(0xb8a0111cf306801f), UINT64_C(0x4f96aae6581da78e), UINT64_C(0x6245d9523980b137), + UINT64_C(0x5e6efad77dd317ba), UINT64_C(0x7eb8de8eb617c7f4), UINT64_C(0x84e4d9ed06dce648), UINT64_C(0x24ed663bd6ce99fd), + UINT64_C(0xdf0ba8713d3bd076), UINT64_C(0xc11063b88172e67a), UINT64_C(0xb173e8e756868535), UINT64_C(0x6f9b72467e93008f), + UINT64_C(0x0c7ab90fa88aa8b2), UINT64_C(0x3deb22d963a56bcf), UINT64_C(0xa56348ee35314bb8), UINT64_C(0x9881a7a2129cebdb), + UINT64_C(0xc160ec1b18ecaeb6), UINT64_C(0x358f2bd362310528), UINT64_C(0xa92ccae5ed750d12), UINT64_C(0xdce6d5d94a23845d), + UINT64_C(0xf50e3e4e30ac79f4), UINT64_C(0x308e35ff0a5c199f), UINT64_C(0x9843f1db5c0f0066), UINT64_C(0x21e31f7ea490ff33), + UINT64_C(0x180b0bd32ae3dc81), UINT64_C(0x64067fc5626d1cd9), UINT64_C(0x10803e502f4b4eef), UINT64_C(0x64f3d35137338ceb), + UINT64_C(0x12f3445e0c9d7641), UINT64_C(0x7be6720939744b5c), UINT64_C(0xe85e4cc174c166e2), UINT64_C(0x9468eb4ab9946aed), + UINT64_C(0xa8bb2b2d4df63a32), UINT64_C(0xb2f95c382e934037), UINT64_C(0x3e902ed369fbbb44), UINT64_C(0x185a9eade1869dd0), + UINT64_C(0xd240a5734d051bf1), UINT64_C(0x92faec8652bea745), UINT64_C(0x8996ab0aec688aba), UINT64_C(0xbcac5f2824c8daef), + UINT64_C(0x5881daacfc329969), UINT64_C(0x55364eaf990b3b21), UINT64_C(0xe5de0bd0d06f1120), UINT64_C(0xd6a6fb94a44fbf1a), + UINT64_C(0x4e10e2dcf9e9aa49), UINT64_C(0xfe401a3e5cdb41ae), UINT64_C(0x81a4db50e11a295f), UINT64_C(0xfcc87dd6a04da032), + UINT64_C(0x6c5f6fa90c36ccb6), UINT64_C(0xf7fa702ef53bd5bd), 
UINT64_C(0x37345651f635ded5), UINT64_C(0x9650ac0acc8b0f11), + UINT64_C(0xfb1fc5e6a46f6c48), UINT64_C(0x75fbd67a4f588024), UINT64_C(0xbcf48525891fbf4e), UINT64_C(0x076fdfe68cb57efc), + UINT64_C(0x9ff4fdeb562abe4d), UINT64_C(0x363686dcec66ee6f), UINT64_C(0x3ed3c65e6660e857), UINT64_C(0x555629fb07677f9c), + UINT64_C(0x0b9e59e5e2dc63f0), UINT64_C(0x3dd204d3c272f8e8), UINT64_C(0x0a5e2bc12753cc6f), UINT64_C(0x261571527dae8627), + }, }, - }, // Level 5 - { - UINT64_C(0x742b91e91dcfb0a6), UINT64_C(0xcfeca6a967921914), UINT64_C(0x00000003c), 0, // sum of coeff and dummy { - UINT64_C(0x6edee5be930ba5a3), UINT64_C(0x7da756c8a9d5865f), UINT64_C(0x979d7286e9ec6a3a), UINT64_C(0xb5f53e73c1075910), - UINT64_C(0xac17c48f4a6369d1), UINT64_C(0xe59c869b50f242b8), UINT64_C(0xd82f2c4debbd7a92), UINT64_C(0x2f480ab7fcef8c2a), - UINT64_C(0x5455617627c7967c), UINT64_C(0x391f4653479cd148), UINT64_C(0x93816a1fe3fe659f), UINT64_C(0x750610cc458f0e83), - UINT64_C(0xaea9ec84538ba181), UINT64_C(0x07f69ef23331d201), UINT64_C(0x1154b8671a7e21a6), UINT64_C(0x44f2b2a5e705dccd), - UINT64_C(0xf4137114642bd756), UINT64_C(0x0d9fdd5c26862aa0), UINT64_C(0x24252072220e87e6), UINT64_C(0x40c56b66c01c20f4), - UINT64_C(0x3d1246932d66f5fb), UINT64_C(0x549be143f5ad841a), UINT64_C(0xf5a694fd849975f9), UINT64_C(0xab3a75807839e2ae), - UINT64_C(0xdbc151ec40a63d29), UINT64_C(0x252d86d9b6ff7885), UINT64_C(0xd848fb1e2a170064), UINT64_C(0x8dbfbaa7e285d213), - UINT64_C(0x48c5c1a431e6a390), UINT64_C(0x4ea411a44607dc21), UINT64_C(0xbb8535f2c692910e), UINT64_C(0x6d8c5388d2aed8b2), - UINT64_C(0x2fddc57f1a7b1cc8), UINT64_C(0x3a2c8bd7ea3f25ab), UINT64_C(0x87708e34be0fb414), UINT64_C(0x8543e5d4e9f7c34e), - UINT64_C(0x2c349130b9d62f31), UINT64_C(0x8589d21285426c0c), UINT64_C(0x5b2a39baebaad52f), UINT64_C(0x03f8700c91cd5413), - UINT64_C(0xcc00c06be9d784fb), UINT64_C(0x70a78056b4c5b930), UINT64_C(0x4a2aa9811bbd47a3), UINT64_C(0x4a878b1e922c6304), - UINT64_C(0x2443f15ef107a70f), UINT64_C(0xf64b29a8f4069376), 
UINT64_C(0xfc309fa9086da268), UINT64_C(0xffeedab78f765ff4), - UINT64_C(0xa99a216b423fac77), UINT64_C(0x3b9c309929d6991e), UINT64_C(0x113fe1aa6ba4c211), UINT64_C(0x2f214dea6f758f36), - UINT64_C(0x519806a4ba5b5ca8), UINT64_C(0xef203bc2948dda9e), UINT64_C(0xaa83a59110f3a193), UINT64_C(0xebdef286170eb7ef), - UINT64_C(0x9bd44760cd090ead), UINT64_C(0x234b9dde9fd14ab3), UINT64_C(0xee6e9c107305b2f4), UINT64_C(0x5eae7639d8a2b0ab), - UINT64_C(0x63d30ff6c83a7320), UINT64_C(0x3ded1e0f42fa1cb2), UINT64_C(0xd386b3b3b19d708e), UINT64_C(0x34d5016669fe449a), - UINT64_C(0xb9f91d66682b7278), UINT64_C(0x817659853e4e435e), UINT64_C(0xfc2e6483c3048759), UINT64_C(0xb261e03ffbd9519e), - UINT64_C(0xb49de284f5cf5d02), UINT64_C(0x02387c87bbbf7445), UINT64_C(0x6d937def7be53a83), UINT64_C(0x08526f8ae49dbd0f), - UINT64_C(0x615ef3f5af7fd5ab), UINT64_C(0x54cb4d9e528c1d79), UINT64_C(0x3cb713ba05a67835), UINT64_C(0xf592fb2d4d2af2db), - UINT64_C(0x86ec6601e42b2456), UINT64_C(0x0e857a59e7439d0d), UINT64_C(0x8326414cd1f6874f), UINT64_C(0xa92dad5f5d9a106a), - UINT64_C(0x58793e150f7ff874), UINT64_C(0x519bc1ed4913c3c5), UINT64_C(0x4f3b0da10be83d82), UINT64_C(0xd82c561b6f18a264), - UINT64_C(0xa47f8878009a1815), UINT64_C(0x0673feb8c6083dd6), UINT64_C(0x343ac4c37efb4d08), UINT64_C(0x4847b3364092fa4a), - UINT64_C(0x1a30098e32c503a0), UINT64_C(0x7f242c4cb083e69b), UINT64_C(0x08e69e6c3b1070ec), UINT64_C(0x0711fa2b404a9684), - UINT64_C(0xfc24e0a982ae39fa), UINT64_C(0x02ff5ca0bd974db5), UINT64_C(0x2777845db37d0e98), UINT64_C(0x5555b5942327e543), - UINT64_C(0x7717c93942df84b7), UINT64_C(0x2a661b86ad2dcdde), UINT64_C(0x61c93d7746664b20), UINT64_C(0x514090cc1a87d06b), - UINT64_C(0x7aa2f5f8bcf987ad), UINT64_C(0x2898047ec7fa8778), UINT64_C(0xe5cf2d9a08d8927c), UINT64_C(0xecde6d34e5c3fe5a), - UINT64_C(0x5589c848adaebaf8), UINT64_C(0xedac4b9343975aa2), UINT64_C(0x48503cf321ad26b2), UINT64_C(0x4e7f1530c16f8941), - UINT64_C(0x6a9fe4e56715fa4e), UINT64_C(0xefa9aec821c89e4b), UINT64_C(0xc23b542018927c97), 
UINT64_C(0xeedb11ae93481c6f), - UINT64_C(0x35f45dab8618f030), UINT64_C(0x2a5eb24e550fcb99), UINT64_C(0x5c6d2d61242cf3a8), UINT64_C(0x96058fee3f9becb0), - UINT64_C(0x811ed70d6e6cd756), UINT64_C(0x93642e8381c4a6a0), UINT64_C(0xc81e05bef85ad62b), UINT64_C(0xd12ce5cee02edeae), - UINT64_C(0x0a00b676c5f25868), UINT64_C(0xc5c91383914e9732), UINT64_C(0xd9e4fbd6c7a78695), UINT64_C(0x24741bcd3aab63f3), - UINT64_C(0xa86f85bc7932add8), UINT64_C(0xd851daaea4ade651), UINT64_C(0xc1b2a4b765bd4ee2), UINT64_C(0xd648f4971ef524f7), + UINT64_C(0x742b91e91dcfb0a6), UINT64_C(0xcfeca6a967921914), UINT64_C(0x00000003c), 0, // sum of coeff and dummy + { + UINT64_C(0x6edee5be930ba5a3), UINT64_C(0x7da756c8a9d5865f), UINT64_C(0x979d7286e9ec6a3a), UINT64_C(0xb5f53e73c1075910), + UINT64_C(0xac17c48f4a6369d1), UINT64_C(0xe59c869b50f242b8), UINT64_C(0xd82f2c4debbd7a92), UINT64_C(0x2f480ab7fcef8c2a), + UINT64_C(0x5455617627c7967c), UINT64_C(0x391f4653479cd148), UINT64_C(0x93816a1fe3fe659f), UINT64_C(0x750610cc458f0e83), + UINT64_C(0xaea9ec84538ba181), UINT64_C(0x07f69ef23331d201), UINT64_C(0x1154b8671a7e21a6), UINT64_C(0x44f2b2a5e705dccd), + UINT64_C(0xf4137114642bd756), UINT64_C(0x0d9fdd5c26862aa0), UINT64_C(0x24252072220e87e6), UINT64_C(0x40c56b66c01c20f4), + UINT64_C(0x3d1246932d66f5fb), UINT64_C(0x549be143f5ad841a), UINT64_C(0xf5a694fd849975f9), UINT64_C(0xab3a75807839e2ae), + UINT64_C(0xdbc151ec40a63d29), UINT64_C(0x252d86d9b6ff7885), UINT64_C(0xd848fb1e2a170064), UINT64_C(0x8dbfbaa7e285d213), + UINT64_C(0x48c5c1a431e6a390), UINT64_C(0x4ea411a44607dc21), UINT64_C(0xbb8535f2c692910e), UINT64_C(0x6d8c5388d2aed8b2), + UINT64_C(0x2fddc57f1a7b1cc8), UINT64_C(0x3a2c8bd7ea3f25ab), UINT64_C(0x87708e34be0fb414), UINT64_C(0x8543e5d4e9f7c34e), + UINT64_C(0x2c349130b9d62f31), UINT64_C(0x8589d21285426c0c), UINT64_C(0x5b2a39baebaad52f), UINT64_C(0x03f8700c91cd5413), + UINT64_C(0xcc00c06be9d784fb), UINT64_C(0x70a78056b4c5b930), UINT64_C(0x4a2aa9811bbd47a3), UINT64_C(0x4a878b1e922c6304), + 
UINT64_C(0x2443f15ef107a70f), UINT64_C(0xf64b29a8f4069376), UINT64_C(0xfc309fa9086da268), UINT64_C(0xffeedab78f765ff4), + UINT64_C(0xa99a216b423fac77), UINT64_C(0x3b9c309929d6991e), UINT64_C(0x113fe1aa6ba4c211), UINT64_C(0x2f214dea6f758f36), + UINT64_C(0x519806a4ba5b5ca8), UINT64_C(0xef203bc2948dda9e), UINT64_C(0xaa83a59110f3a193), UINT64_C(0xebdef286170eb7ef), + UINT64_C(0x9bd44760cd090ead), UINT64_C(0x234b9dde9fd14ab3), UINT64_C(0xee6e9c107305b2f4), UINT64_C(0x5eae7639d8a2b0ab), + UINT64_C(0x63d30ff6c83a7320), UINT64_C(0x3ded1e0f42fa1cb2), UINT64_C(0xd386b3b3b19d708e), UINT64_C(0x34d5016669fe449a), + UINT64_C(0xb9f91d66682b7278), UINT64_C(0x817659853e4e435e), UINT64_C(0xfc2e6483c3048759), UINT64_C(0xb261e03ffbd9519e), + UINT64_C(0xb49de284f5cf5d02), UINT64_C(0x02387c87bbbf7445), UINT64_C(0x6d937def7be53a83), UINT64_C(0x08526f8ae49dbd0f), + UINT64_C(0x615ef3f5af7fd5ab), UINT64_C(0x54cb4d9e528c1d79), UINT64_C(0x3cb713ba05a67835), UINT64_C(0xf592fb2d4d2af2db), + UINT64_C(0x86ec6601e42b2456), UINT64_C(0x0e857a59e7439d0d), UINT64_C(0x8326414cd1f6874f), UINT64_C(0xa92dad5f5d9a106a), + UINT64_C(0x58793e150f7ff874), UINT64_C(0x519bc1ed4913c3c5), UINT64_C(0x4f3b0da10be83d82), UINT64_C(0xd82c561b6f18a264), + UINT64_C(0xa47f8878009a1815), UINT64_C(0x0673feb8c6083dd6), UINT64_C(0x343ac4c37efb4d08), UINT64_C(0x4847b3364092fa4a), + UINT64_C(0x1a30098e32c503a0), UINT64_C(0x7f242c4cb083e69b), UINT64_C(0x08e69e6c3b1070ec), UINT64_C(0x0711fa2b404a9684), + UINT64_C(0xfc24e0a982ae39fa), UINT64_C(0x02ff5ca0bd974db5), UINT64_C(0x2777845db37d0e98), UINT64_C(0x5555b5942327e543), + UINT64_C(0x7717c93942df84b7), UINT64_C(0x2a661b86ad2dcdde), UINT64_C(0x61c93d7746664b20), UINT64_C(0x514090cc1a87d06b), + UINT64_C(0x7aa2f5f8bcf987ad), UINT64_C(0x2898047ec7fa8778), UINT64_C(0xe5cf2d9a08d8927c), UINT64_C(0xecde6d34e5c3fe5a), + UINT64_C(0x5589c848adaebaf8), UINT64_C(0xedac4b9343975aa2), UINT64_C(0x48503cf321ad26b2), UINT64_C(0x4e7f1530c16f8941), + UINT64_C(0x6a9fe4e56715fa4e), 
UINT64_C(0xefa9aec821c89e4b), UINT64_C(0xc23b542018927c97), UINT64_C(0xeedb11ae93481c6f), + UINT64_C(0x35f45dab8618f030), UINT64_C(0x2a5eb24e550fcb99), UINT64_C(0x5c6d2d61242cf3a8), UINT64_C(0x96058fee3f9becb0), + UINT64_C(0x811ed70d6e6cd756), UINT64_C(0x93642e8381c4a6a0), UINT64_C(0xc81e05bef85ad62b), UINT64_C(0xd12ce5cee02edeae), + UINT64_C(0x0a00b676c5f25868), UINT64_C(0xc5c91383914e9732), UINT64_C(0xd9e4fbd6c7a78695), UINT64_C(0x24741bcd3aab63f3), + UINT64_C(0xa86f85bc7932add8), UINT64_C(0xd851daaea4ade651), UINT64_C(0xc1b2a4b765bd4ee2), UINT64_C(0xd648f4971ef524f7), + }, }, - }, // Level 6 - { - UINT64_C(0xaf62ce594afbb378), UINT64_C(0x248e65d01cba3e0b), UINT64_C(0x00000003f), 0, // sum of coeff and dummy { - UINT64_C(0x6ce36b80768d6e7f), UINT64_C(0xa397920aa6626e5a), UINT64_C(0x04de32bd5633745d), UINT64_C(0xe699be0bb8411b1f), - UINT64_C(0xd06b3da1042ffeff), UINT64_C(0xc8c12f5678dbc1fe), UINT64_C(0x5f1c5df4786ec543), UINT64_C(0xc64eed21fe2dab71), - UINT64_C(0x43083efd3ab83bc9), UINT64_C(0xfbd27f38b364bb80), UINT64_C(0x948701fc4ed5f457), UINT64_C(0xb26d9d8304db31a5), - UINT64_C(0x18ec7952e4e525a9), UINT64_C(0x0a81dbd330204a9d), UINT64_C(0x033c520def3d2101), UINT64_C(0x73a6c045c701aadd), - UINT64_C(0xd7d19f80a027afec), UINT64_C(0x8bf3f0c57c2fe429), UINT64_C(0xb8344463c59719e3), UINT64_C(0xf76ffe54b2fd1d64), - UINT64_C(0xf3358f8c810dda81), UINT64_C(0x8049af80eb93f21f), UINT64_C(0x5ff59a51e9dafd79), UINT64_C(0xb3f6e7835814a5e9), - UINT64_C(0xbd127322c2e4b16c), UINT64_C(0x7bc601b6ef92afa3), UINT64_C(0x00b5e1e97c28a598), UINT64_C(0x38d94a15139b608e), - UINT64_C(0x39737d09f0035403), UINT64_C(0x65337848d976c3a2), UINT64_C(0x91c04f2a6a9ec21f), UINT64_C(0x02548b83235c115f), - UINT64_C(0x430e4ec854acc042), UINT64_C(0x0b0d27ee05bcd498), UINT64_C(0xf669534441242d11), UINT64_C(0x02cbaa107829c390), - UINT64_C(0x35b4d683817b903c), UINT64_C(0x31834f7142d5cfa0), UINT64_C(0x77fd19567cb1ffea), UINT64_C(0x0911558876310281), - UINT64_C(0xeaaef1c301d92167), 
UINT64_C(0xf1c746401671b4d3), UINT64_C(0x7d1888c23b2447e9), UINT64_C(0x72c44c19bde5d380), - UINT64_C(0x7a6156a99377bf58), UINT64_C(0xeafd8cb3722b6aa4), UINT64_C(0xa4b21df76c4ae4a6), UINT64_C(0xa612df347cb132bf), - UINT64_C(0x2f8331da53e4651f), UINT64_C(0x498baa43072061aa), UINT64_C(0x669cd34bdf522223), UINT64_C(0x611a32f117b489e3), - UINT64_C(0xb1d08c016e277a67), UINT64_C(0xb1d4d0937395b21f), UINT64_C(0x9d3e7447db71fd3d), UINT64_C(0x8d61714b54616249), - UINT64_C(0x91cfe6cad3939afb), UINT64_C(0x785efcfc1fbed3f8), UINT64_C(0xc7270e86e752b71a), UINT64_C(0xe91bc93a14e678c4), - UINT64_C(0x9bf095b9662cf95d), UINT64_C(0xa82d8d1309df2256), UINT64_C(0x41abc3fa674c6a06), UINT64_C(0x0e38a88b0398547e), - UINT64_C(0x6fe82427e8c24696), UINT64_C(0x0f20ed4a9e8e02c2), UINT64_C(0x5df70b3c4784b7e1), UINT64_C(0x000b2deddde9963c), - UINT64_C(0xc8929e6367803b53), UINT64_C(0xb28033a4c174c86d), UINT64_C(0x3a666b4c18406801), UINT64_C(0xbd8b5791ba056136), - UINT64_C(0x715ed0ae7c79e816), UINT64_C(0x577c1b256c64436a), UINT64_C(0x54a4f8d1b535e02d), UINT64_C(0xc8d7f16769d38240), - UINT64_C(0xb707839b15b0d3fc), UINT64_C(0x255def6be6755b91), UINT64_C(0x9bb54bbffd57d21f), UINT64_C(0xd882bcc3caa155e7), - UINT64_C(0x32706a042f57ab60), UINT64_C(0xf2f38aa7f8c31e8b), UINT64_C(0xa1e84cfff8dc3cae), UINT64_C(0xa703b9fc24c2e1db), - UINT64_C(0x8c3bd99cdd77d160), UINT64_C(0x4d4692d129444836), UINT64_C(0xef4b1c7cd501fd7d), UINT64_C(0xde07e34df48421ab), - UINT64_C(0xae4083dd864c910d), UINT64_C(0xfa4ba5e1a2d58460), UINT64_C(0x6f0068aa4e75a5ec), UINT64_C(0x0a9e07133b5a2abe), - UINT64_C(0x337739bfa36cecc8), UINT64_C(0xe3591f5cc97b787c), UINT64_C(0xf2bbe16b3ec41399), UINT64_C(0xf3dcc6246a758716), - UINT64_C(0xc73351933e7e2417), UINT64_C(0x0e1f947d867b0bdd), UINT64_C(0xe48bf8efb1f572a0), UINT64_C(0xd5b209d89f09fa2a), - UINT64_C(0x27478ae42843f9f1), UINT64_C(0x01b30ed80db664a5), UINT64_C(0x0181e5ed5e84cd8b), UINT64_C(0xf6318c19349acefb), - UINT64_C(0x69c8492982778f4b), UINT64_C(0x4af6702966bca750), 
UINT64_C(0xa8b4d353631e2482), UINT64_C(0x5ce04a70f584d238), - UINT64_C(0xfbf5b2cdc0394772), UINT64_C(0x104d44c77b80b6ae), UINT64_C(0xbe8e5a49d6ee3335), UINT64_C(0x5bf8f3f9a05f36f9), - UINT64_C(0x4be7aeb57af4a56a), UINT64_C(0xa09e9cd11d6ef9a7), UINT64_C(0x091ecc28674a929a), UINT64_C(0xad2c90bc1f89d87f), - UINT64_C(0xbf25df5f95456364), UINT64_C(0x7b104f2289b28c07), UINT64_C(0x902272c148ddc16d), UINT64_C(0x3285c7b614a096f3), - UINT64_C(0x6491973c285a2f0f), UINT64_C(0x31f84ba2ce5e3755), UINT64_C(0x3300c615947fd40c), UINT64_C(0x3c4747adf437f115), - UINT64_C(0x04fa56d556527742), UINT64_C(0xd7b45d6644b42059), UINT64_C(0x4cdea756d6091a28), UINT64_C(0x2431ed986745785b), + UINT64_C(0xaf62ce594afbb378), UINT64_C(0x248e65d01cba3e0b), UINT64_C(0x00000003f), 0, // sum of coeff and dummy + { + UINT64_C(0x6ce36b80768d6e7f), UINT64_C(0xa397920aa6626e5a), UINT64_C(0x04de32bd5633745d), UINT64_C(0xe699be0bb8411b1f), + UINT64_C(0xd06b3da1042ffeff), UINT64_C(0xc8c12f5678dbc1fe), UINT64_C(0x5f1c5df4786ec543), UINT64_C(0xc64eed21fe2dab71), + UINT64_C(0x43083efd3ab83bc9), UINT64_C(0xfbd27f38b364bb80), UINT64_C(0x948701fc4ed5f457), UINT64_C(0xb26d9d8304db31a5), + UINT64_C(0x18ec7952e4e525a9), UINT64_C(0x0a81dbd330204a9d), UINT64_C(0x033c520def3d2101), UINT64_C(0x73a6c045c701aadd), + UINT64_C(0xd7d19f80a027afec), UINT64_C(0x8bf3f0c57c2fe429), UINT64_C(0xb8344463c59719e3), UINT64_C(0xf76ffe54b2fd1d64), + UINT64_C(0xf3358f8c810dda81), UINT64_C(0x8049af80eb93f21f), UINT64_C(0x5ff59a51e9dafd79), UINT64_C(0xb3f6e7835814a5e9), + UINT64_C(0xbd127322c2e4b16c), UINT64_C(0x7bc601b6ef92afa3), UINT64_C(0x00b5e1e97c28a598), UINT64_C(0x38d94a15139b608e), + UINT64_C(0x39737d09f0035403), UINT64_C(0x65337848d976c3a2), UINT64_C(0x91c04f2a6a9ec21f), UINT64_C(0x02548b83235c115f), + UINT64_C(0x430e4ec854acc042), UINT64_C(0x0b0d27ee05bcd498), UINT64_C(0xf669534441242d11), UINT64_C(0x02cbaa107829c390), + UINT64_C(0x35b4d683817b903c), UINT64_C(0x31834f7142d5cfa0), UINT64_C(0x77fd19567cb1ffea), 
UINT64_C(0x0911558876310281), + UINT64_C(0xeaaef1c301d92167), UINT64_C(0xf1c746401671b4d3), UINT64_C(0x7d1888c23b2447e9), UINT64_C(0x72c44c19bde5d380), + UINT64_C(0x7a6156a99377bf58), UINT64_C(0xeafd8cb3722b6aa4), UINT64_C(0xa4b21df76c4ae4a6), UINT64_C(0xa612df347cb132bf), + UINT64_C(0x2f8331da53e4651f), UINT64_C(0x498baa43072061aa), UINT64_C(0x669cd34bdf522223), UINT64_C(0x611a32f117b489e3), + UINT64_C(0xb1d08c016e277a67), UINT64_C(0xb1d4d0937395b21f), UINT64_C(0x9d3e7447db71fd3d), UINT64_C(0x8d61714b54616249), + UINT64_C(0x91cfe6cad3939afb), UINT64_C(0x785efcfc1fbed3f8), UINT64_C(0xc7270e86e752b71a), UINT64_C(0xe91bc93a14e678c4), + UINT64_C(0x9bf095b9662cf95d), UINT64_C(0xa82d8d1309df2256), UINT64_C(0x41abc3fa674c6a06), UINT64_C(0x0e38a88b0398547e), + UINT64_C(0x6fe82427e8c24696), UINT64_C(0x0f20ed4a9e8e02c2), UINT64_C(0x5df70b3c4784b7e1), UINT64_C(0x000b2deddde9963c), + UINT64_C(0xc8929e6367803b53), UINT64_C(0xb28033a4c174c86d), UINT64_C(0x3a666b4c18406801), UINT64_C(0xbd8b5791ba056136), + UINT64_C(0x715ed0ae7c79e816), UINT64_C(0x577c1b256c64436a), UINT64_C(0x54a4f8d1b535e02d), UINT64_C(0xc8d7f16769d38240), + UINT64_C(0xb707839b15b0d3fc), UINT64_C(0x255def6be6755b91), UINT64_C(0x9bb54bbffd57d21f), UINT64_C(0xd882bcc3caa155e7), + UINT64_C(0x32706a042f57ab60), UINT64_C(0xf2f38aa7f8c31e8b), UINT64_C(0xa1e84cfff8dc3cae), UINT64_C(0xa703b9fc24c2e1db), + UINT64_C(0x8c3bd99cdd77d160), UINT64_C(0x4d4692d129444836), UINT64_C(0xef4b1c7cd501fd7d), UINT64_C(0xde07e34df48421ab), + UINT64_C(0xae4083dd864c910d), UINT64_C(0xfa4ba5e1a2d58460), UINT64_C(0x6f0068aa4e75a5ec), UINT64_C(0x0a9e07133b5a2abe), + UINT64_C(0x337739bfa36cecc8), UINT64_C(0xe3591f5cc97b787c), UINT64_C(0xf2bbe16b3ec41399), UINT64_C(0xf3dcc6246a758716), + UINT64_C(0xc73351933e7e2417), UINT64_C(0x0e1f947d867b0bdd), UINT64_C(0xe48bf8efb1f572a0), UINT64_C(0xd5b209d89f09fa2a), + UINT64_C(0x27478ae42843f9f1), UINT64_C(0x01b30ed80db664a5), UINT64_C(0x0181e5ed5e84cd8b), UINT64_C(0xf6318c19349acefb), + 
UINT64_C(0x69c8492982778f4b), UINT64_C(0x4af6702966bca750), UINT64_C(0xa8b4d353631e2482), UINT64_C(0x5ce04a70f584d238), + UINT64_C(0xfbf5b2cdc0394772), UINT64_C(0x104d44c77b80b6ae), UINT64_C(0xbe8e5a49d6ee3335), UINT64_C(0x5bf8f3f9a05f36f9), + UINT64_C(0x4be7aeb57af4a56a), UINT64_C(0xa09e9cd11d6ef9a7), UINT64_C(0x091ecc28674a929a), UINT64_C(0xad2c90bc1f89d87f), + UINT64_C(0xbf25df5f95456364), UINT64_C(0x7b104f2289b28c07), UINT64_C(0x902272c148ddc16d), UINT64_C(0x3285c7b614a096f3), + UINT64_C(0x6491973c285a2f0f), UINT64_C(0x31f84ba2ce5e3755), UINT64_C(0x3300c615947fd40c), UINT64_C(0x3c4747adf437f115), + UINT64_C(0x04fa56d556527742), UINT64_C(0xd7b45d6644b42059), UINT64_C(0x4cdea756d6091a28), UINT64_C(0x2431ed986745785b), + }, }, - }, // Level 7 - { - UINT64_C(0x1249b1f513689151), UINT64_C(0xc658fcfbfabe77d5), UINT64_C(0x000000042), 0, // sum of coeff and dummy { - UINT64_C(0xabaaefde77273dcd), UINT64_C(0xe737f9d4fba6ee5b), UINT64_C(0xc2c8521e524e50e7), UINT64_C(0xb6347dd4ecff2e08), - UINT64_C(0x81cc14e56b826c78), UINT64_C(0x7e96733438db219f), UINT64_C(0x93f66e8959ad9a5d), UINT64_C(0xad77e6ffafdfa01b), - UINT64_C(0x79842c77afd94c9a), UINT64_C(0xb2fe351094030a32), UINT64_C(0x04f00838dc236276), UINT64_C(0x1064827c937cd78b), - UINT64_C(0xa914296fc9de0469), UINT64_C(0x4a87b2d1971b2b6e), UINT64_C(0x1ef28858c6e99de6), UINT64_C(0x23429a77bea42f46), - UINT64_C(0xf771817be7a38b16), UINT64_C(0xcc348f7a13deb19a), UINT64_C(0x0a91d46fb1ae97e8), UINT64_C(0x753cdb5468c83c10), - UINT64_C(0x65cc613edbcd3f84), UINT64_C(0xcb157fac042d9ab2), UINT64_C(0x18e6a31aed525487), UINT64_C(0x5924230b1281b56d), - UINT64_C(0xb828c042782945ba), UINT64_C(0x2decd50526005abe), UINT64_C(0x05caa6f761c5857a), UINT64_C(0x4c93892d66de5320), - UINT64_C(0xac796b30f48a75b3), UINT64_C(0xe11728c76eab1822), UINT64_C(0xa59ec090b0f3ed2e), UINT64_C(0xada9c2e74edc137b), - UINT64_C(0x4ca60d77ed9f8e0d), UINT64_C(0x6304a44de4bc4219), UINT64_C(0x361436da34a05f49), UINT64_C(0x097fcaec609fd08f), - 
UINT64_C(0xf9f9ae511316dcce), UINT64_C(0xa62ca6c22fa94122), UINT64_C(0xb32ebc94594cf9c8), UINT64_C(0x1b673219068f53f7), - UINT64_C(0x28a8f7de358ea82b), UINT64_C(0x7d3e002bee6f572f), UINT64_C(0xbe24c789f9ddb580), UINT64_C(0x0257b24167d83acd), - UINT64_C(0x5651f9ac1cfa5113), UINT64_C(0x225aaaa55c5d72d4), UINT64_C(0x1bb9759abf1d08b0), UINT64_C(0x7c36896386d4f50c), - UINT64_C(0xdd4ceaf465f970eb), UINT64_C(0xf349d378bfd4beb9), UINT64_C(0xf2d9ea03c79109d8), UINT64_C(0xe915c84fab4efd66), - UINT64_C(0xe401bb6a403813b6), UINT64_C(0x2171265710c01426), UINT64_C(0x6542b43cba6a4d08), UINT64_C(0x58591c6e1250104f), - UINT64_C(0x77bc044ed6c4a7a0), UINT64_C(0x73b1a5f682fd2d52), UINT64_C(0x6c2b7083b26b9976), UINT64_C(0xf9e3b1347ceaaaca), - UINT64_C(0xa709263b9c304a96), UINT64_C(0x6c6fedc1e78481dc), UINT64_C(0xbec268cc818190e0), UINT64_C(0xbafa9271d75b733b), - UINT64_C(0xeace12cbb37fc677), UINT64_C(0x1176816b69b51d98), UINT64_C(0x62d28bbf94c2762d), UINT64_C(0x142b7d89bcc06043), - UINT64_C(0x8e166c13e205cc00), UINT64_C(0xac3dcf9c75177f8e), UINT64_C(0xc75695f82b7f6c46), UINT64_C(0xdff44c46fe5e7b6d), - UINT64_C(0x932846955828d471), UINT64_C(0x7593c5e733dca4d6), UINT64_C(0xf1efc8ad9718ca14), UINT64_C(0x93a618cb5b6aff34), - UINT64_C(0x1d89f5253c2f819f), UINT64_C(0x419744eb9c63d0b2), UINT64_C(0x2b07ff7747ed7c29), UINT64_C(0x617be6e4454749a0), - UINT64_C(0xaa24d8e4142c5bf4), UINT64_C(0xe25d6c2fe999691d), UINT64_C(0xf78965d974e8e076), UINT64_C(0x8e6203aa0037ae8e), - UINT64_C(0x732c3a3a561c6d79), UINT64_C(0xd61a9622b0da5c93), UINT64_C(0xfc1c73c6152a141b), UINT64_C(0x03a4694838529e5b), - UINT64_C(0x686cb297afba7101), UINT64_C(0xbee9f55d5260fbe2), UINT64_C(0xd53a374387aa4f2a), UINT64_C(0xc6b2494c1a96d781), - UINT64_C(0xbe8aa945ac411c10), UINT64_C(0xbfc814fa4da90048), UINT64_C(0xb46847e8ecaca5f4), UINT64_C(0x83466ccfb2037365), - UINT64_C(0x39bfd895a4917200), UINT64_C(0xfd6106ab889f9c14), UINT64_C(0x87d80fcd94875b38), UINT64_C(0xd05a5e75bdd29067), - UINT64_C(0xc8fbbb4d3e850e9d), 
UINT64_C(0xef2dc9eb5228f1ae), UINT64_C(0xc3775c3e9ac4da44), UINT64_C(0x12004ef1609624ed), - UINT64_C(0x43ec24f8c096ee25), UINT64_C(0xeb207061723522ad), UINT64_C(0xbd3767314ad773e4), UINT64_C(0x4b2059a2964d28f4), - UINT64_C(0xcd4522a02ed66868), UINT64_C(0x74c6b45b4b5b5657), UINT64_C(0x48bcc161232e14b1), UINT64_C(0x958c3b741a54bd75), - UINT64_C(0x2f64940639fedc7d), UINT64_C(0xc1321efa1c279cc3), UINT64_C(0x0680b3866e485f15), UINT64_C(0x5633b30c0c7c4a96), - UINT64_C(0xb5c9b8539fa9ea3c), UINT64_C(0x1fd67c7175c87172), UINT64_C(0xe03ed40e88bcdf23), UINT64_C(0x81a69e0147fbb776), - UINT64_C(0x244e2bf676590e87), UINT64_C(0x8a86357137c0d611), UINT64_C(0x4fcaad51eba3720f), UINT64_C(0x2b8b7b933f76e019), - UINT64_C(0xecff900b265d06f4), UINT64_C(0xbc3b359d2e438bbc), UINT64_C(0x086c671b288776d9), UINT64_C(0x652c4a2d18d847ba), + UINT64_C(0x1249b1f513689151), UINT64_C(0xc658fcfbfabe77d5), UINT64_C(0x000000042), 0, // sum of coeff and dummy + { + UINT64_C(0xabaaefde77273dcd), UINT64_C(0xe737f9d4fba6ee5b), UINT64_C(0xc2c8521e524e50e7), UINT64_C(0xb6347dd4ecff2e08), + UINT64_C(0x81cc14e56b826c78), UINT64_C(0x7e96733438db219f), UINT64_C(0x93f66e8959ad9a5d), UINT64_C(0xad77e6ffafdfa01b), + UINT64_C(0x79842c77afd94c9a), UINT64_C(0xb2fe351094030a32), UINT64_C(0x04f00838dc236276), UINT64_C(0x1064827c937cd78b), + UINT64_C(0xa914296fc9de0469), UINT64_C(0x4a87b2d1971b2b6e), UINT64_C(0x1ef28858c6e99de6), UINT64_C(0x23429a77bea42f46), + UINT64_C(0xf771817be7a38b16), UINT64_C(0xcc348f7a13deb19a), UINT64_C(0x0a91d46fb1ae97e8), UINT64_C(0x753cdb5468c83c10), + UINT64_C(0x65cc613edbcd3f84), UINT64_C(0xcb157fac042d9ab2), UINT64_C(0x18e6a31aed525487), UINT64_C(0x5924230b1281b56d), + UINT64_C(0xb828c042782945ba), UINT64_C(0x2decd50526005abe), UINT64_C(0x05caa6f761c5857a), UINT64_C(0x4c93892d66de5320), + UINT64_C(0xac796b30f48a75b3), UINT64_C(0xe11728c76eab1822), UINT64_C(0xa59ec090b0f3ed2e), UINT64_C(0xada9c2e74edc137b), + UINT64_C(0x4ca60d77ed9f8e0d), UINT64_C(0x6304a44de4bc4219), 
UINT64_C(0x361436da34a05f49), UINT64_C(0x097fcaec609fd08f), + UINT64_C(0xf9f9ae511316dcce), UINT64_C(0xa62ca6c22fa94122), UINT64_C(0xb32ebc94594cf9c8), UINT64_C(0x1b673219068f53f7), + UINT64_C(0x28a8f7de358ea82b), UINT64_C(0x7d3e002bee6f572f), UINT64_C(0xbe24c789f9ddb580), UINT64_C(0x0257b24167d83acd), + UINT64_C(0x5651f9ac1cfa5113), UINT64_C(0x225aaaa55c5d72d4), UINT64_C(0x1bb9759abf1d08b0), UINT64_C(0x7c36896386d4f50c), + UINT64_C(0xdd4ceaf465f970eb), UINT64_C(0xf349d378bfd4beb9), UINT64_C(0xf2d9ea03c79109d8), UINT64_C(0xe915c84fab4efd66), + UINT64_C(0xe401bb6a403813b6), UINT64_C(0x2171265710c01426), UINT64_C(0x6542b43cba6a4d08), UINT64_C(0x58591c6e1250104f), + UINT64_C(0x77bc044ed6c4a7a0), UINT64_C(0x73b1a5f682fd2d52), UINT64_C(0x6c2b7083b26b9976), UINT64_C(0xf9e3b1347ceaaaca), + UINT64_C(0xa709263b9c304a96), UINT64_C(0x6c6fedc1e78481dc), UINT64_C(0xbec268cc818190e0), UINT64_C(0xbafa9271d75b733b), + UINT64_C(0xeace12cbb37fc677), UINT64_C(0x1176816b69b51d98), UINT64_C(0x62d28bbf94c2762d), UINT64_C(0x142b7d89bcc06043), + UINT64_C(0x8e166c13e205cc00), UINT64_C(0xac3dcf9c75177f8e), UINT64_C(0xc75695f82b7f6c46), UINT64_C(0xdff44c46fe5e7b6d), + UINT64_C(0x932846955828d471), UINT64_C(0x7593c5e733dca4d6), UINT64_C(0xf1efc8ad9718ca14), UINT64_C(0x93a618cb5b6aff34), + UINT64_C(0x1d89f5253c2f819f), UINT64_C(0x419744eb9c63d0b2), UINT64_C(0x2b07ff7747ed7c29), UINT64_C(0x617be6e4454749a0), + UINT64_C(0xaa24d8e4142c5bf4), UINT64_C(0xe25d6c2fe999691d), UINT64_C(0xf78965d974e8e076), UINT64_C(0x8e6203aa0037ae8e), + UINT64_C(0x732c3a3a561c6d79), UINT64_C(0xd61a9622b0da5c93), UINT64_C(0xfc1c73c6152a141b), UINT64_C(0x03a4694838529e5b), + UINT64_C(0x686cb297afba7101), UINT64_C(0xbee9f55d5260fbe2), UINT64_C(0xd53a374387aa4f2a), UINT64_C(0xc6b2494c1a96d781), + UINT64_C(0xbe8aa945ac411c10), UINT64_C(0xbfc814fa4da90048), UINT64_C(0xb46847e8ecaca5f4), UINT64_C(0x83466ccfb2037365), + UINT64_C(0x39bfd895a4917200), UINT64_C(0xfd6106ab889f9c14), UINT64_C(0x87d80fcd94875b38), 
UINT64_C(0xd05a5e75bdd29067), + UINT64_C(0xc8fbbb4d3e850e9d), UINT64_C(0xef2dc9eb5228f1ae), UINT64_C(0xc3775c3e9ac4da44), UINT64_C(0x12004ef1609624ed), + UINT64_C(0x43ec24f8c096ee25), UINT64_C(0xeb207061723522ad), UINT64_C(0xbd3767314ad773e4), UINT64_C(0x4b2059a2964d28f4), + UINT64_C(0xcd4522a02ed66868), UINT64_C(0x74c6b45b4b5b5657), UINT64_C(0x48bcc161232e14b1), UINT64_C(0x958c3b741a54bd75), + UINT64_C(0x2f64940639fedc7d), UINT64_C(0xc1321efa1c279cc3), UINT64_C(0x0680b3866e485f15), UINT64_C(0x5633b30c0c7c4a96), + UINT64_C(0xb5c9b8539fa9ea3c), UINT64_C(0x1fd67c7175c87172), UINT64_C(0xe03ed40e88bcdf23), UINT64_C(0x81a69e0147fbb776), + UINT64_C(0x244e2bf676590e87), UINT64_C(0x8a86357137c0d611), UINT64_C(0x4fcaad51eba3720f), UINT64_C(0x2b8b7b933f76e019), + UINT64_C(0xecff900b265d06f4), UINT64_C(0xbc3b359d2e438bbc), UINT64_C(0x086c671b288776d9), UINT64_C(0x652c4a2d18d847ba), + }, }, - }, }; -//STATIC_ASSERT(PMPML_64_LEVELS <= 8, "Only 8 levels of data currently exist"); +// STATIC_ASSERT(PMPML_64_LEVELS <= 8, "Only 8 levels of data currently exist"); //------------------------------------------------------------- // Common math routines -static inline uint32_t fmix32_short(uint32_t h) { - h ^= h >> 13; - h *= 0xab3be54f; - h ^= h >> 16; +static inline uint32_t fmix32_short( uint32_t h ) { + h ^= h >> 13; + h *= 0xab3be54f; + h ^= h >> 16; - return h; + return h; } -static inline uint64_t fmix64_short(uint64_t k) { - k ^= k >> 33; - k *= UINT64_C(0xc4ceb9fe1a85ec53 ); - k ^= k >> 33; +static inline uint64_t fmix64_short( uint64_t k ) { + k ^= k >> 33; + k *= UINT64_C(0xc4ceb9fe1a85ec53); + k ^= k >> 33; - return k; + return k; } #define UInt32x32To64(a, b) ((uint64_t)(((uint64_t)((uint32_t)(a))) * ((uint32_t)(b)))) @@ -664,1158 +661,1253 @@ static inline uint64_t fmix64_short(uint64_t k) { // 32-bit hash static inline -void multiply32x32to64(uint32_t& rhi, uint32_t& rlo, uint32_t a, uint32_t b) { +void multiply32x32to64( uint32_t & rhi, uint32_t & rlo, uint32_t a, 
uint32_t b ) { mult32_64(rlo, rhi, a, b); } static inline -void add64(uint32_t& loWord, uint32_t& hiWord, uint32_t& hhWord, uint32_t& loAdd, uint32_t& hiAdd, uint32_t& hhAdd) { +void add64( uint32_t & loWord, uint32_t & hiWord, uint32_t & hhWord, uint32_t & loAdd, uint32_t & hiAdd, uint32_t & hhAdd ) { add96(loWord, hiWord, hhWord, loAdd, hiAdd, hhAdd); } static FORCE_INLINE -void mul32x32to64addto96(uint32_t& loWord, uint32_t& hiWord, uint32_t& hhWord, uint32_t a, uint32_t b) { +void mul32x32to64addto96( uint32_t & loWord, uint32_t & hiWord, uint32_t & hhWord, uint32_t a, uint32_t b ) { fma32_96(loWord, hiWord, hhWord, a, b); } #define PMPML_CHUNK_LOOP_INTRO_L0 \ - uint32_t ctr; \ - ctr = 0; \ + uint32_t ctr; \ + ctr = 0; \ ULARGE_INTEGER__XX mul; // Input data is read in 32-bit chunks. -#define PMPML_CHUNK_LOOP_BODY_ULI_T1( i ) \ - /*multiply32x32to64(mul.HighPart, mul.LowPart, x[i], coeff[ i ]); \ +#define PMPML_CHUNK_LOOP_BODY_ULI_T1( i ) \ + /*multiply32x32to64(mul.HighPart, mul.LowPart, x[i], coeff[ i ]); \ add64(constTerm.LowPart, constTerm.HighPart, ctr, mul.LowPart, mul.HighPart, zero);*/ \ mul32x32to64addto96(constTerm.LowPart, constTerm.HighPart, ctr, GET_U32((const uint8_t*)x, (i)*sizeof(x[0])), coeff[ i ]); // Hash data from previous blocks is read in 64-bit chunks, and always // in native endian format. 
-#define PMPML_CHUNK_LOOP_BODY_ULI_T1_64( i ) \ - /*multiply32x32to64(mul.HighPart, mul.LowPart, x[i], coeff[ i ]); \ +#define PMPML_CHUNK_LOOP_BODY_ULI_T1_64( i ) \ + /*multiply32x32to64(mul.HighPart, mul.LowPart, x[i], coeff[ i ]); \ add64(constTerm.LowPart, constTerm.HighPart, ctr, mul.LowPart, mul.HighPart, zero);*/ \ mul32x32to64addto96(constTerm.LowPart, constTerm.HighPart, ctr, GET_U64((const uint8_t*)x, (i)*sizeof(x[0])), coeff[ i ]); -#define PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST \ - /*multiply32x32to64(mul.HighPart, mul.LowPart, xLast, coeff[ size ]); \ - add64(constTerm.LowPart, constTerm.HighPart, ctr, mul.LowPart, mul.HighPart);*/ \ +#define PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST \ + /*multiply32x32to64(mul.HighPart, mul.LowPart, xLast, coeff[ size ]); \ + add64(constTerm.LowPart, constTerm.HighPart, ctr, mul.LowPart, mul.HighPart);*/ \ mul32x32to64addto96(constTerm.LowPart, constTerm.HighPart, ctr, xLast, coeff[ size ]); \ #define PMPML_CHUNK_LOOP_PRE_REDUCE_L0 /* -#define PMPML_MOD_2_32_PLUS_15( x, y ) \ - x = (uint32_t)x + UINT64_C(0xF000000E1) - (( (uint64_t)x >> 32 ) << 4) + ( x >> 32 ); \ - y = (uint32_t)x; \ - y -= ((uint32_t)(x >> 32 )) * 15; \ - if ( y < 0 ) y += PMPML_MAIN_PRIME; // y += PMPML_MAIN_PRIME * ( y < 0 ); - */ + #define PMPML_MOD_2_32_PLUS_15( x, y ) \ + * x = (uint32_t)x + UINT64_C(0xF000000E1) - (( (uint64_t)x >> 32 ) << 4) + ( x >> 32 ); \ + * y = (uint32_t)x; \ + * y -= ((uint32_t)(x >> 32 )) * 15; \ + * if ( y < 0 ) y += PMPML_MAIN_PRIME; // y += PMPML_MAIN_PRIME * ( y < 0 ); + */ #define PMPML_CHUNK_REDUCE_96_TO_64 -#define PMPML_CHUNK_REDUCE_64_TO_32 \ -{ \ - uint32_t lo, hi; \ - multiply32x32to64(hi, lo, constTerm.HighPart, 15); \ - uint32_t part = ctr * 225 + (hi << 4) - hi + 15; \ - constTerm.LowPart += part; \ - constTerm.HighPart = 1 + (constTerm.LowPart < part); \ - constTerm.HighPart -= (constTerm.LowPart < lo); \ - constTerm.LowPart -= lo; \ - if ( likely( constTerm.LowPart >= 30) ) { constTerm.LowPart -= constTerm.HighPart 
* 15; constTerm.HighPart = 0; } \ - else \ - { \ - if ( constTerm.HighPart ) \ - { \ - constTerm.LowPart -= constTerm.HighPart * 15; \ - constTerm.HighPart = 1; \ - if ( likely( constTerm.LowPart >= 15)) { constTerm.LowPart -= 15; constTerm.HighPart = 0; } \ - else \ - { \ - constTerm.LowPart -= 15; \ - constTerm.HighPart = 0; \ - } \ - } \ - } \ +#define PMPML_CHUNK_REDUCE_64_TO_32 \ +{ \ + uint32_t lo, hi; \ + multiply32x32to64(hi, lo, constTerm.HighPart, 15);\ + uint32_t part = ctr * 225 + (hi << 4) - hi + 15;\ + constTerm.LowPart += part; \ + constTerm.HighPart = 1 + (constTerm.LowPart < part);\ + constTerm.HighPart -= (constTerm.LowPart < lo);\ + constTerm.LowPart -= lo; \ + if ( likely( constTerm.LowPart >= 30) ) { \ + constTerm.LowPart -= constTerm.HighPart * 15; \ + constTerm.HighPart = 0; \ + } else { \ + if ( constTerm.HighPart ) { \ + constTerm.LowPart -= constTerm.HighPart * 15;\ + constTerm.HighPart = 1; \ + if ( likely( constTerm.LowPart >= 15)) {\ + constTerm.LowPart -= 15; \ + constTerm.HighPart = 0; \ + } else { \ + constTerm.LowPart -= 15;\ + constTerm.HighPart = 0;\ + } \ + } \ + } \ } #define PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN \ - PMPML_CHUNK_REDUCE_96_TO_64 \ - PMPML_CHUNK_REDUCE_64_TO_32 \ + PMPML_CHUNK_REDUCE_96_TO_64 \ + PMPML_CHUNK_REDUCE_64_TO_32 \ return constTerm.QuadPart; -#define PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN_32x32_ONLY \ -{ \ - constTerm.QuadPart = constTerm.LowPart + PMPML_MAIN_PRIME - constTerm.HighPart * UINT64_C( 15 ); \ - if ( likely( constTerm.LowPart >= 30) ) { constTerm.LowPart -= (constTerm.HighPart << 4) - constTerm.HighPart; return fmix32_short( constTerm.LowPart ); } \ - else \ - { \ - constTerm.LowPart -= constTerm.HighPart * 15; \ - if ( constTerm.LowPart < 30 ) return fmix32_short( constTerm.LowPart ); \ - else \ - { \ - constTerm.LowPart += 15; \ - return fmix32_short( constTerm.LowPart ); \ - } \ - } \ +#define PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN_32x32_ONLY \ +{ \ + 
constTerm.QuadPart = constTerm.LowPart + PMPML_MAIN_PRIME - constTerm.HighPart * UINT64_C( 15 );\ + if ( likely( constTerm.LowPart >= 30) ) { \ + constTerm.LowPart -= (constTerm.HighPart << 4) - constTerm.HighPart; \ + return fmix32_short( constTerm.LowPart ); \ + } else { \ + constTerm.LowPart -= constTerm.HighPart * 15; \ + if ( constTerm.LowPart < 30 ) { \ + return fmix32_short( constTerm.LowPart ); \ + } else { \ + constTerm.LowPart += 15; \ + return fmix32_short( constTerm.LowPart ); \ + } \ + } \ } -#define PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN \ -{ \ - uint32_t lo, hi; \ - multiply32x32to64(hi, lo, constTerm.HighPart, 15); \ - uint32_t part = ctr * 225 + (hi << 4) - hi + 15; \ - constTerm.LowPart += part; \ - constTerm.HighPart = 1 + (constTerm.LowPart < part); \ - constTerm.HighPart -= (constTerm.LowPart < lo); \ - constTerm.LowPart -= lo; \ - if ( likely( constTerm.LowPart >= 30) ) { constTerm.LowPart -= (constTerm.HighPart << 4) - constTerm.HighPart/*constTerm.HighPart * 15*/; return fmix32_short( constTerm.LowPart ); } \ - else \ - { \ - if ( constTerm.HighPart ) \ - { \ - constTerm.LowPart -= constTerm.HighPart * 15 - 15; \ - constTerm.HighPart = 1; \ - if ( likely( constTerm.LowPart >= 15)) { constTerm.LowPart -= 15; return fmix32_short( constTerm.LowPart ); } \ - else \ - { \ - return constTerm.LowPart; \ - } \ - } \ - else \ - return fmix32_short( constTerm.LowPart ); \ - } \ +#define PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN \ +{ \ + uint32_t lo, hi; \ + multiply32x32to64(hi, lo, constTerm.HighPart, 15); \ + uint32_t part = ctr * 225 + (hi << 4) - hi + 15; \ + constTerm.LowPart += part; \ + constTerm.HighPart = 1 + (constTerm.LowPart < part); \ + constTerm.HighPart -= (constTerm.LowPart < lo); \ + constTerm.LowPart -= lo; \ + if ( likely( constTerm.LowPart >= 30) ) { \ + constTerm.LowPart -= (constTerm.HighPart << 4) - constTerm.HighPart/*constTerm.HighPart * 15*/; \ + return fmix32_short( constTerm.LowPart ); \ + } else { 
\ + if ( constTerm.HighPart ) { \ + constTerm.LowPart -= constTerm.HighPart * 15 - 15; \ + constTerm.HighPart = 1; \ + if ( likely( constTerm.LowPart >= 15)) { \ + constTerm.LowPart -= 15; \ + return fmix32_short( constTerm.LowPart ); \ + } else { \ + return constTerm.LowPart; \ + } \ + } else { \ + return fmix32_short( constTerm.LowPart ); \ + } \ + } \ } -class PMP_Multilinear_Hasher_32 -{ +class PMP_Multilinear_Hasher_32 { private: - random_data_for_PMPML_32* curr_rd; - uint64_t coeff0; + random_data_for_PMPML_32 * curr_rd; + uint64_t coeff0; - // calls to be done from LEVEL=0 - template < bool bswap > - FORCE_INLINE uint64_t hash_of_string_chunk_compact( const uint32_t* coeff, ULARGE_INTEGER__XX constTerm, const uint32_t* x ) const { - PMPML_CHUNK_LOOP_INTRO_L0 + // calls to be done from LEVEL=0 + template + FORCE_INLINE uint64_t hash_of_string_chunk_compact( const uint32_t * coeff, + ULARGE_INTEGER__XX constTerm, const uint32_t * x ) const { + PMPML_CHUNK_LOOP_INTRO_L0 #if defined(HAVE_AVX2) && (PMPML_32_CHUNK_SIZE_LOG2 >= 3) __m256i ctr0, ctr1, mask_low; - __m256i a, data, product, temp; - uint64_t temp_fin; - int i; - - ctr0 = _mm256_setzero_si256 (); // Sets the 128-bit value to zero. 
- ctr1 = _mm256_setzero_si256 (); - mask_low = _mm256_set_epi32 ( 0, -1, 0 , -1, 0, -1, 0 , -1 ); - - uint32_t *x1, *x2, *x3, *c1, *c2, *c3; - -#if (PMPML_32_CHUNK_SIZE_LOG2 >= 6) - for ( i=0; i 3) - - a = _mm256_load_si256 ((__m256i *)(coeff+i+8)); - data = _mm256_loadu_si256 ((__m256i *)(x+i+8)); - product = _mm256_mul_epu32 ( data, a); - temp = _mm256_srli_epi64( product, 32 ); - ctr1 = _mm256_add_epi64 ( ctr1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - ctr0 = _mm256_add_epi64 ( ctr0, product );//ctr0 = _mm256_add_epi64 ( ctr0, temp ); - - a = _mm256_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm256_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm256_mul_epu32 ( data, a); - temp = _mm256_srli_epi64( product, 32 ); - ctr1 = _mm256_add_epi64 ( ctr1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - ctr0 = _mm256_add_epi64 ( ctr0, product );//ctr0 = _mm256_add_epi64 ( ctr0, temp ); - -#endif -#if (PMPML_32_CHUNK_SIZE_LOG2 > 4) - - a = _mm256_load_si256 ((__m256i *)(coeff+i+16)); - data = _mm256_loadu_si256 ((__m256i *)(x+i+16)); - product = _mm256_mul_epu32 ( data, a); - temp = _mm256_srli_epi64( product, 32 ); - ctr1 = _mm256_add_epi64 ( ctr1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - ctr0 = _mm256_add_epi64 ( ctr0, product );//ctr0 = _mm256_add_epi64 ( ctr0, temp ); - - a = _mm256_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm256_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm256_mul_epu32 ( data, a); - temp = _mm256_srli_epi64( product, 32 ); - ctr1 = _mm256_add_epi64 ( ctr1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - ctr0 = _mm256_add_epi64 ( ctr0, product );//ctr0 = _mm256_add_epi64 ( ctr0, temp ); - - a = _mm256_load_si256 ((__m256i *)(coeff+i+24)); - data = _mm256_loadu_si256 ((__m256i *)(x+i+24)); - product = _mm256_mul_epu32 ( data, a); - temp = _mm256_srli_epi64( product, 32 ); - ctr1 = _mm256_add_epi64 ( ctr1, temp ); - //temp = _mm256_and_si256 ( mask_low, 
product ); - ctr0 = _mm256_add_epi64 ( ctr0, product );//ctr0 = _mm256_add_epi64 ( ctr0, temp ); - - a = _mm256_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm256_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm256_mul_epu32 ( data, a); - temp = _mm256_srli_epi64( product, 32 ); - ctr1 = _mm256_add_epi64 ( ctr1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - ctr0 = _mm256_add_epi64 ( ctr0, product );//ctr0 = _mm256_add_epi64 ( ctr0, temp ); - -#endif -#if (PMPML_32_CHUNK_SIZE_LOG2 > 5) - - a = _mm256_load_si256 ((__m256i *)(coeff+i+32)); - data = _mm256_loadu_si256 ((__m256i *)(x+i+32)); - product = _mm256_mul_epu32 ( data, a); - temp = _mm256_srli_epi64( product, 32 ); - ctr1 = _mm256_add_epi64 ( ctr1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - ctr0 = _mm256_add_epi64 ( ctr0, product );//ctr0 = _mm256_add_epi64 ( ctr0, temp ); - - a = _mm256_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm256_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm256_mul_epu32 ( data, a); - temp = _mm256_srli_epi64( product, 32 ); - ctr1 = _mm256_add_epi64 ( ctr1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - ctr0 = _mm256_add_epi64 ( ctr0, product );//ctr0 = _mm256_add_epi64 ( ctr0, temp ); - - a = _mm256_load_si256 ((__m256i *)(coeff+i+40)); - data = _mm256_loadu_si256 ((__m256i *)(x+i+40)); - product = _mm256_mul_epu32 ( data, a); - temp = _mm256_srli_epi64( product, 32 ); - ctr1 = _mm256_add_epi64 ( ctr1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - ctr0 = _mm256_add_epi64 ( ctr0, product );//ctr0 = _mm256_add_epi64 ( ctr0, temp ); - - a = _mm256_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm256_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm256_mul_epu32 ( data, a); - temp = _mm256_srli_epi64( product, 32 ); - ctr1 = _mm256_add_epi64 ( ctr1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - ctr0 = _mm256_add_epi64 ( ctr0, product );//ctr0 = _mm256_add_epi64 ( ctr0, temp 
); - - a = _mm256_load_si256 ((__m256i *)(coeff+i+48)); - data = _mm256_loadu_si256 ((__m256i *)(x+i+48)); - product = _mm256_mul_epu32 ( data, a); - temp = _mm256_srli_epi64( product, 32 ); - ctr1 = _mm256_add_epi64 ( ctr1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - ctr0 = _mm256_add_epi64 ( ctr0, product );//ctr0 = _mm256_add_epi64 ( ctr0, temp ); - - a = _mm256_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm256_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm256_mul_epu32 ( data, a); - temp = _mm256_srli_epi64( product, 32 ); - ctr1 = _mm256_add_epi64 ( ctr1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - ctr0 = _mm256_add_epi64 ( ctr0, product );//ctr0 = _mm256_add_epi64 ( ctr0, temp ); - - a = _mm256_load_si256 ((__m256i *)(coeff+i+56)); - data = _mm256_loadu_si256 ((__m256i *)(x+i+56)); - product = _mm256_mul_epu32 ( data, a); - temp = _mm256_srli_epi64( product, 32 ); - ctr1 = _mm256_add_epi64 ( ctr1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - ctr0 = _mm256_add_epi64 ( ctr0, product );//ctr0 = _mm256_add_epi64 ( ctr0, temp ); - - a = _mm256_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm256_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm256_mul_epu32 ( data, a); - temp = _mm256_srli_epi64( product, 32 ); - ctr1 = _mm256_add_epi64 ( ctr1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - ctr0 = _mm256_add_epi64 ( ctr0, product );//ctr0 = _mm256_add_epi64 ( ctr0, temp ); -#endif - } - - temp = _mm256_unpackhi_epi64 ( ctr0, ctr1 ); // Interleaves the upper signed or unsigned 64-bit integer in a with the upper signed or unsigned 64-bit integer in b. r0 := a1 ; r1 := b1 ; ... - data = _mm256_unpacklo_epi64 ( ctr0, ctr1 ); // Interleaves the lower signed or unsigned 64-bit integer in a with the lower signed or unsigned 64-bit integer in b. r0 := a0 ; r1 := b0 ; ... 
- ctr1 = _mm256_add_epi64 ( data, temp ); - - uint64_t lo = *(uint64_t*)(&ctr1) + ((uint64_t*)(&ctr1))[2]; - uint64_t hi = ((uint64_t*)(&ctr1))[1] + ((uint64_t*)(&ctr1))[3]; - uint32_t lohi = lo >> 32; - uint32_t hilo = hi; - uint32_t diff = lohi - hilo; - hi += diff; - lo = (uint32_t)lo + (((uint64_t)(uint32_t)hi) << 32 ); - constTerm.QuadPart += lo; - ctr += constTerm.QuadPart < lo; - ctr += hi >> 32; + __m256i a, data, product, temp; + uint64_t temp_fin; + int i; + + ctr0 = _mm256_setzero_si256(); // Sets the 128-bit value to zero. + ctr1 = _mm256_setzero_si256(); + mask_low = _mm256_set_epi32(0, -1, 0, -1, 0, -1, 0, -1); + + uint32_t * x1, * x2, * x3, * c1, * c2, * c3; + + #if (PMPML_32_CHUNK_SIZE_LOG2 >= 6) + for (i = 0; i < PMPML_32_CHUNK_SIZE; i += 64) + #elif (PMPML_32_CHUNK_SIZE_LOG2 == 5) + for (i = 0; i < PMPML_32_CHUNK_SIZE; i += 32) + #elif (PMPML_32_CHUNK_SIZE_LOG2 == 4) + for (i = 0; i < PMPML_32_CHUNK_SIZE; i += 16) + #else + for (i = 0; i < PMPML_32_CHUNK_SIZE; i += 8) + #endif + { + a = _mm256_load_si256((__m256i * )(coeff + i)); // Loads 256-bit value. Address p must be 32-byte + // aligned. + data = _mm256_loadu_si256((__m256i *)(x + i)); // Loads 256-bit value. Address p does not need be + // 32-byte aligned. + product = _mm256_mul_epu32(data, a); // A 256-bit value that contains four 64-bit unsigned + // integers. The result can be expressed by the + // following equations. r0 := a0 * b0; r1 := a2 * b2; + // ... + temp = _mm256_srli_epi64(product, 32); // Shifts the 4 signed or unsigned 64-bit integers in + // a right by count bits while shifting in zeros. 
+ ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + a = _mm256_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm256_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + #if (PMPML_32_CHUNK_SIZE_LOG2 > 3) + + a = _mm256_load_si256((__m256i * )(coeff + i + 8)); + data = _mm256_loadu_si256((__m256i *)(x + i + 8)); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + a = _mm256_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm256_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + #endif + #if (PMPML_32_CHUNK_SIZE_LOG2 > 4) + + a = _mm256_load_si256((__m256i * )(coeff + i + 16)); + data = _mm256_loadu_si256((__m256i *)(x + i + 16)); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + a = _mm256_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm256_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 
32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + a = _mm256_load_si256((__m256i * )(coeff + i + 24)); + data = _mm256_loadu_si256((__m256i *)(x + i + 24)); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + a = _mm256_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm256_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + #endif + #if (PMPML_32_CHUNK_SIZE_LOG2 > 5) + + a = _mm256_load_si256((__m256i * )(coeff + i + 32)); + data = _mm256_loadu_si256((__m256i *)(x + i + 32)); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + a = _mm256_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm256_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + a = _mm256_load_si256((__m256i * )(coeff + i + 40)); + data = _mm256_loadu_si256((__m256i *)(x + i + 40)); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = 
_mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + a = _mm256_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm256_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + a = _mm256_load_si256((__m256i * )(coeff + i + 48)); + data = _mm256_loadu_si256((__m256i *)(x + i + 48)); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + a = _mm256_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm256_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + a = _mm256_load_si256((__m256i * )(coeff + i + 56)); + data = _mm256_loadu_si256((__m256i *)(x + i + 56)); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); // ctr0 = _mm256_add_epi64 ( ctr0, temp ); + + a = _mm256_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm256_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm256_mul_epu32(data, a); + temp = _mm256_srli_epi64(product, 32); + ctr1 = _mm256_add_epi64(ctr1, temp ); + // temp = _mm256_and_si256 ( mask_low, product ); + ctr0 = _mm256_add_epi64(ctr0, product); 
// ctr0 = _mm256_add_epi64 ( ctr0, temp ); + #endif + } + + temp = _mm256_unpackhi_epi64(ctr0, ctr1); // Interleaves the upper signed or unsigned 64-bit integer in a with + // the upper signed or unsigned 64-bit integer in b. r0 := a1 ; r1 := + // b1 ; ... + data = _mm256_unpacklo_epi64(ctr0, ctr1); // Interleaves the lower signed or unsigned 64-bit integer in a with + // the lower signed or unsigned 64-bit integer in b. r0 := a0 ; r1 := + // b0 ; ... + ctr1 = _mm256_add_epi64(data, temp); + + uint64_t lo = *(uint64_t *)(&ctr1) + ((uint64_t *)(&ctr1))[2]; + uint64_t hi = ((uint64_t *)(&ctr1))[1] + ((uint64_t *)(&ctr1))[3]; + uint32_t lohi = lo >> 32; + uint32_t hilo = hi; + uint32_t diff = lohi - hilo; + hi += diff; + lo = (uint32_t)lo + (((uint64_t)(uint32_t)hi) << 32); + constTerm.QuadPart += lo; + ctr += constTerm.QuadPart < lo; + ctr += hi >> 32; #elif defined(HAVE_SSE_2) && (PMPML_32_CHUNK_SIZE_LOG2 >= 2) - __m128i ctr0, ctr1, mask_low; - __m128i a, data, product, temp; - uint64_t temp_fin; - int i; - - ctr0 = _mm_setzero_si128 (); // Sets the 128-bit value to zero. 
- ctr1 = _mm_setzero_si128 (); - mask_low = _mm_set_epi32 ( 0, -1, 0 , -1 ); - - uint32_t *x1, *x2, *x3, *c1, *c2, *c3; - -#if (PMPML_32_CHUNK_SIZE_LOG2 >= 6) - for ( i=0; i 2) - - a = _mm_load_si128 ((__m128i *)(coeff+i+4)); - data = _mm_loadu_si128 ((__m128i *)(x+i+4)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - -#endif -#if (PMPML_32_CHUNK_SIZE_LOG2 > 3) - - a = _mm_load_si128 ((__m128i *)(coeff+i+8)); - data = _mm_loadu_si128 ((__m128i *)(x+i+8)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - - a = _mm_load_si128 ((__m128i *)(coeff+i+12)); - data = _mm_loadu_si128 ((__m128i *)(x+i+12)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 
); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - -#endif -#if (PMPML_32_CHUNK_SIZE_LOG2 > 4) - - a = _mm_load_si128 ((__m128i *)(coeff+i+16)); - data = _mm_loadu_si128 ((__m128i *)(x+i+16)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - - a = _mm_load_si128 ((__m128i *)(coeff+i+20)); - data = _mm_loadu_si128 ((__m128i *)(x+i+20)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - - a = _mm_load_si128 ((__m128i *)(coeff+i+24)); - data = _mm_loadu_si128 ((__m128i *)(x+i+24)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, 
product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - - a = _mm_load_si128 ((__m128i *)(coeff+i+28)); - data = _mm_loadu_si128 ((__m128i *)(x+i+28)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - -#endif -#if (PMPML_32_CHUNK_SIZE_LOG2 > 5) - - x1 = const_cast( x+i+36 ); - x2 = const_cast( x+i+40 ); - x3 = const_cast( x+i+44 ); - c1 = const_cast( coeff+i+36 ); - c2 = const_cast( coeff+i+40 ); - c3 = const_cast( coeff+i+44 ); - a = _mm_load_si128 ((__m128i *)(coeff+i+32)); - data = _mm_loadu_si128 ((__m128i *)(x+i+32)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = 
_mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - - a = _mm_load_si128 ((__m128i *)(c1)); - data = _mm_loadu_si128 ((__m128i *)(x1)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - - a = _mm_load_si128 ((__m128i *)(c2)); - data = _mm_loadu_si128 ((__m128i *)(x2)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - - a = _mm_load_si128 ((__m128i *)(c3)); - data = _mm_loadu_si128 ((__m128i *)(x3)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - 
//temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - - x1 = const_cast( x+i+52 ); - x2 = const_cast( x+i+56 ); - x3 = const_cast( x+i+60 ); - c1 = const_cast( coeff+i+52 ); - c2 = const_cast( coeff+i+56 ); - c3 = const_cast( coeff+i+60 ); - a = _mm_load_si128 ((__m128i *)(coeff+i+48)); - data = _mm_loadu_si128 ((__m128i *)(x+i+48)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - - a = _mm_load_si128 ((__m128i *)(c1)); - data = _mm_loadu_si128 ((__m128i *)(x1)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - - a = _mm_load_si128 ((__m128i *)(c2)); - data = _mm_loadu_si128 ((__m128i *)(x2)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = 
_mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - - a = _mm_load_si128 ((__m128i *)(c3)); - data = _mm_loadu_si128 ((__m128i *)(x3)); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); - - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); - temp = _mm_srli_epi64( product, 32 ); - ctr1 = _mm_add_epi64 ( ctr1, temp ); - //temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, product );//ctr0 = _mm_add_epi64 ( ctr0, temp ); -#endif - } - - temp = _mm_unpackhi_epi64 ( ctr0, ctr1 ); // Interleaves the upper signed or unsigned 64-bit integer in a with the upper signed or unsigned 64-bit integer in b. r0 := a1 ; r1 := b1 - data = _mm_unpacklo_epi64 ( ctr0, ctr1 ); // Interleaves the lower signed or unsigned 64-bit integer in a with the lower signed or unsigned 64-bit integer in b. r0 := a0 ; r1 := b0 - ctr1 = _mm_add_epi64 ( data, temp ); - -#if defined(_MSC_VER) - constTerm.QuadPart += ctr1.m128i_u32[0]; // Microsoft specific - ctr.QuadPart += ctr1.m128i_u64[1] + ctr1.m128i_u32[1]; -#elif defined(HAVE_SSE_4_1) - constTer.QuadPart += _mm_extract_epi32(ctr1,0); - ctr.QuadPart += _mm_extract_epi64(ctr1,0) + _mm_extract_epi32(ctr1,1); -#elif (defined __arm__ || defined __aarch64__) + __m128i ctr0, ctr1, mask_low; + __m128i a, data, product, temp; + uint64_t temp_fin; + int i; + + ctr0 = _mm_setzero_si128(); // Sets the 128-bit value to zero. 
+ ctr1 = _mm_setzero_si128(); + mask_low = _mm_set_epi32(0, -1, 0, -1); + + uint32_t * x1, * x2, * x3, * c1, * c2, * c3; + + #if (PMPML_32_CHUNK_SIZE_LOG2 >= 6) + for (i = 0; i < PMPML_32_CHUNK_SIZE; i += 64) + #elif (PMPML_32_CHUNK_SIZE_LOG2 == 5) + for (i = 0; i < PMPML_32_CHUNK_SIZE; i += 32) + #elif (PMPML_32_CHUNK_SIZE_LOG2 == 4) + for (i = 0; i < PMPML_32_CHUNK_SIZE; i += 16) + #elif (PMPML_32_CHUNK_SIZE_LOG2 == 3) + for (i = 0; i < PMPML_32_CHUNK_SIZE; i += 8) + #else + for (i = 0; i < PMPML_32_CHUNK_SIZE; i += 4) + #endif + { + a = _mm_load_si128((__m128i * )(coeff + i)); // Loads 128-bit value. Address p must be 16-byte + // aligned. + data = _mm_loadu_si128((__m128i *)(x + i)); // Loads 128-bit value. Address p does not need be + // 16-byte aligned. + product = _mm_mul_epu32(data, a); // A 128-bit value that contains two 64-bit unsigned + // integers. The result can be expressed by the following + // equations. r0 := a0 * b0; r1 := a2 * b2 + temp = _mm_srli_epi64(product, 32); // Shifts the 2 signed or unsigned 64-bit integers in a + // right by count bits while shifting in zeros. 
+ ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + #if (PMPML_32_CHUNK_SIZE_LOG2 > 2) + + a = _mm_load_si128((__m128i * )(coeff + i + 4)); + data = _mm_loadu_si128((__m128i *)(x + i + 4)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + #endif + #if (PMPML_32_CHUNK_SIZE_LOG2 > 3) + + a = _mm_load_si128((__m128i * )(coeff + i + 8)); + data = _mm_loadu_si128((__m128i *)(x + i + 8)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, 
product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_load_si128((__m128i * )(coeff + i + 12)); + data = _mm_loadu_si128((__m128i *)(x + i + 12)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + #endif + #if (PMPML_32_CHUNK_SIZE_LOG2 > 4) + + a = _mm_load_si128((__m128i * )(coeff + i + 16)); + data = _mm_loadu_si128((__m128i *)(x + i + 16)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_load_si128((__m128i * )(coeff + i + 20)); + data = _mm_loadu_si128((__m128i *)(x + i + 20)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 
64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_load_si128((__m128i * )(coeff + i + 24)); + data = _mm_loadu_si128((__m128i *)(x + i + 24)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_load_si128((__m128i * )(coeff + i + 28)); + data = _mm_loadu_si128((__m128i *)(x + i + 28)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + #endif + #if (PMPML_32_CHUNK_SIZE_LOG2 > 5) + + x1 = const_cast(x + i + 36); + x2 = const_cast(x + i + 40); + x3 = const_cast(x + i + 44); + c1 = const_cast(coeff + i + 36); + c2 = const_cast(coeff + i + 40); + c3 = const_cast(coeff + i + 44); + a = _mm_load_si128((__m128i * )(coeff + i + 32)); + data = 
_mm_loadu_si128((__m128i *)(x + i + 32)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_load_si128((__m128i * )(c1)); + data = _mm_loadu_si128((__m128i *)(x1)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_load_si128((__m128i * )(c2)); + data = _mm_loadu_si128((__m128i *)(x2)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); 
// ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_load_si128((__m128i * )(c3)); + data = _mm_loadu_si128((__m128i *)(x3)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + x1 = const_cast(x + i + 52); + x2 = const_cast(x + i + 56); + x3 = const_cast(x + i + 60); + c1 = const_cast(coeff + i + 52); + c2 = const_cast(coeff + i + 56); + c3 = const_cast(coeff + i + 60); + a = _mm_load_si128((__m128i * )(coeff + i + 48)); + data = _mm_loadu_si128((__m128i *)(x + i + 48)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_load_si128((__m128i * )(c1)); + data = _mm_loadu_si128((__m128i *)(x1)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 
1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_load_si128((__m128i * )(c2)); + data = _mm_loadu_si128((__m128i *)(x2)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_load_si128((__m128i * )(c3)); + data = _mm_loadu_si128((__m128i *)(x3)); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); + temp = _mm_srli_epi64(product, 32); + ctr1 = _mm_add_epi64(ctr1, temp ); + // temp = _mm_and_si128 ( mask_low, product ); + ctr0 = _mm_add_epi64(ctr0, product); // ctr0 = _mm_add_epi64 ( ctr0, temp ); + #endif + } + + temp = _mm_unpackhi_epi64(ctr0, ctr1); // Interleaves the upper signed or unsigned 64-bit integer in a with the + // upper signed or unsigned 64-bit integer in b. 
r0 := a1 ; r1 := b1 + data = _mm_unpacklo_epi64(ctr0, ctr1); // Interleaves the lower signed or unsigned 64-bit integer in a with the + // lower signed or unsigned 64-bit integer in b. r0 := a0 ; r1 := b0 + ctr1 = _mm_add_epi64(data, temp); + + #if defined(_MSC_VER) + constTerm.QuadPart += ctr1.m128i_u32[0]; // Microsoft specific + ctr.QuadPart += ctr1.m128i_u64[1] + ctr1.m128i_u32[1]; + #elif defined(HAVE_SSE_4_1) + constTer.QuadPart += _mm_extract_epi32(ctr1, 0); + ctr.QuadPart += _mm_extract_epi64(ctr1, 0) + _mm_extract_epi32(ctr1, 1); + #elif (defined __arm__ || defined __aarch64__) uint32_t b[4]; - _mm_storeu_si128((__m128i *)b,ctr1); - constTerm.QuadPart += b[0]; - ctr.QuadPart += b[1] + b[2] + ((uint64_t) b[3] <<32); -#else - uint64_t lo = ((uint64_t*)(&ctr1))[0]; - uint64_t hi = ((uint64_t*)(&ctr1))[1]; -/* constTerm.QuadPart += lo; - ctr += constTerm.QuadPart < lo; - constTerm.HighPart += ((uint32_t*)(&ctr1))[2]; - ctr += constTerm.HighPart < ((uint32_t*)(&ctr1))[2]; - ctr += ((uint32_t*)(&ctr1))[3];*/ - uint32_t lohi = lo >> 32; - uint32_t hilo = hi; - uint32_t diff = lohi - hilo; - hi += diff; - lo = (uint32_t)lo + (((uint64_t)(uint32_t)hi) << 32 ); - constTerm.QuadPart += lo; - ctr += constTerm.QuadPart < lo; - ctr += hi >> 32; -#endif + _mm_storeu_si128((__m128i *)b, ctr1); + constTerm.QuadPart += b[0]; + ctr.QuadPart += b[1] + b[2] + ((uint64_t)b[3] << 32); + #else + uint64_t lo = ((uint64_t *)(&ctr1))[0]; + uint64_t hi = ((uint64_t *)(&ctr1))[1]; +/* + * constTerm.QuadPart += lo; + * ctr += constTerm.QuadPart < lo; + * constTerm.HighPart += ((uint32_t*)(&ctr1))[2]; + * ctr += constTerm.HighPart < ((uint32_t*)(&ctr1))[2]; + * ctr += ((uint32_t*)(&ctr1))[3]; + */ + uint32_t lohi = lo >> 32; + uint32_t hilo = hi; + uint32_t diff = lohi - hilo; + hi += diff; + lo = (uint32_t)lo + (((uint64_t)(uint32_t)hi) << 32); + constTerm.QuadPart += lo; + ctr += constTerm.QuadPart < lo; + ctr += hi >> 32; + #endif #else // No AVX2 and no SSE - for ( uint32_t i=0; i 
2) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 7 + i ) -#endif - } + for (uint32_t i = 0; i < PMPML_32_CHUNK_SIZE; i += 8) { + PMPML_CHUNK_LOOP_BODY_ULI_T1(0 + i) + PMPML_CHUNK_LOOP_BODY_ULI_T1(1 + i) + PMPML_CHUNK_LOOP_BODY_ULI_T1(2 + i) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3 + i) + #if (PMPML_32_CHUNK_SIZE_LOG2 > 2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(4 + i) + PMPML_CHUNK_LOOP_BODY_ULI_T1(5 + i) + PMPML_CHUNK_LOOP_BODY_ULI_T1(6 + i) + PMPML_CHUNK_LOOP_BODY_ULI_T1(7 + i) + #endif + } #endif // PMPML_USE_SSE - PMPML_CHUNK_LOOP_PRE_REDUCE_L0 + PMPML_CHUNK_LOOP_PRE_REDUCE_L0 - PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN - } + PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN + } - template < bool bswap > - FORCE_INLINE uint64_t hash_of_beginning_of_string_chunk_type2( const uint32_t* coeff, ULARGE_INTEGER__XX constTerm, const unsigned char* tail, unsigned int tail_size ) const - { - PMPML_CHUNK_LOOP_INTRO_L0 - uint32_t size = tail_size >> PMPML_32_WORD_SIZE_BYTES_LOG2; - const uint32_t* x = (const uint32_t*)tail; + template + FORCE_INLINE uint64_t hash_of_beginning_of_string_chunk_type2( const uint32_t * coeff, ULARGE_INTEGER__XX constTerm, + const unsigned char * tail, unsigned int tail_size ) const { + PMPML_CHUNK_LOOP_INTRO_L0 + uint32_t size = tail_size >> PMPML_32_WORD_SIZE_BYTES_LOG2; + const uint32_t * x = (const uint32_t *)tail; #if defined(HAVE_SSE_2) - __m128i ctr0, ctr1, a, data, product, temp, mask_low; - int i; - - ctr0 = _mm_setzero_si128 (); // Sets the 128-bit value to zero. - ctr1 = _mm_setzero_si128 (); - mask_low = _mm_set_epi32 ( 0, -1, 0 , -1 ); - - for ( i=0; i<(size&0xFFFFFFF8); i+=4 ) - { - a = _mm_load_si128 ((__m128i *)(coeff+i)); // Loads 128-bit value. Address p must be 16-byte aligned. - data = _mm_loadu_si128 ((__m128i *)(x+i)); // Loads 128-bit value. Address p does not need be 16-byte aligned. 
- product = _mm_mul_epu32 ( data, a); // A 128-bit value that contains two 64-bit unsigned integers. The result can be expressed by the following equations. r0 := a0 * b0; r1 := a2 * b2 - temp = _mm_srli_epi64( product, 32 ); // Shifts the 2 signed or unsigned 64-bit integers in a right by count bits while shifting in zeros. - ctr1 = _mm_add_epi64 ( ctr1, temp ); - temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, temp ); - -// a = _mm_srli_epi64 ( a, 32 ); -// data = _mm_srli_epi64 ( data, 32 ); - a = _mm_shuffle_epi32( a, 1*1+0*4+3*16+2*64 ); - data = _mm_shuffle_epi32( data, 1*1+0*4+3*16+2*64 ); - product = _mm_mul_epu32 ( data, a); // A 128-bit value that contains two 64-bit unsigned integers. The result can be expressed by the following equations. r0 := a0 * b0; r1 := a2 * b2 - temp = _mm_srli_epi64( product, 32 ); // Shifts the 2 signed or unsigned 64-bit integers in a right by count bits while shifting in zeros. - ctr1 = _mm_add_epi64 ( ctr1, temp ); - temp = _mm_and_si128 ( mask_low, product ); - ctr0 = _mm_add_epi64 ( ctr0, temp ); - } - - temp = _mm_unpackhi_epi64 ( ctr0, ctr1 ); // Interleaves the upper signed or unsigned 64-bit integer in a with the upper signed or unsigned 64-bit integer in b. r0 := a1 ; r1 := b1 - data = _mm_unpacklo_epi64 ( ctr0, ctr1 ); // Interleaves the lower signed or unsigned 64-bit integer in a with the lower signed or unsigned 64-bit integer in b. r0 := a0 ; r1 := b0 - ctr1 = _mm_add_epi64 ( data, temp ); - -#if defined(_MSC_VER) + __m128i ctr0, ctr1, a, data, product, temp, mask_low; + int i; + + ctr0 = _mm_setzero_si128(); // Sets the 128-bit value to zero. + ctr1 = _mm_setzero_si128(); + mask_low = _mm_set_epi32(0, -1, 0, -1); + + for (i = 0; i < (size & 0xFFFFFFF8); i += 4) { + a = _mm_load_si128((__m128i * )(coeff + i)); // Loads 128-bit value. Address p must be 16-byte + // aligned. + data = _mm_loadu_si128((__m128i *)(x + i)); // Loads 128-bit value. 
Address p does not need be + // 16-byte aligned. + product = _mm_mul_epu32(data, a); // A 128-bit value that contains two 64-bit unsigned + // integers. The result can be expressed by the following + // equations. r0 := a0 * b0; r1 := a2 * b2 + temp = _mm_srli_epi64(product, 32); // Shifts the 2 signed or unsigned 64-bit integers in a + // right by count bits while shifting in zeros. + ctr1 = _mm_add_epi64(ctr1, temp); + temp = _mm_and_si128(mask_low, product); + ctr0 = _mm_add_epi64(ctr0, temp); + +// a = _mm_srli_epi64 ( a, 32 ); +// data = _mm_srli_epi64 ( data, 32 ); + a = _mm_shuffle_epi32(a , 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + data = _mm_shuffle_epi32(data, 1 * 1 + 0 * 4 + 3 * 16 + 2 * 64); + product = _mm_mul_epu32(data, a); // A 128-bit value that contains two 64-bit unsigned integers. The + // result can be expressed by the following equations. r0 := a0 * b0; + // r1 := a2 * b2 + temp = _mm_srli_epi64(product, 32); // Shifts the 2 signed or unsigned 64-bit integers in a right by + // count bits while shifting in zeros. + ctr1 = _mm_add_epi64(ctr1, temp); + temp = _mm_and_si128(mask_low, product); + ctr0 = _mm_add_epi64(ctr0, temp); + } + + temp = _mm_unpackhi_epi64(ctr0, ctr1); // Interleaves the upper signed or unsigned 64-bit integer in a with the + // upper signed or unsigned 64-bit integer in b. r0 := a1 ; r1 := b1 + data = _mm_unpacklo_epi64(ctr0, ctr1); // Interleaves the lower signed or unsigned 64-bit integer in a with the + // lower signed or unsigned 64-bit integer in b. 
r0 := a0 ; r1 := b0 + ctr1 = _mm_add_epi64(data, temp); + + #if defined(_MSC_VER) constTerm.QuadPart += ctr1.m128i_u32[0]; // Microsoft specific - ctr.QuadPart += ctr1.m128i_u64[1] + ctr1.m128i_u32[1]; -#elif 0 && defined( __SSE4_1__) - constTerm.QuadPart += _mm_extract_epi32(ctr1,0); - ctr.QuadPart += _mm_extract_epi64(ctr1,0) + _mm_extract_epi32(ctr1,1); -#elif 0 && defined(IDEK) + ctr.QuadPart += ctr1.m128i_u64[1] + ctr1.m128i_u32[1]; + #elif 0 && defined(__SSE4_1__) + constTerm.QuadPart += _mm_extract_epi32(ctr1, 0); + ctr.QuadPart += _mm_extract_epi64(ctr1, 0) + _mm_extract_epi32(ctr1, 1); + #elif 0 && defined(IDEK) uint32_t b[4]; - _mm_storeu_si128((__m128i *)b,ctr1); + _mm_storeu_si128((__m128i *)b, ctr1); constTerm.QuadPart += b[0]; - ctr.QuadPart += b[1] + b[2] + ((uint64_t) b[3] <<32); -#else - constTerm.QuadPart += *(uint64_t*)(&ctr1); - ctr += constTerm.QuadPart < *(uint64_t*)(&ctr1); - constTerm.HighPart += ((uint32_t*)(&ctr1))[2]; - ctr += constTerm.HighPart < ((uint32_t*)(&ctr1))[2]; - ctr += ((uint32_t*)(&ctr1))[3]; -#endif + ctr.QuadPart += b[1] + b[2] + ((uint64_t)b[3] << 32); + #else + constTerm.QuadPart += * (uint64_t *)(&ctr1); + ctr += constTerm.QuadPart < *(uint64_t *)(&ctr1); + constTerm.HighPart += ((uint32_t *)(&ctr1))[2]; + ctr += constTerm.HighPart < ((uint32_t *)(&ctr1))[2]; + ctr += ((uint32_t *)(&ctr1))[3]; + #endif #else // HAVE_SSE_2 - for ( uint32_t i=0; i<(size&0xFFFFFFF8); i+=8 ) - { - PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 + i ) -#if (PMPML_32_CHUNK_SIZE_LOG2 > 2) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 7 + i ) -#endif - } + for (uint32_t i = 0; i < (size & 0xFFFFFFF8); i += 8) { + PMPML_CHUNK_LOOP_BODY_ULI_T1(0 + i) + PMPML_CHUNK_LOOP_BODY_ULI_T1(1 + i) + PMPML_CHUNK_LOOP_BODY_ULI_T1(2 + i) + 
PMPML_CHUNK_LOOP_BODY_ULI_T1(3 + i) + #if (PMPML_32_CHUNK_SIZE_LOG2 > 2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(4 + i) + PMPML_CHUNK_LOOP_BODY_ULI_T1(5 + i) + PMPML_CHUNK_LOOP_BODY_ULI_T1(6 + i) + PMPML_CHUNK_LOOP_BODY_ULI_T1(7 + i) + #endif + } #endif // HAVE_SSE_2 - uint32_t offset = size & 0xFFFFFFF8; - - switch( size & 0x7 ) - { - case 0: { break; } - case 1: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 + offset ) } break; - case 2: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 + offset ) } break; - case 3: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 + offset ) } break; - case 4: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 + offset ) } break; - case 5: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 + offset ) } break; - case 6: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 + offset ) } break; - case 7: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 + offset ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 + offset ) } break; - } - - uint32_t xLast; - switch ( tail_size & ( PMPML_32_WORD_SIZE_BYTES - 1 ) ) - { - case 0: { xLast = 0x1; break;} - case 1: { xLast = 0x100 | tail[tail_size-1]; break;} - case 2: { xLast = GET_U16(tail + tail_size - 2, 0) | 0x10000; break; } - case 3: { xLast = tail[ tail_size - 1 ]; xLast = ( 
xLast << 16 ) | GET_U16(tail + tail_size - 3, 0) | 0x1000000; break;} - } - - PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST - - PMPML_CHUNK_LOOP_PRE_REDUCE_L0 - - PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN - } + uint32_t offset = size & 0xFFFFFFF8; + + switch (size & 0x7) { + case 0: { break; } + case 1: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0 + offset) } + break; + case 2: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(1 + offset) } + break; + case 3: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(1 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1( + 2 + offset) } + break; + case 4: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(1 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1( + 2 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(3 + offset) } + break; + case 5: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(1 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1( + 2 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(3 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(4 + offset) } + break; + case 6: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(1 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1( + 2 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(3 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(4 + + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(5 + offset) } + break; + case 7: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(1 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1( + 2 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(3 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(4 + + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(5 + offset) PMPML_CHUNK_LOOP_BODY_ULI_T1(6 + offset) } + break; + } + + uint32_t xLast; + switch (tail_size & (PMPML_32_WORD_SIZE_BYTES - 1)) { + case 0: { xLast = 0x1; break; } + case 1: { xLast = 0x100 | tail[tail_size - 1]; break; } + case 2: { xLast = GET_U16(tail + tail_size - 2, 0) | 0x10000; break; } + case 3: { xLast = tail[tail_size - 1]; + xLast = (xLast << 16) | GET_U16(tail + tail_size - 3, 0) | 0x1000000; break; } + } + + 
PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST + + PMPML_CHUNK_LOOP_PRE_REDUCE_L0 + + PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN + } - // a call to be done from subsequent levels - FORCE_INLINE uint64_t hash_of_num_chunk( const uint32_t* coeff, ULARGE_INTEGER__XX constTerm, const uint64_t* x ) const - { - PMPML_CHUNK_LOOP_INTRO_L0 - - for ( uint32_t i=0; i 2) - PMPML_CHUNK_LOOP_BODY_ULI_T1_64( 4 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1_64( 5 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1_64( 6 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1_64( 7 + i ) + PMPML_CHUNK_LOOP_BODY_ULI_T1_64(4 + i) + PMPML_CHUNK_LOOP_BODY_ULI_T1_64(5 + i) + PMPML_CHUNK_LOOP_BODY_ULI_T1_64(6 + i) + PMPML_CHUNK_LOOP_BODY_ULI_T1_64(7 + i) #endif - } + } - PMPML_CHUNK_LOOP_PRE_REDUCE_L0 + PMPML_CHUNK_LOOP_PRE_REDUCE_L0 - PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN - } + PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN + } - // a call to be done from subsequent levels - FORCE_INLINE uint64_t hash_of_num_chunk_incomplete( const uint32_t* coeff, ULARGE_INTEGER__XX constTerm, ULARGE_INTEGER__XX prevConstTerm, ULARGE_INTEGER__XX coeffSum, const uint64_t* x, size_t count ) const - { - PMPML_CHUNK_LOOP_INTRO_L0 - - ULARGE_INTEGER__XX c_ctr; - c_ctr.QuadPart = 0; - - uint32_t i; - - if ( count < ( PMPML_32_CHUNK_SIZE >> 1 ) ) - { - for ( i=0; i 4 ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 + i ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 7 + i ) -#endif - }*/ + // a call to be done from subsequent levels + FORCE_INLINE uint64_t hash_of_num_chunk_incomplete( const uint32_t * coeff, ULARGE_INTEGER__XX constTerm, + ULARGE_INTEGER__XX prevConstTerm, ULARGE_INTEGER__XX coeffSum, const uint64_t * x, size_t count ) const { + PMPML_CHUNK_LOOP_INTRO_L0 + + ULARGE_INTEGER__XX c_ctr; + + c_ctr.QuadPart = 0; + + uint32_t i; + + if (count < (PMPML_32_CHUNK_SIZE >> 1)) { + for (i = 0; i < count; i++) { + PMPML_CHUNK_LOOP_BODY_ULI_T1_64(0 + i) + c_ctr.QuadPart += coeff[i]; + } + 
c_ctr.QuadPart = coeffSum.QuadPart - c_ctr.QuadPart; + } else { + for (i = 0; i < count; i++) { + PMPML_CHUNK_LOOP_BODY_ULI_T1_64(0 + i) + for (; i < PMPML_32_CHUNK_SIZE; i++) { + c_ctr.QuadPart += coeff[i]; + } + } + } + + ULARGE_INTEGER__XX lowProduct; + lowProduct.QuadPart = UInt32x32To64(c_ctr.LowPart, prevConstTerm.LowPart ); + ULARGE_INTEGER__XX midProduct; + midProduct.QuadPart = UInt32x32To64(c_ctr.LowPart, prevConstTerm.HighPart) + UInt32x32To64( + c_ctr.HighPart, prevConstTerm.LowPart); + midProduct.QuadPart += lowProduct.HighPart; + lowProduct.HighPart = midProduct.LowPart; + uint32_t hiProduct = c_ctr.HighPart * prevConstTerm.HighPart + midProduct.HighPart; + + constTerm.QuadPart += lowProduct.QuadPart; + ctr += hiProduct + (constTerm.QuadPart < lowProduct.QuadPart); - PMPML_CHUNK_LOOP_PRE_REDUCE_L0 +/* + * for ( uint32_t i=0; i 4 ) + * PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 + i ) + * PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 + i ) + * PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 + i ) + * PMPML_CHUNK_LOOP_BODY_ULI_T1( 7 + i ) + #endif + * } + */ - PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN - } + PMPML_CHUNK_LOOP_PRE_REDUCE_L0 - template < bool bswap > - FORCE_INLINE void procesNextValue( int level, uint64_t value, uint64_t * allValues, unsigned int * cnts, unsigned int& flag ) const - { - for ( int i=level;;i++ ) - { - // NOTE: it's not necessary to check whether ( i < PMPML_LEVELS ), - // if it is guaranteed that the string size is less than 1 << USHF_MACHINE_WORD_SIZE_BITS - allValues[ ( i << PMPML_32_CHUNK_SIZE_LOG2 ) + cnts[ i ] ] = value; - (cnts[ i ]) ++; - if ( cnts[ i ] != PMPML_32_CHUNK_SIZE ) - break; - cnts[ i ] = 0; - value = hash_of_num_chunk( curr_rd[ i ].random_coeff, *(ULARGE_INTEGER__XX*)(&(curr_rd[i].const_term)), allValues + ( i << PMPML_32_CHUNK_SIZE_LOG2 ) ); - if ( ( flag & ( 1 << i ) ) == 0 ) - { - cnts[ i + 1] = 0; - flag |= 1 << i; - } - } - } + PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN + } - template < bool bswap > - FORCE_INLINE uint64_t finalize( 
int level, uint64_t * allValues, unsigned int * cnts, unsigned int& flag ) const - { - for ( int i=level;;i++ ) - { -// assert ( level != PMPML_LEVELS ) - if ( ( ( flag & ( 1 << i ) ) == 0 ) && cnts[ i ] == 1 ) - { - return allValues[ i << PMPML_32_CHUNK_SIZE_LOG2 ]; - } - if ( cnts[ i ] ) - { -/* for ( int j=cnts[ i ]; j( i + 1, -/* hash_of_num_chunk( curr_rd[ i ].random_coeff, - *(ULARGE_INTEGER__XX*)(&(curr_rd[i].const_term)), - allValues + ( i << PMPML_CHUNK_SIZE_LOG2 ) ), */ - hash_of_num_chunk_incomplete( curr_rd[ i ].random_coeff, - *(ULARGE_INTEGER__XX*)(&(curr_rd[i].const_term)), - *(ULARGE_INTEGER__XX*)(&(curr_rd[i-1].const_term)), - *(ULARGE_INTEGER__XX*)(&(curr_rd[i].cachedSum)), - allValues + ( i << PMPML_32_CHUNK_SIZE_LOG2 ), - cnts[ i ]), - allValues, cnts, flag ); - } - } - } + template + FORCE_INLINE void procesNextValue( int level, uint64_t value, uint64_t * allValues, + unsigned int * cnts, unsigned int & flag ) const { + for (int i = level;; i++) { + // NOTE: it's not necessary to check whether ( i < PMPML_LEVELS ), + // if it is guaranteed that the string size is less than 1 << USHF_MACHINE_WORD_SIZE_BITS + allValues[(i << PMPML_32_CHUNK_SIZE_LOG2) + cnts[i]] = value; + (cnts[i])++; + if (cnts[i] != PMPML_32_CHUNK_SIZE) { + break; + } + cnts[i] = 0; + value = hash_of_num_chunk(curr_rd[i].random_coeff, *(ULARGE_INTEGER__XX *)(&(curr_rd[i].const_term)), + allValues + (i << PMPML_32_CHUNK_SIZE_LOG2)); + if ((flag & (1 << i)) == 0) { + cnts[i + 1] = 0; + flag |= 1 << i; + } + } + } + + template + FORCE_INLINE uint64_t finalize( int level, uint64_t * allValues, unsigned int * cnts, unsigned int & flag ) const { + for (int i = level;; i++) { +// assert ( level != PMPML_LEVELS ) + if (((flag & (1 << i)) == 0) && (cnts[i] == 1)) { + return allValues[i << PMPML_32_CHUNK_SIZE_LOG2]; + } + if (cnts[i]) { +/* + * for ( int j=cnts[ i ]; j(i + 1, +/* + * hash_of_num_chunk( curr_rd[ i ].random_coeff, + *(ULARGE_INTEGER__XX*)(&(curr_rd[i].const_term)), + * 
allValues + ( i << PMPML_CHUNK_SIZE_LOG2 ) ), + */ + hash_of_num_chunk_incomplete(curr_rd[i].random_coeff, *(ULARGE_INTEGER__XX *)(&(curr_rd[i].const_term)), + *(ULARGE_INTEGER__XX *)(&(curr_rd[i - 1].const_term)), *(ULARGE_INTEGER__XX *)(&(curr_rd[i].cachedSum)), + allValues + (i << PMPML_32_CHUNK_SIZE_LOG2), cnts[i]), allValues, cnts, flag); + } + } + } #if defined(_MSC_VER) && defined(HAVE_32BIT_PLATFORM) - template < uint32_t N, bool bswap > - static FORCE_INLINE uint32_t hash_size_SMALL_N( const unsigned char* chars ) const - { - const uint32_t* coeff = curr_rd[0].random_coeff; - ULARGE_INTEGER__XX constTerm = *(ULARGE_INTEGER__XX*)(&(curr_rd[0].const_term)); - uint32_t xLast; - - switch(N) { - case 0: break; - case 1: xLast = 0x100 + chars[0]; break - case 2: xLast = GET_U16(chars,0) + 0x10000; break; - case 3: xLast = chars[ 2 ]; xLast = ( xLast << 16 ) + GET_U16(chars,0) + 0x1000000; break; - case 4: xLast = GET_U32(chars, 0) + coeff[ 1 ]; break; - } - - if (N != 0) { - constTerm.QuadPart += UInt32x32To64( coeff[ 0 ], xLast ); - } else { - constTerm.QuadPart += coeff[ 0 ]; - } - - PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN_32x32_ONLY; - } -#define HASH_SIZE_XXX_BEGIN( XXX ) \ + template + static FORCE_INLINE uint32_t hash_size_SMALL_N( const unsigned char * chars ) const { + const uint32_t * coeff = curr_rd[0].random_coeff; + ULARGE_INTEGER__XX constTerm = *(ULARGE_INTEGER__XX *)(&(curr_rd[0].const_term)); + uint32_t xLast; + + switch (N) { + case 0: break; + case 1: xLast = 0x100 + chars[0]; break + case 2: xLast = GET_U16(chars, 0) + 0x10000; break; + case 3: xLast = chars[2]; xLast = (xLast << 16) + GET_U16(chars, 0) + 0x1000000; break; + case 4: xLast = GET_U32(chars, 0) + coeff[1]; break; + } + + if (N != 0) { + constTerm.QuadPart += UInt32x32To64(coeff[0], xLast); + } else { + constTerm.QuadPart += coeff[0]; + } + + PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN_32x32_ONLY; + } + +#define HASH_SIZE_XXX_BEGIN( XXX ) \ static 
FORCE_INLINE uint32_t hash_size_##XXX( const unsigned char* chars ) const \ - { \ - const uint32_t* coeff = curr_rd[0].random_coeff; \ - const uint32_t* x = (const uint32_t*)chars; \ - ULARGE_INTEGER__XX constTerm = *(ULARGE_INTEGER__XX*)(&(curr_rd[0].const_term)); \ - uint32_t xLast; \ - PMPML_CHUNK_LOOP_INTRO_L0 \ - uint32_t size = XXX >> PMPML_WORD_SIZE_BYTES_LOG2; - -#define HASH_SIZE_XXX_END \ - PMPML_CHUNK_LOOP_PRE_REDUCE_L0 \ - PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN \ + { \ + const uint32_t* coeff = curr_rd[0].random_coeff; \ + const uint32_t* x = (const uint32_t*)chars; \ + ULARGE_INTEGER__XX constTerm = *(ULARGE_INTEGER__XX*)(&(curr_rd[0].const_term));\ + uint32_t xLast; \ + PMPML_CHUNK_LOOP_INTRO_L0 \ + uint32_t size = XXX >> PMPML_WORD_SIZE_BYTES_LOG2; + +#define HASH_SIZE_XXX_END \ + PMPML_CHUNK_LOOP_PRE_REDUCE_L0 \ + PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN\ } -HASH_SIZE_XXX_BEGIN(28 ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 ) xLast = 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; -HASH_SIZE_XXX_END - -HASH_SIZE_XXX_BEGIN(29 ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 ) xLast = 0x100 + chars[28]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; -HASH_SIZE_XXX_END - -HASH_SIZE_XXX_BEGIN(30 ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 ) xLast = *((const unsigned short*)(chars + 28 )) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST -HASH_SIZE_XXX_END - 
-HASH_SIZE_XXX_BEGIN(31 ) - PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 ) xLast = chars[ 30 ]; xLast = ( xLast << 16 ) + *((const unsigned short*)(chars + 28 )) + 0x1000000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; -HASH_SIZE_XXX_END + HASH_SIZE_XXX_BEGIN(28) + PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) PMPML_CHUNK_LOOP_BODY_ULI_T1(3) + PMPML_CHUNK_LOOP_BODY_ULI_T1(4) PMPML_CHUNK_LOOP_BODY_ULI_T1(5) PMPML_CHUNK_LOOP_BODY_ULI_T1(6) xLast = + 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; + HASH_SIZE_XXX_END + + HASH_SIZE_XXX_BEGIN( 29 ) + PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) PMPML_CHUNK_LOOP_BODY_ULI_T1(3) + PMPML_CHUNK_LOOP_BODY_ULI_T1(4) PMPML_CHUNK_LOOP_BODY_ULI_T1(5) PMPML_CHUNK_LOOP_BODY_ULI_T1(6) xLast = + 0x100 + chars[28]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; + HASH_SIZE_XXX_END + + HASH_SIZE_XXX_BEGIN( 30 ) + PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) PMPML_CHUNK_LOOP_BODY_ULI_T1(3) + PMPML_CHUNK_LOOP_BODY_ULI_T1(4) PMPML_CHUNK_LOOP_BODY_ULI_T1(5) PMPML_CHUNK_LOOP_BODY_ULI_T1(6) xLast = + *((const unsigned short *)(chars + 28)) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST + HASH_SIZE_XXX_END + + HASH_SIZE_XXX_BEGIN( 31 ) + PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) PMPML_CHUNK_LOOP_BODY_ULI_T1(3) + PMPML_CHUNK_LOOP_BODY_ULI_T1(4) PMPML_CHUNK_LOOP_BODY_ULI_T1(5) PMPML_CHUNK_LOOP_BODY_ULI_T1(6) xLast = chars[30]; + xLast = (xLast << 16) + *((const unsigned short *)(chars + 28)) + 0x1000000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; + HASH_SIZE_XXX_END #endif // PMPML_MSC_32_WORKAROUND - template < bool bswap > - NEVER_INLINE uint32_t 
_hash_noRecursionNoInline_forLessThanChunk(const unsigned char* chars, unsigned int cnt) const - { - unsigned int i; - ULARGE_INTEGER__XX tmp_hash; - - tmp_hash.QuadPart = hash_of_beginning_of_string_chunk_type2( curr_rd[0].random_coeff, *(ULARGE_INTEGER__XX*)(&(curr_rd[0].const_term)), chars, cnt ); - if ( tmp_hash.HighPart == 0 ) //LIKELY - { - return fmix32_short( tmp_hash.LowPart ); - } - return tmp_hash.LowPart; - } + template + NEVER_INLINE uint32_t _hash_noRecursionNoInline_forLessThanChunk( const unsigned char * chars, unsigned int cnt ) const { + unsigned int i; + ULARGE_INTEGER__XX tmp_hash; + + tmp_hash.QuadPart = hash_of_beginning_of_string_chunk_type2(curr_rd[0].random_coeff, + *(ULARGE_INTEGER__XX *)(&(curr_rd[0].const_term)), chars, cnt); + if (tmp_hash.HighPart == 0) { // LIKELY + return fmix32_short(tmp_hash.LowPart); + } + return tmp_hash.LowPart; + } - template < bool bswap > - NEVER_INLINE uint32_t _hash_noRecursionNoInline_type2(const unsigned char* chars, unsigned int cnt) const - { - uint64_t allValues[ PMPML_32_LEVELS * PMPML_32_CHUNK_SIZE ]; - unsigned int cnts[ PMPML_32_LEVELS ]; - unsigned int flag; - cnts[ 1 ] = 0; - flag = 0; - - unsigned int i; - ULARGE_INTEGER__XX tmp_hash; - - // process full chunks - for ( i=0; i<(cnt>>PMPML_32_CHUNK_SIZE_BYTES_LOG2); i++ ) - { - tmp_hash.QuadPart = hash_of_string_chunk_compact( curr_rd[0].random_coeff, *(ULARGE_INTEGER__XX*)(&(curr_rd[0].const_term)), ((const uint32_t*)(chars)) + ( i << PMPML_32_CHUNK_SIZE_LOG2 ) ); - procesNextValue( 1, tmp_hash.QuadPart, allValues, cnts, flag ); - } - - // process remaining incomplete chunk(s) - // note: if string size is a multiple of chunk size, we create a new chunk (1,0,0,...0), - // so THIS PROCESSING IS ALWAYS PERFORMED - unsigned int tailCnt = cnt & ( PMPML_32_CHUNK_SIZE_BYTES - 1 ); - const unsigned char* tail = chars + ( (cnt>>PMPML_32_CHUNK_SIZE_BYTES_LOG2) << PMPML_32_CHUNK_SIZE_BYTES_LOG2 ); - - tmp_hash.QuadPart = 
hash_of_beginning_of_string_chunk_type2( curr_rd[0].random_coeff, *(ULARGE_INTEGER__XX*)(&(curr_rd[0].const_term)), tail, tailCnt ); - procesNextValue( 1, tmp_hash.QuadPart, allValues, cnts, flag ); - ULARGE_INTEGER__XX ret64; - ret64.QuadPart = finalize( 1, allValues, cnts, flag ); - if ( ret64.HighPart == 0 ) //LIKELY - { - return fmix32_short( ret64.LowPart ); - } - return ret64.LowPart; - } + template + NEVER_INLINE uint32_t _hash_noRecursionNoInline_type2( const unsigned char * chars, unsigned int cnt ) const { + uint64_t allValues[PMPML_32_LEVELS * PMPML_32_CHUNK_SIZE]; + unsigned int cnts[PMPML_32_LEVELS]; + unsigned int flag; + + cnts[1] = 0; + flag = 0; + + unsigned int i; + ULARGE_INTEGER__XX tmp_hash; + + // process full chunks + for (i = 0; i < (cnt >> PMPML_32_CHUNK_SIZE_BYTES_LOG2); i++) { + tmp_hash.QuadPart = hash_of_string_chunk_compact(curr_rd[0].random_coeff, + *(ULARGE_INTEGER__XX *)(&(curr_rd[0].const_term)), + ((const uint32_t *)(chars)) + (i << PMPML_32_CHUNK_SIZE_LOG2)); + procesNextValue(1, tmp_hash.QuadPart, allValues, cnts, flag); + } + + // process remaining incomplete chunk(s) + // note: if string size is a multiple of chunk size, we create a new chunk (1,0,0,...0), + // so THIS PROCESSING IS ALWAYS PERFORMED + unsigned int tailCnt = cnt & (PMPML_32_CHUNK_SIZE_BYTES - 1); + const unsigned char * tail = chars + ((cnt >> PMPML_32_CHUNK_SIZE_BYTES_LOG2) << PMPML_32_CHUNK_SIZE_BYTES_LOG2); + + tmp_hash.QuadPart = hash_of_beginning_of_string_chunk_type2(curr_rd[0].random_coeff, + *(ULARGE_INTEGER__XX *)(&(curr_rd[0].const_term)), tail, tailCnt); + procesNextValue(1, tmp_hash.QuadPart, allValues, cnts, flag); + ULARGE_INTEGER__XX ret64; + ret64.QuadPart = finalize(1, allValues, cnts, flag); + if (ret64.HighPart == 0) { // LIKELY + return fmix32_short(ret64.LowPart); + } + return ret64.LowPart; + } + + public: -public: - template < bool bswap > - FORCE_INLINE uint32_t hash( const unsigned char* chars, unsigned int cnt ) const - { - if ( 
likely(cnt < 32) ) - { - const uint32_t* coeff = curr_rd[0].random_coeff; - ULARGE_INTEGER__XX constTerm = *(ULARGE_INTEGER__XX*)(&(curr_rd[0].const_term)); - PMPML_CHUNK_LOOP_INTRO_L0 - uint32_t size = cnt >> PMPML_32_WORD_SIZE_BYTES_LOG2; - uint32_t xLast; + template + FORCE_INLINE uint32_t hash( const unsigned char * chars, unsigned int cnt ) const { + if (likely(cnt < 32)) { + const uint32_t * coeff = curr_rd[0].random_coeff; + ULARGE_INTEGER__XX constTerm = *(ULARGE_INTEGER__XX *)(&(curr_rd[0].const_term)); + PMPML_CHUNK_LOOP_INTRO_L0 + uint32_t size = cnt >> PMPML_32_WORD_SIZE_BYTES_LOG2; + uint32_t xLast; - const uint32_t* x = (const uint32_t*)chars; + const uint32_t * x = (const uint32_t *)chars; #if defined(_MSC_VER) && defined(HAVE_32BIT_PLATFORM) // enables MSVC-specific code that appears to be more efficient than a regular one; comment out, if not desired - switch ( cnt ) - { -/* case 0: { xLast = 0x1; constTerm.QuadPart += coeff[ 0 ]; PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN_32x32_ONLY; } - case 1: { xLast = 0x100 + chars[cnt-1]; constTerm.QuadPart += UInt32x32To64( coeff[ 0 ], xLast ); PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN_32x32_ONLY; } - case 2: { xLast = *((const unsigned short*)(chars + cnt - 2 )) + 0x10000; constTerm.QuadPart += UInt32x32To64( coeff[ 0 ], xLast ); PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN_32x32_ONLY; } - case 3: { xLast = chars[ cnt - 1 ]; xLast = ( xLast << 16 ) + *((const unsigned short*)(chars + cnt - 3 )) + 0x1000000; constTerm.QuadPart += UInt32x32To64( coeff[ 0 ], xLast ); PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN_32x32_ONLY; } - - case 0: { xLast = 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 1: { xLast = 0x100 + chars[cnt-1]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 2: { xLast = *((const unsigned short*)(chars + cnt - 2 )) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 3: { xLast = chars[ cnt - 1 ]; xLast = ( xLast << 16 ) + *((const 
unsigned short*)(chars + cnt - 3 )) + 0x1000000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } -*/ - case 0: { return hash_size_SMALL_N<0, bswap>( chars ); } - case 1: { return hash_size_SMALL_N<1, bswap>( chars ); } - case 2: { return hash_size_SMALL_N<2, bswap>( chars ); } - case 3: { return hash_size_SMALL_N<3, bswap>( chars ); } - case 4: { return hash_size_SMALL_N<4, bswap>( chars ); } -// case 4: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) xLast = 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; break; } - case 5: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) xLast = 0x100 + chars[4]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 6: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) xLast = GET_U16(chars, 4) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 7: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) xLast = chars[ 6 ]; xLast = ( xLast << 16 ) + GET_U16(chars, 4) + 0x1000000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 8: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) xLast = 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; break; } - case 9: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) xLast = 0x100 + chars[8]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 10: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) xLast = GET_U16(chars, 8) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 11: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) xLast = chars[ 10 ]; xLast = ( xLast << 16 ) + GET_U16(chars, 8) + 0x1000000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 12: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) xLast = 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; break; } - case 13: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) xLast = 0x100 + chars[12]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 14: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) 
PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) xLast = GET_U16(chars, 12) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 15: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) xLast = chars[ 14 ]; xLast = ( xLast << 16 ) + GET_U16(chars, 12) + 0x1000000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 16: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) xLast = 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; break; } - case 17: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) xLast = 0x100 + chars[16]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 18: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) xLast = GET_U16(chars, 16) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 19: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) xLast = chars[ 18 ]; xLast = ( xLast << 16 ) + GET_U16(chars, 16) + 0x1000000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 20: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) xLast = 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; break; } - case 21: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) xLast = 0x100 + chars[20]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 22: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) 
PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) xLast = GET_U16(chars, 20) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 23: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) xLast = chars[ 22 ]; xLast = ( xLast << 16 ) + GET_U16(chars, 20) + 0x1000000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 24: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) xLast = 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; break; } - case 25: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) xLast = 0x100 + chars[24]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 26: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) xLast = GET_U16(chars, 24) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 27: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) xLast = chars[ 26 ]; xLast = ( xLast << 16 ) + GET_U16(chars, 24) + 0x1000000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - -/* case 28: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 ) xLast = 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; break; } - case 29: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 
0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 ) xLast = 0x100 + chars[28]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - case 30: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 ) xLast = *((const unsigned short*)(chars + 28 )) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } - default: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 ) xLast = chars[ 30 ]; xLast = ( xLast << 16 ) + *((const unsigned short*)(chars + 28 )) + 0x1000000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } -*/ - case 28: { return hash_size_28( chars ); } - case 29: { return hash_size_29( chars ); } - case 30: { return hash_size_30( chars ); } - default: { return hash_size_31( chars ); } - } + switch (cnt) { +/* + * case 0: { xLast = 0x1; constTerm.QuadPart += coeff[ 0 ]; PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN_32x32_ONLY; } + * case 1: { xLast = 0x100 + chars[cnt-1]; constTerm.QuadPart += UInt32x32To64( coeff[ 0 ], + * xLast ); PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN_32x32_ONLY; } + * case 2: { xLast = *((const unsigned short*)(chars + cnt - 2 )) + 0x10000; constTerm.QuadPart + * += UInt32x32To64( coeff[ 0 ], xLast ); PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN_32x32_ONLY; } + * case 3: { xLast = chars[ cnt - 1 ]; xLast = ( xLast << 16 ) + *((const unsigned + * short*)(chars + cnt - 3 )) + 0x1000000; constTerm.QuadPart += UInt32x32To64( coeff[ 0 ], xLast ); PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN_32x32_ONLY; } + 
* + * case 0: { xLast = 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + * case 1: { xLast = 0x100 + chars[cnt-1]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + * case 2: { xLast = *((const unsigned short*)(chars + cnt - 2 )) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + * case 3: { xLast = chars[ cnt - 1 ]; xLast = ( xLast << 16 ) + *((const unsigned short*)(chars + + * cnt - 3 )) + 0x1000000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + */ + case 0 : { return hash_size_SMALL_N<0, bswap>(chars); } + case 1 : { return hash_size_SMALL_N<1, bswap>(chars); } + case 2 : { return hash_size_SMALL_N<2, bswap>(chars); } + case 3 : { return hash_size_SMALL_N<3, bswap>(chars); } + case 4 : { return hash_size_SMALL_N<4, bswap>(chars); } +// case 4: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) xLast = 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; break; } + case 5 : { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) xLast = 0x100 + + chars[4]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 6 : { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) xLast = + GET_U16(chars, 4) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 7 : { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) xLast = chars[6]; + xLast = (xLast << 16) + GET_U16(chars, 4) + 0x1000000; + PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 8: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) xLast = + 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; break; } + case 9: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) xLast = + 0x100 + chars[8]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 10: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) xLast = + GET_U16(chars, 8) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 11: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) xLast = chars[10]; + xLast = (xLast << 16) + GET_U16(chars, 8) + 0x1000000; + PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 12: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) 
PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) xLast = + 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; break; } + case 13: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) xLast = + 0x100 + chars[12]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 14: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) xLast = + GET_U16(chars, 12) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 15: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) xLast = + chars[14]; + xLast = (xLast << 16) + GET_U16(chars, 12) + 0x1000000; + PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 16: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) xLast = 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; + break; } + case 17: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) xLast = 0x100 + chars[16]; + PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 18: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) xLast = + GET_U16(chars, 16) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 19: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) xLast = chars[18]; xLast = (xLast << 16) + GET_U16( + chars, 16) + 0x1000000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 20: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) PMPML_CHUNK_LOOP_BODY_ULI_T1(4) xLast = + 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; break; } + case 21: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) 
PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) PMPML_CHUNK_LOOP_BODY_ULI_T1(4) xLast = + 0x100 + chars[20]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 22: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) PMPML_CHUNK_LOOP_BODY_ULI_T1(4) xLast = + GET_U16(chars, 20) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 23: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) PMPML_CHUNK_LOOP_BODY_ULI_T1(4) xLast = chars[22]; + xLast = (xLast << 16) + GET_U16(chars, 20) + 0x1000000; + PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 24: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) PMPML_CHUNK_LOOP_BODY_ULI_T1(4) PMPML_CHUNK_LOOP_BODY_ULI_T1(5) + xLast = 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; break; } + case 25: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) PMPML_CHUNK_LOOP_BODY_ULI_T1(4) PMPML_CHUNK_LOOP_BODY_ULI_T1(5) + xLast = 0x100 + chars[24]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 26: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) PMPML_CHUNK_LOOP_BODY_ULI_T1(4) PMPML_CHUNK_LOOP_BODY_ULI_T1(5) + xLast = GET_U16(chars, 24) + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + case 27: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) PMPML_CHUNK_LOOP_BODY_ULI_T1(4) PMPML_CHUNK_LOOP_BODY_ULI_T1(5) + xLast = chars[26]; + xLast = (xLast << 16) + GET_U16(chars, 24) + 0x1000000; + PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + +/* + * case 28: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( + * 1 
) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) + * PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 ) xLast = 0x1; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST_FOR_JUST_1; break; } + * case 29: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) + * PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) + * PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 ) xLast = 0x100 + chars[28]; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + * case 30: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) + * PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) + * PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 ) xLast = *((const unsigned short*)(chars + 28 )) + * + 0x10000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + * default: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) + * PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) + * PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 ) xLast = chars[ 30 ]; xLast = ( xLast << 16 ) + + * *((const unsigned short*)(chars + 28 )) + 0x1000000; PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST; break; } + */ + case 28: { return hash_size_28(chars); } + case 29: { return hash_size_29(chars); } + case 30: { return hash_size_30(chars); } + default: { return hash_size_31(chars); } + } #else - switch( size ) - { - case 0: { break; } - case 1: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) } break; - case 2: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) } break; - case 3: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) } break; - case 4: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) } 
break; - case 5: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) } break; - case 6: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) } break; - default: { PMPML_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 1 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 3 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 5 ) PMPML_CHUNK_LOOP_BODY_ULI_T1( 6 ) } break; - } - - switch ( cnt & ( PMPML_32_WORD_SIZE_BYTES - 1 ) ) - { - case 0: { xLast = 0x1; break;} - case 1: { xLast = 0x100 + chars[cnt-1]; break;} - case 2: { xLast = GET_U16(chars + cnt - 2, 0) + 0x10000; break; } - default: { xLast = chars[ cnt - 1 ]; xLast = ( xLast << 16 ) + GET_U16(chars + cnt - 3, 0) + 0x1000000; break;} - } - - PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST + switch (size) { + case 0: { break; } + case 1: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) } + break; + case 2: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) } + break; + case 3: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) } + break; + case 4: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) } + break; + case 5: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) PMPML_CHUNK_LOOP_BODY_ULI_T1(4) } + break; + case 6: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) PMPML_CHUNK_LOOP_BODY_ULI_T1(4) PMPML_CHUNK_LOOP_BODY_ULI_T1(5) } + break; + default: { PMPML_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_CHUNK_LOOP_BODY_ULI_T1(1) 
PMPML_CHUNK_LOOP_BODY_ULI_T1(2) + PMPML_CHUNK_LOOP_BODY_ULI_T1(3) PMPML_CHUNK_LOOP_BODY_ULI_T1(4) PMPML_CHUNK_LOOP_BODY_ULI_T1(5) + PMPML_CHUNK_LOOP_BODY_ULI_T1(6) } + break; + } + + switch (cnt & (PMPML_32_WORD_SIZE_BYTES - 1)) { + case 0: { xLast = 0x1; break; } + case 1: { xLast = 0x100 + chars[cnt - 1]; break; } + case 2: { xLast = GET_U16(chars + cnt - 2, 0) + 0x10000; break; } + default: { xLast = chars[cnt - 1]; xLast = (xLast << 16) + GET_U16(chars + cnt - 3, 0) + 0x1000000; break; } + } + + PMPML_CHUNK_LOOP_BODY_ULI_T1_LAST #endif // PMPML_MSC_32_WORKAROUND - PMPML_CHUNK_LOOP_PRE_REDUCE_L0 - - PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN - } - else if ( cnt < PMPML_32_CHUNK_SIZE_BYTES ) - { - return _hash_noRecursionNoInline_forLessThanChunk( chars, cnt ); - } - else - { - return _hash_noRecursionNoInline_type2( chars, cnt ); - } - } + PMPML_CHUNK_LOOP_PRE_REDUCE_L0 - PMP_Multilinear_Hasher_32() - { - curr_rd = (random_data_for_PMPML_32*)rd_for_PMPML_32; - coeff0 = curr_rd[0].const_term; - } - void seed( uint64_t seed ) - { - curr_rd[0].const_term = coeff0 ^ seed; - } -}; + PMPML_FULL_REDUCE_MOD_2_32_PLUS_15_AND_RETURN_RETURN + } else if (cnt < PMPML_32_CHUNK_SIZE_BYTES) { + return _hash_noRecursionNoInline_forLessThanChunk(chars, cnt); + } else { + return _hash_noRecursionNoInline_type2(chars, cnt); + } + } + + PMP_Multilinear_Hasher_32() { + curr_rd = (random_data_for_PMPML_32 *)rd_for_PMPML_32; + coeff0 = curr_rd[0].const_term; + } + + void seed( uint64_t seed ) { + curr_rd[0].const_term = coeff0 ^ seed; + } +}; // class PMP_Multilinear_Hasher_32 //------------------------------------------------------------- // 64-bit hash -static FORCE_INLINE void MultiplyWordLoHi(uint64_t& rlo, uint64_t& rhi, uint64_t a, uint64_t b) { +static FORCE_INLINE void MultiplyWordLoHi( uint64_t & rlo, uint64_t & rhi, uint64_t a, uint64_t b ) { mult64_128(rlo, rhi, a, b); } @@ -1823,7 +1915,7 @@ static FORCE_INLINE void MultiplyWordLoHi(uint64_t& rlo, uint64_t& rhi, 
uint64_t * Adds the 64-bit value in alo into the 128-bit * value spread across rhi:rlo. */ -static FORCE_INLINE void AccumulateLoHi(uint64_t& rlo, uint64_t& rhi, uint64_t alo) { +static FORCE_INLINE void AccumulateLoHi( uint64_t & rlo, uint64_t & rhi, uint64_t alo ) { add128(rlo, rhi, alo); } @@ -1831,7 +1923,8 @@ static FORCE_INLINE void AccumulateLoHi(uint64_t& rlo, uint64_t& rhi, uint64_t a * Adds the 192-bit value spread across ahi:ami:alo into the 192-bit * value spread across rhi:rmi:rlo. */ -static FORCE_INLINE void AccumulateLoMidHi(uint64_t& rlo, uint64_t& rmi, uint64_t& rhi, uint64_t alo, uint64_t ami, uint64_t ahi) { +static FORCE_INLINE void AccumulateLoMidHi( uint64_t & rlo, uint64_t & rmi, + uint64_t & rhi, uint64_t alo, uint64_t ami, uint64_t ahi ) { add192(rlo, rmi, rhi, alo, ami, ahi); } @@ -1839,7 +1932,7 @@ static FORCE_INLINE void AccumulateLoMidHi(uint64_t& rlo, uint64_t& rmi, uint64_ * Does a 64x64->128 multiply on a and b, and adds the result into the * 192-bit value spread across rhi:rmi:rlo. */ -static FORCE_INLINE void MultiplyAccumulateWordLoMidHi(uint64_t& rlo, uint64_t& rmi, uint64_t& rhi, uint64_t a, uint64_t b) { +static FORCE_INLINE void MultiplyAccumulateWordLoMidHi( uint64_t & rlo, uint64_t & rmi, uint64_t & rhi, uint64_t a, uint64_t b ) { fma64_192(rlo, rmi, rhi, a, b); } @@ -1847,713 +1940,775 @@ static FORCE_INLINE void MultiplyAccumulateWordLoMidHi(uint64_t& rlo, uint64_t& * Does a 64x64->128 multiply on a and b, and adds the result into the * 128-bit value spread across rhi:rlo. 
*/ -static FORCE_INLINE void MultiplyAccumulateWordLoHi(uint64_t& rlo, uint64_t& rhi, uint64_t a, uint64_t b) { +static FORCE_INLINE void MultiplyAccumulateWordLoHi( uint64_t & rlo, uint64_t & rhi, uint64_t a, uint64_t b ) { fma64_128(rlo, rhi, a, b); } -#define ADD_SHIFT_ADD_NORMALIZE( lo, hi ) { \ - uint32_t lohi = lo >> 32; \ - uint32_t hilo = hi; \ - uint32_t diff = lohi - hilo; \ - hi += diff; \ - lo = (uint32_t)lo + (((uint64_t)(uint32_t)hi) << 32 ); \ - hi >>= 32; \ +#define ADD_SHIFT_ADD_NORMALIZE( lo, hi ) { \ + uint32_t lohi = lo >> 32; \ + uint32_t hilo = hi; \ + uint32_t diff = lohi - hilo; \ + hi += diff; \ + lo = (uint32_t)lo + (((uint64_t)(uint32_t)hi) << 32 );\ + hi >>= 32; \ } -#define ADD_SHIFT_ADD_NORMALIZE_TO_UPPER( lo, hi ) {\ - uint32_t lohi = lo >> 32; \ - uint32_t hilo = hi; \ - uint32_t diff = lohi - hilo; \ - hi += diff; \ - lo = (uint32_t)lo; \ +#define ADD_SHIFT_ADD_NORMALIZE_TO_UPPER( lo, hi ) { \ + uint32_t lohi = lo >> 32; \ + uint32_t hilo = hi; \ + uint32_t diff = lohi - hilo; \ + hi += diff; \ + lo = (uint32_t)lo; \ } -#define PMPML_CHUNK_LOOP_INTRO_L0_64 \ - ULARGE_INTEGER__XX ctr0, ctr1, ctr2; \ - ctr0.QuadPart = constTerm.QuadPart; \ - ctr1.QuadPart = 0; \ - ctr2.QuadPart = 0;\ - ULARGE_INTEGER__XX ctr2_0, ctr2_1, ctr2_2, ctr2_3; \ - ctr2_0.QuadPart = 0; \ - ctr2_1.QuadPart = 0;\ - ctr2_2.QuadPart = 0; \ - ctr2_3.QuadPart = 0;\ +#define PMPML_CHUNK_LOOP_INTRO_L0_64 \ + ULARGE_INTEGER__XX ctr0, ctr1, ctr2; \ + ctr0.QuadPart = constTerm.QuadPart; \ + ctr1.QuadPart = 0; \ + ctr2.QuadPart = 0; \ + ULARGE_INTEGER__XX ctr2_0, ctr2_1, ctr2_2, ctr2_3;\ + ctr2_0.QuadPart = 0; \ + ctr2_1.QuadPart = 0; \ + ctr2_2.QuadPart = 0; \ + ctr2_3.QuadPart = 0; \ ULARGE_INTEGER__XX mulLow, mulHigh; -#define PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( i ) { \ - uint64_t xi = GET_U64((const uint8_t*)x,(i)*8); \ +#define PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( i ) { \ + uint64_t xi = GET_U64((const uint8_t*)x,(i)*8); \ 
MultiplyAccumulateWordLoMidHi(ctr0.QuadPart, ctr1.QuadPart, ctr2.QuadPart, xi, coeff[i]); \ } -#define PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( ii ) { \ - uint64_t xii = GET_U64((const uint8_t*)x,(ii)*8); \ +#define PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( ii ) { \ + uint64_t xii = GET_U64((const uint8_t*)x,(ii)*8); \ MultiplyAccumulateWordLoMidHi(ctr2_0.QuadPart, ctr2_1.QuadPart, ctr2_2.QuadPart, xii, coeff[ii]); \ } #define PMPML_64_CHUNK_LOOP_BODY_ULI_T1(i) PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(i) -#define _compensate_ { \ +#define _compensate_ { \ AccumulateLoMidHi(ctr0.QuadPart, ctr1.QuadPart, ctr2.QuadPart, ctr2_0.QuadPart, ctr2_1.QuadPart, ctr2_2.QuadPart); \ } -#define PMPML_64_CHUNK_LOOP_BODY_ULI_T1_LAST( size ) { \ +#define PMPML_64_CHUNK_LOOP_BODY_ULI_T1_LAST( size ) { \ MultiplyAccumulateWordLoMidHi(ctr0.QuadPart, ctr1.QuadPart, ctr2.QuadPart, xLast, coeff[size]); \ } -#define PMPML_64_CHUNK_LOOP_BODY_ULI_T2( i ) { \ - if (likely(x[i].HighPart == 0)) { \ - MultiplyAccumulateWordLoMidHi(ctr0.QuadPart, ctr1.QuadPart, ctr2.QuadPart, x[i].LowPart, coeff[i]); \ - } else { \ - MultiplyWordLoHi(mulLow.QuadPart, mulHigh.QuadPart, x[i].LowPart, coeff[i]); \ - mulHigh.QuadPart += x[i].HighPart * coeff[i]; \ +#define PMPML_64_CHUNK_LOOP_BODY_ULI_T2( i ) { \ + if (likely(x[i].HighPart == 0)) { \ + MultiplyAccumulateWordLoMidHi(ctr0.QuadPart, ctr1.QuadPart, ctr2.QuadPart, x[i].LowPart, coeff[i]); \ + } else { \ + MultiplyWordLoHi(mulLow.QuadPart, mulHigh.QuadPart, x[i].LowPart, coeff[i]); \ + mulHigh.QuadPart += x[i].HighPart * coeff[i]; \ MultiplyAccumulateWordLoMidHi(ctr0.QuadPart, ctr1.QuadPart, ctr2.QuadPart, mulLow.QuadPart, mulHigh.QuadPart); \ - } \ + } \ } -#define PMPML_64_CHUNK_LOOP_BODY_ULI_ADD_COEFF( i ) { \ - AccumulateLoHi(c_ctr0.QuadPart, c_ctr1.QuadPart, coeff[i]); \ +#define PMPML_64_CHUNK_LOOP_BODY_ULI_ADD_COEFF( i ) { \ + AccumulateLoHi(c_ctr0.QuadPart, c_ctr1.QuadPart, coeff[i]); \ } -#define PMPML_CHUNK_LOOP_BODY_ULI_T2_AND_ADD_COEFF_64( i ) { \ - 
AccumulateLoHi(c_ctr0.QuadPart, c_ctr1.QuadPart, coeff[i]); \ - if (likely(x[i].HighPart == 0)) { \ - MultiplyAccumulateWordLoMidHi(ctr0.QuadPart, ctr1.QuadPart, ctr2.QuadPart, x[i].LowPart, coeff[i]); \ - } else { \ - MultiplyWordLoHi(mulLow.QuadPart, mulHigh.QuadPart, x[i].LowPart, coeff[i]); \ - mulHigh.QuadPart += x[i].HighPart * coeff[i]; \ +#define PMPML_CHUNK_LOOP_BODY_ULI_T2_AND_ADD_COEFF_64( i ) { \ + AccumulateLoHi(c_ctr0.QuadPart, c_ctr1.QuadPart, coeff[i]); \ + if (likely(x[i].HighPart == 0)) { \ + MultiplyAccumulateWordLoMidHi(ctr0.QuadPart, ctr1.QuadPart, ctr2.QuadPart, x[i].LowPart, coeff[i]); \ + } else { \ + MultiplyWordLoHi(mulLow.QuadPart, mulHigh.QuadPart, x[i].LowPart, coeff[i]); \ + mulHigh.QuadPart += x[i].HighPart * coeff[i]; \ MultiplyAccumulateWordLoMidHi(ctr0.QuadPart, ctr1.QuadPart, ctr2.QuadPart, mulLow.QuadPart, mulHigh.QuadPart); \ - } \ + } \ } -#define PMPML_CHUNK_LOOP_BODY_ULI_T2_AND_ADD_SUM_OF_COEFF_64 { \ +#define PMPML_CHUNK_LOOP_BODY_ULI_T2_AND_ADD_SUM_OF_COEFF_64 { \ MultiplyAccumulateWordLoMidHi(ctr0.QuadPart, ctr1.QuadPart, ctr2.QuadPart, c_ctr0.QuadPart, prevConstTerm); \ - MultiplyAccumulateWordLoHi(ctr1.QuadPart, ctr2.QuadPart, c_ctr1.QuadPart, prevConstTerm); \ + MultiplyAccumulateWordLoHi(ctr1.QuadPart, ctr2.QuadPart, c_ctr1.QuadPart, prevConstTerm); \ } #define PMPML_CHUNK_LOOP_PRE_REDUCE_L0_64 -#define PMPML_CHUNK_REDUCE_128_TO_64 \ -{ \ - uint64_t hi, lo; \ +#define PMPML_CHUNK_REDUCE_128_TO_64 \ +{ \ + uint64_t hi, lo; \ MultiplyWordLoHi(lo, hi, ctr1.QuadPart, 13); \ - uint64_t part = ctr2.QuadPart * 169 + hi * 13 + 13; \ - ctr0.QuadPart += part; \ - ctr1.QuadPart = 1 + (ctr0.QuadPart < part); \ - ctr1.QuadPart -= (ctr0.QuadPart < lo); \ - ctr0.QuadPart -= lo; \ - if ( likely( ctr0.QuadPart >= 26) ) { ctr0.QuadPart -= ctr1.QuadPart * 13; ctr1.QuadPart = 0; } \ - else \ - { \ + uint64_t part = ctr2.QuadPart * 169 + hi * 13 + 13;\ + ctr0.QuadPart += part; \ + ctr1.QuadPart = 1 + (ctr0.QuadPart < part); \ + 
ctr1.QuadPart -= (ctr0.QuadPart < lo); \ + ctr0.QuadPart -= lo; \ + if ( likely( ctr0.QuadPart >= 26) ) { \ + ctr0.QuadPart -= ctr1.QuadPart * 13; \ + ctr1.QuadPart = 0; \ + } else { \ ctr0.QuadPart -= ctr1.QuadPart * 13; \ - if ( ctr0.QuadPart < 26 ) ctr1.QuadPart = 0; \ - else \ - { \ - ctr0.QuadPart += 13; \ - if ( ctr0.QuadPart < 13 ) ctr1.QuadPart = 1; \ - else ctr1.QuadPart = 0; \ - } \ - } \ + if ( ctr0.QuadPart < 26 ) { \ + ctr1.QuadPart = 0; \ + } else { \ + ctr0.QuadPart += 13; \ + if ( ctr0.QuadPart < 13 ) { \ + ctr1.QuadPart = 1; \ + } else { \ + ctr1.QuadPart = 0; \ + } \ + } \ + } \ } -#define PMPML_CHUNK_REDUCE_128_TO_64____ \ -{ \ - _compensate_ \ - uint64_t hi, lo; \ +#define PMPML_CHUNK_REDUCE_128_TO_64____ \ +{ \ + _compensate_ \ + uint64_t hi, lo; \ MultiplyWordLoHi(lo, hi, ctr1.QuadPart, 13); \ - uint64_t part = ctr2.QuadPart * 169 + hi * 13 + 13; \ - ctr0.QuadPart += part; \ - ctr1.QuadPart = 1 + (ctr0.QuadPart < part); \ - ctr1.QuadPart -= (ctr0.QuadPart < lo); \ - ctr0.QuadPart -= lo; \ - if ( likely( ctr0.QuadPart >= 26) ) { ctr0.QuadPart -= ctr1.QuadPart * 13; ctr1.QuadPart = 0; } \ - else \ - { \ + uint64_t part = ctr2.QuadPart * 169 + hi * 13 + 13;\ + ctr0.QuadPart += part; \ + ctr1.QuadPart = 1 + (ctr0.QuadPart < part); \ + ctr1.QuadPart -= (ctr0.QuadPart < lo); \ + ctr0.QuadPart -= lo; \ + if ( likely( ctr0.QuadPart >= 26) ) { \ + ctr0.QuadPart -= ctr1.QuadPart * 13; \ + ctr1.QuadPart = 0; \ + } else { \ ctr0.QuadPart -= ctr1.QuadPart * 13; \ - if ( ctr0.QuadPart < 26 ) ctr1.QuadPart = 0; \ - else \ - { \ - ctr0.QuadPart += 13; \ - if ( ctr0.QuadPart < 13 ) ctr1.QuadPart = 1; \ - else ctr1.QuadPart = 0; \ - } \ - } \ + if ( ctr0.QuadPart < 26 ) { \ + ctr1.QuadPart = 0; \ + } else { \ + ctr0.QuadPart += 13; \ + if ( ctr0.QuadPart < 13 ) { \ + ctr1.QuadPart = 1; \ + } else { \ + ctr1.QuadPart = 0; \ + } \ + } \ + } \ } -#define PMPML_CHUNK_REDUCE_128_TO_64_AND_RETURN \ -{ \ - uint64_t hi, lo; \ +#define 
PMPML_CHUNK_REDUCE_128_TO_64_AND_RETURN \ +{ \ + uint64_t hi, lo; \ MultiplyWordLoHi(lo, hi, ctr1.QuadPart, 13); \ - uint64_t part = ctr2.QuadPart * 169 + hi * 13 + 13; \ - ctr0.QuadPart += part; \ - ctr1.QuadPart = 1 + (ctr0.QuadPart < part); \ - ctr1.QuadPart -= (ctr0.QuadPart < lo); \ - ctr0.QuadPart -= lo; \ - if ( likely( ctr0.QuadPart >= 26) ) { ctr0.QuadPart -= ctr1.QuadPart * 13; return fmix64_short( ctr0.QuadPart ); } \ - else \ - { \ + uint64_t part = ctr2.QuadPart * 169 + hi * 13 + 13;\ + ctr0.QuadPart += part; \ + ctr1.QuadPart = 1 + (ctr0.QuadPart < part); \ + ctr1.QuadPart -= (ctr0.QuadPart < lo); \ + ctr0.QuadPart -= lo; \ + if ( likely( ctr0.QuadPart >= 26) ) { \ + ctr0.QuadPart -= ctr1.QuadPart * 13; \ + return fmix64_short( ctr0.QuadPart ); \ + } else { \ ctr0.QuadPart -= ctr1.QuadPart * 13; \ - if ( ctr0.QuadPart < 26 ) return fmix64_short( ctr0.QuadPart ); \ - else \ - { \ - ctr0.QuadPart += 13; \ - return fmix64_short( ctr0.QuadPart ); \ - } \ - } \ + if ( ctr0.QuadPart < 26 ) { \ + return fmix64_short( ctr0.QuadPart ); \ + } else { \ + ctr0.QuadPart += 13; \ + return fmix64_short( ctr0.QuadPart );\ + } \ + } \ } -template < bool bswap > -static uint64_t ReadTail(const uint8_t * tail, uint64_t tail_size) { - uint64_t xLast; - - switch (tail_size & (PMPML_64_WORD_SIZE_BYTES - 1)) { - case 0: { xLast = 0x1; break;} - case 1: { xLast = 0x100 + tail[tail_size-1]; break;} - case 2: { xLast = GET_U16(tail + tail_size - 2, 0) + 0x10000; break; } - case 3: { xLast = tail[ tail_size - 1 ]; xLast = ( xLast << 16 ) + GET_U16(tail + tail_size - 3, 0) + 0x1000000; break;} - case 4: { xLast = GET_U32(tail + tail_size - 4, 0) + UINT64_C( 0x100000000 ); break; } - case 5: { xLast = tail[ tail_size - 1 ]; xLast = ( xLast << 32 ) + UINT64_C( 0x10000000000 ) + GET_U32(tail + tail_size - 5, 0); break;} - case 6: { xLast = GET_U16(tail + tail_size - 2, 0); xLast = ( xLast << 32 ) + UINT64_C( 0x1000000000000 ) + GET_U32(tail + tail_size - 6, 0); break;} - default: { 
xLast = tail[ tail_size - 1 ]; xLast <<= 48; uint64_t xLast1 = GET_U16(tail + tail_size - 3, 0); xLast += (xLast1<<32) + UINT64_C( 0x100000000000000 ) + GET_U32(tail + tail_size - 7, 0); break;} - } +template +static uint64_t ReadTail( const uint8_t * tail, uint64_t tail_size ) { + uint64_t xLast; + + switch (tail_size & (PMPML_64_WORD_SIZE_BYTES - 1)) { + case 0: { xLast = 0x1; break; } + case 1: { xLast = 0x100 + tail[tail_size - 1]; break; } + case 2: { xLast = GET_U16(tail + tail_size - 2, 0) + 0x10000; break; } + case 3: { xLast = tail[tail_size - 1]; xLast = (xLast << 16) + GET_U16(tail + tail_size - 3, 0) + 0x1000000; break; } + case 4: { xLast = GET_U32(tail + tail_size - 4, 0) + UINT64_C(0x100000000); break; } + case 5: { xLast = tail[tail_size - 1]; xLast = (xLast << 32) + UINT64_C(0x10000000000) + GET_U32( + tail + tail_size - 5, 0); break; } + case 6: { xLast = GET_U16(tail + tail_size - 2, 0); xLast = (xLast << 32) + UINT64_C(0x1000000000000) + GET_U32( + tail + tail_size - 6, 0); break; } + default: { xLast = tail[tail_size - 1]; xLast <<= 48; + uint64_t xLast1 = GET_U16(tail + tail_size - 3, 0); + xLast += (xLast1 << 32) + UINT64_C(0x100000000000000) + GET_U32(tail + tail_size - 7, 0); break; } + } - return xLast; + return xLast; } -class PMP_Multilinear_Hasher_64 -{ +class PMP_Multilinear_Hasher_64 { private: - random_data_for_PMPML_64* curr_rd; - uint64_t coeff0; + random_data_for_PMPML_64 * curr_rd; + uint64_t coeff0; - // calls to be done from LEVEL=0 - template < bool bswap > - FORCE_INLINE void hash_of_string_chunk_compact( const uint64_t* coeff, ULARGE_INTEGER__XX constTerm, const uint64_t* x, ULARGELARGE_INTEGER__XX& ret ) const - { - PMPML_CHUNK_LOOP_INTRO_L0_64 + // calls to be done from LEVEL=0 + template + FORCE_INLINE void hash_of_string_chunk_compact( const uint64_t * coeff, ULARGE_INTEGER__XX constTerm, + const uint64_t * x, ULARGELARGE_INTEGER__XX & ret ) const { + PMPML_CHUNK_LOOP_INTRO_L0_64 #if defined(HAVE_AVX2) && 
(PMPML_64_CHUNK_SIZE_LOG2 >= 3) - __m256i sse_ctr0_0, sse_ctr0_1, sse_ctr1, sse_ctr2, sse_ctr3_0, sse_ctr3_1, a, a_shifted, a_low, data, data_low, product, temp, mask_low; - sse_ctr0_0 = _mm256_setzero_si256 (); // Sets the 128-bit value to zero. - sse_ctr0_1 = _mm256_setzero_si256 (); // Sets the 128-bit value to zero. - sse_ctr1 = _mm256_setzero_si256 (); - sse_ctr2 = _mm256_setzero_si256 (); - sse_ctr3_0 = _mm256_setzero_si256 (); - sse_ctr3_1 = _mm256_setzero_si256 (); - mask_low = _mm256_set_epi32 ( 0, -1, 0 , -1, 0, -1, 0 , -1 ); - -#if (PMPML_64_CHUNK_SIZE_LOG2 >= 4) - for (uint64_t i=0; i<(PMPML_64_CHUNK_SIZE); i+=16) -#else - for (uint64_t i=0; i<(PMPML_64_CHUNK_SIZE); i+=8) -#endif - { - a = _mm256_load_si256 ((__m256i *)(coeff+i)); // Loads 128-bit value. Address p must be 16-byte aligned. - data = _mm256_loadu_si256 ((__m256i *)(x+i)); // Loads 128-bit value. Address p does not need be 16-byte aligned. - - // lower 32 bits - a_low = _mm256_and_si256 ( mask_low, a ); - data_low = _mm256_and_si256 ( mask_low, data ); - product = _mm256_mul_epu32 ( data_low, a_low); // A 128-bit value that contains two 64-bit unsigned integers. The result can be expressed by the following equations. r0 := a0 * b0; r1 := a2 * b2 - sse_ctr0_0 = _mm256_add_epi64 ( sse_ctr0_0, product );//sse_ctr0 = _mm256_add_epi64 ( sse_ctr0, temp ); - temp = _mm256_srli_epi64( product, 32 ); // Shifts the 2 signed or unsigned 64-bit integers in a right by count bits while shifting in zeros. - sse_ctr0_1 = _mm256_add_epi64 ( sse_ctr0_1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 4 + i ) - - // first cross - a_shifted = _mm256_srli_epi64( a, 32 ); - product = _mm256_mul_epu32 ( data_low, a_shifted ); // A 128-bit value that contains two 64-bit unsigned integers. The result can be expressed by the following equations. 
r0 := a0 * b0; r1 := a2 * b2 - sse_ctr1 = _mm256_add_epi64 ( sse_ctr1, product );//sse_ctr1 = _mm256_add_epi64 ( sse_ctr1, temp ); - temp = _mm256_srli_epi64( product, 32 ); // Shifts the 2 signed or unsigned 64-bit integers in a right by count bits while shifting in zeros. - sse_ctr2 = _mm256_add_epi64 ( sse_ctr2, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 5 + i ) - // second cross - data = _mm256_srli_epi64( data, 32 ); - product = _mm256_mul_epu32 ( data, a_low ); // A 128-bit value that contains two 64-bit unsigned integers. The result can be expressed by the following equations. r0 := a0 * b0; r1 := a2 * b2 - sse_ctr1 = _mm256_add_epi64 ( sse_ctr1, product );//sse_ctr1 = _mm256_add_epi64 ( sse_ctr1, temp ); - temp = _mm256_srli_epi64( product, 32 ); // Shifts the 2 signed or unsigned 64-bit integers in a right by count bits while shifting in zeros. - sse_ctr2 = _mm256_add_epi64 ( sse_ctr2, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 6 + i ) - // upper 32 bits - product = _mm256_mul_epu32 ( data, a_shifted ); // A 128-bit value that contains two 64-bit unsigned integers. The result can be expressed by the following equations. r0 := a0 * b0; r1 := a2 * b2 - sse_ctr3_0 = _mm256_add_epi64 ( sse_ctr3_0, product );//sse_ctr2 = _mm256_add_epi64 ( sse_ctr2, temp ); - temp = _mm256_srli_epi64( product, 32 ); // Shifts the 2 signed or unsigned 64-bit integers in a right by count bits while shifting in zeros. - sse_ctr3_1 = _mm256_add_epi64 ( sse_ctr3_1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 7 + i ) - -#if (PMPML_64_CHUNK_SIZE_LOG2 >= 4) - a = _mm256_load_si256 ((__m256i *)(coeff+i+8)); // Loads 128-bit value. Address p must be 16-byte aligned. - data = _mm256_loadu_si256 ((__m256i *)(x+i+8)); // Loads 128-bit value. Address p does not need be 16-byte aligned. 
- - // lower 32 bits - a_low = _mm256_and_si256 ( mask_low, a ); - data_low = _mm256_and_si256 ( mask_low, data ); - product = _mm256_mul_epu32 ( data_low, a_low); // A 128-bit value that contains two 64-bit unsigned integers. The result can be expressed by the following equations. r0 := a0 * b0; r1 := a2 * b2 - sse_ctr0_0 = _mm256_add_epi64 ( sse_ctr0_0, product );//sse_ctr0 = _mm256_add_epi64 ( sse_ctr0, temp ); - temp = _mm256_srli_epi64( product, 32 ); // Shifts the 2 signed or unsigned 64-bit integers in a right by count bits while shifting in zeros. - sse_ctr0_1 = _mm256_add_epi64 ( sse_ctr0_1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 12 + i ) - - // first cross - a_shifted = _mm256_srli_epi64( a, 32 ); - product = _mm256_mul_epu32 ( data_low, a_shifted ); // A 128-bit value that contains two 64-bit unsigned integers. The result can be expressed by the following equations. r0 := a0 * b0; r1 := a2 * b2 - sse_ctr1 = _mm256_add_epi64 ( sse_ctr1, product );//sse_ctr1 = _mm256_add_epi64 ( sse_ctr1, temp ); - temp = _mm256_srli_epi64( product, 32 ); // Shifts the 2 signed or unsigned 64-bit integers in a right by count bits while shifting in zeros. - sse_ctr2 = _mm256_add_epi64 ( sse_ctr2, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 13 + i ) - - // second cross - data = _mm256_srli_epi64( data, 32 ); - product = _mm256_mul_epu32 ( data, a_low ); // A 128-bit value that contains two 64-bit unsigned integers. The result can be expressed by the following equations. r0 := a0 * b0; r1 := a2 * b2 - sse_ctr1 = _mm256_add_epi64 ( sse_ctr1, product );//sse_ctr1 = _mm256_add_epi64 ( sse_ctr1, temp ); - temp = _mm256_srli_epi64( product, 32 ); // Shifts the 2 signed or unsigned 64-bit integers in a right by count bits while shifting in zeros. 
- sse_ctr2 = _mm256_add_epi64 ( sse_ctr2, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 14 + i ) - // upper 32 bits - product = _mm256_mul_epu32 ( data, a_shifted ); // A 128-bit value that contains two 64-bit unsigned integers. The result can be expressed by the following equations. r0 := a0 * b0; r1 := a2 * b2 - sse_ctr3_0 = _mm256_add_epi64 ( sse_ctr3_0, product );//sse_ctr2 = _mm256_add_epi64 ( sse_ctr2, temp ); - temp = _mm256_srli_epi64( product, 32 ); // Shifts the 2 signed or unsigned 64-bit integers in a right by count bits while shifting in zeros. - sse_ctr3_1 = _mm256_add_epi64 ( sse_ctr3_1, temp ); - //temp = _mm256_and_si256 ( mask_low, product ); - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 15 + i ) -#endif - } - - uint64_t t0_0, t0_1, t1, t2, t3_0, t3_1; - t0_0 = ((uint64_t*)(&sse_ctr0_0))[0] + ((uint64_t*)(&sse_ctr0_0))[1] + ((uint64_t*)(&sse_ctr0_0))[2] + ((uint64_t*)(&sse_ctr0_0))[3]; - t0_1 = ((uint64_t*)(&sse_ctr0_1))[0] + ((uint64_t*)(&sse_ctr0_1))[1] + ((uint64_t*)(&sse_ctr0_1))[2] + ((uint64_t*)(&sse_ctr0_1))[3]; - t1 = ((uint64_t*)(&sse_ctr1))[0] + ((uint64_t*)(&sse_ctr1))[1] + ((uint64_t*)(&sse_ctr1))[2] + ((uint64_t*)(&sse_ctr1))[3]; - t2 = ((uint64_t*)(&sse_ctr2))[0] + ((uint64_t*)(&sse_ctr2))[1] + ((uint64_t*)(&sse_ctr2))[2] + ((uint64_t*)(&sse_ctr2))[3]; - t3_0 = ((uint64_t*)(&sse_ctr3_0))[0] + ((uint64_t*)(&sse_ctr3_0))[1] + ((uint64_t*)(&sse_ctr3_0))[2] + ((uint64_t*)(&sse_ctr3_0))[3]; - t3_1 = ((uint64_t*)(&sse_ctr3_1))[0] + ((uint64_t*)(&sse_ctr3_1))[1] + ((uint64_t*)(&sse_ctr3_1))[2] + ((uint64_t*)(&sse_ctr3_1))[3]; - - ADD_SHIFT_ADD_NORMALIZE_TO_UPPER( t0_0, t0_1 ) - ADD_SHIFT_ADD_NORMALIZE_TO_UPPER( t1, t2 ) - ADD_SHIFT_ADD_NORMALIZE_TO_UPPER( t3_0, t3_1 ) - - uint64_t add_sse1, add_sse2; - - t1 += t0_1; - add_sse1 = t0_0 + ( ((uint64_t)(uint32_t)t1) << 32 ); - ctr0.QuadPart += add_sse1; - add_sse2 = ctr0.QuadPart < add_sse1; - - t2 += t3_0 + (t1>>32); - t3_1 += t2>>32; - - 
add_sse2 += (uint32_t)t2 + ( ( (uint64_t)(uint32_t)t3_1 ) << 32 ); - ctr1.QuadPart += add_sse2; - - ctr2.QuadPart += (t3_1 >> 32) + (ctr1.QuadPart < add_sse2); + __m256i sse_ctr0_0, sse_ctr0_1, sse_ctr1, sse_ctr2, sse_ctr3_0, sse_ctr3_1, + a, a_shifted, a_low, data, data_low, product, temp, mask_low; + sse_ctr0_0 = _mm256_setzero_si256(); // Sets the 128-bit value to zero. + sse_ctr0_1 = _mm256_setzero_si256(); // Sets the 128-bit value to zero. + sse_ctr1 = _mm256_setzero_si256(); + sse_ctr2 = _mm256_setzero_si256(); + sse_ctr3_0 = _mm256_setzero_si256(); + sse_ctr3_1 = _mm256_setzero_si256(); + mask_low = _mm256_set_epi32(0, -1, 0, -1, 0, -1, 0, -1); + + #if (PMPML_64_CHUNK_SIZE_LOG2 >= 4) + for (uint64_t i = 0; i < (PMPML_64_CHUNK_SIZE); i += 16) + #else + for (uint64_t i = 0; i < (PMPML_64_CHUNK_SIZE); i += 8) + #endif + { + a = _mm256_load_si256((__m256i * )(coeff + i)); // Loads 128-bit value. Address p must be 16-byte + // aligned. + data = _mm256_loadu_si256((__m256i *)(x + i)); // Loads 128-bit value. Address p does not need be + // 16-byte aligned. + + // lower 32 bits + a_low = _mm256_and_si256(mask_low, a ); + data_low = _mm256_and_si256(mask_low, data); + product = _mm256_mul_epu32(data_low, a_low); // A 128-bit value that contains two 64-bit unsigned + // integers. The result can be expressed by the following + // equations. r0 := a0 * b0; r1 := a2 * b2 + sse_ctr0_0 = _mm256_add_epi64(sse_ctr0_0, product); // sse_ctr0 = _mm256_add_epi64 ( sse_ctr0, temp ); + temp = _mm256_srli_epi64(product, 32); // Shifts the 2 signed or unsigned 64-bit integers in a + // right by count bits while shifting in zeros. + sse_ctr0_1 = _mm256_add_epi64(sse_ctr0_1, temp); + // temp = _mm256_and_si256 ( mask_low, product ); + + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(4 + i) + + // first cross + a_shifted = _mm256_srli_epi64(a, 32); + product = _mm256_mul_epu32(data_low, a_shifted); // A 128-bit value that contains two 64-bit unsigned + // integers. 
The result can be expressed by the following + // equations. r0 := a0 * b0; r1 := a2 * b2 + sse_ctr1 = _mm256_add_epi64(sse_ctr1, product); // sse_ctr1 = _mm256_add_epi64 ( sse_ctr1, temp ); + temp = _mm256_srli_epi64(product, 32); // Shifts the 2 signed or unsigned 64-bit integers in a + // right by count bits while shifting in zeros. + sse_ctr2 = _mm256_add_epi64(sse_ctr2, temp); + // temp = _mm256_and_si256 ( mask_low, product ); + + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(5 + i) + // second cross + data = _mm256_srli_epi64(data, 32); + product = _mm256_mul_epu32(data, a_low); // A 128-bit value that contains two 64-bit unsigned integers. + // The result can be expressed by the following equations. r0 := + // a0 * b0; r1 := a2 * b2 + sse_ctr1 = _mm256_add_epi64(sse_ctr1, product); // sse_ctr1 = _mm256_add_epi64 ( sse_ctr1, temp ); + temp = _mm256_srli_epi64(product, 32); // Shifts the 2 signed or unsigned 64-bit integers in a right by + // count bits while shifting in zeros. + sse_ctr2 = _mm256_add_epi64(sse_ctr2, temp); + // temp = _mm256_and_si256 ( mask_low, product ); + + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(6 + i) + // upper 32 bits + product = _mm256_mul_epu32(data, a_shifted); // A 128-bit value that contains two 64-bit unsigned + // integers. The result can be expressed by the following + // equations. r0 := a0 * b0; r1 := a2 * b2 + sse_ctr3_0 = _mm256_add_epi64(sse_ctr3_0, product); // sse_ctr2 = _mm256_add_epi64 ( sse_ctr2, temp ); + temp = _mm256_srli_epi64(product, 32); // Shifts the 2 signed or unsigned 64-bit integers in a + // right by count bits while shifting in zeros. + sse_ctr3_1 = _mm256_add_epi64(sse_ctr3_1, temp); + // temp = _mm256_and_si256 ( mask_low, product ); + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(7 + i) + + #if (PMPML_64_CHUNK_SIZE_LOG2 >= 4) + a = _mm256_load_si256((__m256i * )(coeff + i + 8)); // Loads 128-bit value. Address p must be 16-byte + // aligned. 
+ data = _mm256_loadu_si256((__m256i *)(x + i + 8)); // Loads 128-bit value. Address p does not need be + // 16-byte aligned. + + // lower 32 bits + a_low = _mm256_and_si256(mask_low, a ); + data_low = _mm256_and_si256(mask_low, data); + product = _mm256_mul_epu32(data_low, a_low); // A 128-bit value that contains two 64-bit unsigned + // integers. The result can be expressed by the following + // equations. r0 := a0 * b0; r1 := a2 * b2 + sse_ctr0_0 = _mm256_add_epi64(sse_ctr0_0, product); // sse_ctr0 = _mm256_add_epi64 ( sse_ctr0, temp ); + temp = _mm256_srli_epi64(product, 32); // Shifts the 2 signed or unsigned 64-bit integers in a + // right by count bits while shifting in zeros. + sse_ctr0_1 = _mm256_add_epi64(sse_ctr0_1, temp); + // temp = _mm256_and_si256 ( mask_low, product ); + + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(12 + i) + + // first cross + a_shifted = _mm256_srli_epi64(a, 32); + product = _mm256_mul_epu32(data_low, a_shifted); // A 128-bit value that contains two 64-bit unsigned + // integers. The result can be expressed by the following + // equations. r0 := a0 * b0; r1 := a2 * b2 + sse_ctr1 = _mm256_add_epi64(sse_ctr1, product); // sse_ctr1 = _mm256_add_epi64 ( sse_ctr1, temp ); + temp = _mm256_srli_epi64(product, 32); // Shifts the 2 signed or unsigned 64-bit integers in a + // right by count bits while shifting in zeros. + sse_ctr2 = _mm256_add_epi64(sse_ctr2, temp); + // temp = _mm256_and_si256 ( mask_low, product ); + + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(13 + i) + + // second cross + data = _mm256_srli_epi64(data, 32); + product = _mm256_mul_epu32(data, a_low); // A 128-bit value that contains two 64-bit unsigned integers. + // The result can be expressed by the following equations. 
r0 := + // a0 * b0; r1 := a2 * b2 + sse_ctr1 = _mm256_add_epi64(sse_ctr1, product); // sse_ctr1 = _mm256_add_epi64 ( sse_ctr1, temp ); + temp = _mm256_srli_epi64(product, 32); // Shifts the 2 signed or unsigned 64-bit integers in a right by + // count bits while shifting in zeros. + sse_ctr2 = _mm256_add_epi64(sse_ctr2, temp); + // temp = _mm256_and_si256 ( mask_low, product ); + + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(14 + i) + // upper 32 bits + product = _mm256_mul_epu32(data, a_shifted); // A 128-bit value that contains two 64-bit unsigned + // integers. The result can be expressed by the following + // equations. r0 := a0 * b0; r1 := a2 * b2 + sse_ctr3_0 = _mm256_add_epi64(sse_ctr3_0, product); // sse_ctr2 = _mm256_add_epi64 ( sse_ctr2, temp ); + temp = _mm256_srli_epi64(product, 32); // Shifts the 2 signed or unsigned 64-bit integers in a + // right by count bits while shifting in zeros. + sse_ctr3_1 = _mm256_add_epi64(sse_ctr3_1, temp); + // temp = _mm256_and_si256 ( mask_low, product ); + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(15 + i) + #endif + } + + uint64_t t0_0, t0_1, t1, t2, t3_0, t3_1; + t0_0 = ((uint64_t *)(&sse_ctr0_0))[0] + ((uint64_t *)(&sse_ctr0_0))[1] + + ((uint64_t *)(&sse_ctr0_0))[2] + ((uint64_t *)(&sse_ctr0_0))[3]; + t0_1 = ((uint64_t *)(&sse_ctr0_1))[0] + ((uint64_t *)(&sse_ctr0_1))[1] + + ((uint64_t *)(&sse_ctr0_1))[2] + ((uint64_t *)(&sse_ctr0_1))[3]; + t1 = ((uint64_t *)(&sse_ctr1 ))[0] + ((uint64_t *)(&sse_ctr1 ))[1] + + ((uint64_t *)(&sse_ctr1 ))[2] + ((uint64_t *)(&sse_ctr1 ))[3]; + t2 = ((uint64_t *)(&sse_ctr2 ))[0] + ((uint64_t *)(&sse_ctr2 ))[1] + + ((uint64_t *)(&sse_ctr2 ))[2] + ((uint64_t *)(&sse_ctr2 ))[3]; + t3_0 = ((uint64_t *)(&sse_ctr3_0))[0] + ((uint64_t *)(&sse_ctr3_0))[1] + + ((uint64_t *)(&sse_ctr3_0))[2] + ((uint64_t *)(&sse_ctr3_0))[3]; + t3_1 = ((uint64_t *)(&sse_ctr3_1))[0] + ((uint64_t *)(&sse_ctr3_1))[1] + + ((uint64_t *)(&sse_ctr3_1))[2] + ((uint64_t *)(&sse_ctr3_1))[3]; + + ADD_SHIFT_ADD_NORMALIZE_TO_UPPER(t0_0, 
t0_1) + ADD_SHIFT_ADD_NORMALIZE_TO_UPPER(t1 , t2 ) + ADD_SHIFT_ADD_NORMALIZE_TO_UPPER(t3_0, t3_1) + + uint64_t add_sse1, add_sse2; + + t1 += t0_1; + add_sse1 = t0_0 + (((uint64_t)(uint32_t)t1 ) << 32); + ctr0.QuadPart += add_sse1; + add_sse2 = ctr0.QuadPart < add_sse1; + + t2 += t3_0 + (t1 >> 32); + t3_1 += t2 >> 32; + + add_sse2 += (uint32_t)t2 + (((uint64_t)(uint32_t)t3_1) << 32); + ctr1.QuadPart += add_sse2; + + ctr2.QuadPart += (t3_1 >> 32) + (ctr1.QuadPart < add_sse2); - -/* ctr0.LowPart = (uint32_t)t0_0; - uint64_t upper64 = t0_1 + (t0_0>>32) + (uint64_t)(uint32_t)t1; - ctr0.HighPart = (uint32_t)upper64; - - upper64 = (upper64>>32) + (t1>>32) + t2 + (uint32_t)t3_0; - ctr1.LowPart = (uint32_t)upper64; - - upper64 = (upper64>>32) + (t3_0>>32) + (uint32_t)t3_1; - ctr1.HighPart += (uint32_t)upper64; - - ctr2.QuadPart = (upper64>>32) + (t3_1>>32);*/ +/* + * ctr0.LowPart = (uint32_t)t0_0; + * uint64_t upper64 = t0_1 + (t0_0>>32) + (uint64_t)(uint32_t)t1; + * ctr0.HighPart = (uint32_t)upper64; + * + * upper64 = (upper64>>32) + (t1>>32) + t2 + (uint32_t)t3_0; + * ctr1.LowPart = (uint32_t)upper64; + * + * upper64 = (upper64>>32) + (t3_0>>32) + (uint32_t)t3_1; + * ctr1.HighPart += (uint32_t)upper64; + * + * ctr2.QuadPart = (upper64>>32) + (t3_1>>32); + */ #else // defined(HAVE_AVX2) && (PMPML_64_CHUNK_SIZE_LOG2 >= 3) - for (uint64_t i=0; i<(PMPML_64_CHUNK_SIZE); i+=32) { - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 0 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 1 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 2 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 3 + i ) -#if (PMPML_64_CHUNK_SIZE_LOG2 > 2) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 4 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 5 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 6 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 7 + i ) -#endif -#if (PMPML_64_CHUNK_SIZE_LOG2 > 3) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 8 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 9 + i ) - 
PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 10 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 11 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 12 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 13 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 14 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 15 + i ) -#endif -#if (PMPML_64_CHUNK_SIZE_LOG2 > 4) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 16 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 17 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 18 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 19 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 20 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 21 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 22 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 23 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 24 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 25 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 26 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 27 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 28 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 29 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST( 30 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND( 31 + i ) -#endif - } + for (uint64_t i = 0; i < (PMPML_64_CHUNK_SIZE); i += 32) { + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(0 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(1 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(2 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(3 + i) + #if (PMPML_64_CHUNK_SIZE_LOG2 > 2) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(4 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(5 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(6 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(7 + i) + #endif + #if (PMPML_64_CHUNK_SIZE_LOG2 > 3) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(8 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(9 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(10 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(11 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(12 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(13 + i) + 
PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(14 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(15 + i) + #endif + #if (PMPML_64_CHUNK_SIZE_LOG2 > 4) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(16 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(17 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(18 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(19 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(20 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(21 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(22 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(23 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(24 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(25 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(26 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(27 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(28 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(29 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_FIRST(30 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_SECOND(31 + i) + #endif + } #endif // defined(HAVE_AVX2) && (PMPML_64_CHUNK_SIZE_LOG2 >= 3) - PMPML_CHUNK_LOOP_PRE_REDUCE_L0_64 + PMPML_CHUNK_LOOP_PRE_REDUCE_L0_64 - PMPML_CHUNK_REDUCE_128_TO_64____ - ret.LowPart = ctr0.QuadPart; - ret.HighPart = ctr1.QuadPart; - } + PMPML_CHUNK_REDUCE_128_TO_64____ + ret.LowPart = ctr0.QuadPart; + ret.HighPart = ctr1.QuadPart; + } - template < bool bswap > - FORCE_INLINE void hash_of_beginning_of_string_chunk_short_type2( const uint64_t* coeff, ULARGE_INTEGER__XX constTerm, const uint8_t* tail, std::size_t tail_size, ULARGELARGE_INTEGER__XX& ret ) const - { - PMPML_CHUNK_LOOP_INTRO_L0_64 - std::size_t size = tail_size >> PMPML_64_WORD_SIZE_BYTES_LOG2; - const uint64_t* x = (const uint64_t*)tail; - - switch (size) { - case 1: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 ) } break; - case 2: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 ) } break; - case 3: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 ) } break; - case 4: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 ) 
PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 3 ) } break; - case 5: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 3 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 4 ) } break; - case 6: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 3 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 5 ) } break; - case 7: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 3 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 5 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 6 ) } break; - } - - uint64_t xLast = ReadTail(tail, tail_size); - - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_LAST(size) - - PMPML_CHUNK_LOOP_PRE_REDUCE_L0_64 - PMPML_CHUNK_REDUCE_128_TO_64 - ret.LowPart = ctr0.QuadPart; - ret.HighPart = ctr1.QuadPart; - } + template + FORCE_INLINE void hash_of_beginning_of_string_chunk_short_type2( const uint64_t * coeff, ULARGE_INTEGER__XX constTerm, + const uint8_t * tail, std::size_t tail_size, ULARGELARGE_INTEGER__XX & ret ) const { + PMPML_CHUNK_LOOP_INTRO_L0_64 + std::size_t size = tail_size >> PMPML_64_WORD_SIZE_BYTES_LOG2; + const uint64_t * x = (const uint64_t *)tail; + + switch (size) { + case 1: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0) } + break; + case 2: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1) } + break; + case 3: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2) } + break; + case 4: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(3) } + break; + case 5: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0) 
PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(3) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(4) } + break; + case 6: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(3) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(4) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(5) } + break; + case 7: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(3) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(4) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(5) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(6) } + break; + } + + uint64_t xLast = ReadTail(tail, tail_size); + + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_LAST(size) + + PMPML_CHUNK_LOOP_PRE_REDUCE_L0_64 + PMPML_CHUNK_REDUCE_128_TO_64 + ret.LowPart = ctr0.QuadPart; + ret.HighPart = ctr1.QuadPart; + } - template < bool bswap > - FORCE_INLINE void hash_of_beginning_of_string_chunk_type2( const uint64_t* coeff, ULARGE_INTEGER__XX constTerm, const uint8_t* tail, std::size_t tail_size, ULARGELARGE_INTEGER__XX& ret ) const - { - PMPML_CHUNK_LOOP_INTRO_L0_64 - std::size_t size = tail_size >> PMPML_64_WORD_SIZE_BYTES_LOG2; - const uint64_t* x = (const uint64_t*)tail; - - for ( uint32_t i=0; i<(size>>3); i++ ) - { - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 + ( i << 3 ) ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 + ( i << 3 ) ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 + ( i << 3 ) ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 3 + ( i << 3 ) ) + template + FORCE_INLINE void hash_of_beginning_of_string_chunk_type2( const uint64_t * coeff, ULARGE_INTEGER__XX constTerm, + const uint8_t * tail, std::size_t tail_size, ULARGELARGE_INTEGER__XX & ret ) const { + PMPML_CHUNK_LOOP_INTRO_L0_64 + std::size_t size = tail_size >> PMPML_64_WORD_SIZE_BYTES_LOG2; + const uint64_t * x = (const uint64_t *)tail; + + for (uint32_t i = 0; i < (size >> 3); i++) { + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0 + (i << 3)) + 
PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1 + (i << 3)) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2 + (i << 3)) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(3 + (i << 3)) #if (PMPML_64_CHUNK_SIZE_LOG2 > 2) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 4 + ( i << 3 ) ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 5 + ( i << 3 ) ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 6 + ( i << 3 ) ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 7 + ( i << 3 ) ) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(4 + (i << 3)) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(5 + (i << 3)) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(6 + (i << 3)) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(7 + (i << 3)) #endif - } - - uint64_t offset = size & 0xFFFFFFF8; - - switch (size & 0x7) { - case 0: { break; } - case 1: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 + offset ) } break; - case 2: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 + offset ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 + offset ) } break; - case 3: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 + offset ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 + offset ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 + offset ) } break; - case 4: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 + offset ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 + offset ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 + offset ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 3 + offset ) } break; - case 5: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 + offset ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 + offset ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 + offset ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 3 + offset ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 4 + offset ) } break; - case 6: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 + offset ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 + offset ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 + offset ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 3 + offset ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 4 + offset ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 5 + offset ) } break; - case 7: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 + offset ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 + offset ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 + offset ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 3 + offset ) - 
PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 4 + offset ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 5 + offset ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 6 + offset ) } break; - } - - uint64_t xLast = ReadTail(tail, tail_size); - - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_LAST(size) - - PMPML_CHUNK_LOOP_PRE_REDUCE_L0_64 - PMPML_CHUNK_REDUCE_128_TO_64 - ret.LowPart = ctr0.QuadPart; - ret.HighPart = ctr1.QuadPart; - } + } + + uint64_t offset = size & 0xFFFFFFF8; + + switch (size & 0x7) { + case 0: { break; } + case 1: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0 + offset) } + break; + case 2: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0 + offset) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1 + offset) } + break; + case 3: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0 + offset) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1 + offset) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2 + offset) } + break; + case 4: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0 + offset) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1 + offset) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2 + offset) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(3 + offset) } + break; + case 5: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0 + offset) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1 + offset) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2 + offset) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(3 + offset) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(4 + offset) } + break; + case 6: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0 + offset) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1 + offset) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2 + offset) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(3 + offset) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(4 + offset) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(5 + offset) } + break; + case 7: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0 + offset) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1 + offset) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2 + offset) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(3 + offset) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(4 + offset) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(5 + offset) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(6 + offset) } + break; + } + + uint64_t xLast = ReadTail(tail, tail_size); + + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_LAST(size) + + 
PMPML_CHUNK_LOOP_PRE_REDUCE_L0_64 + PMPML_CHUNK_REDUCE_128_TO_64 + ret.LowPart = ctr0.QuadPart; + ret.HighPart = ctr1.QuadPart; + } - // a call to be done from subsequent levels - FORCE_INLINE void hash_of_num_chunk( const uint64_t* coeff, ULARGE_INTEGER__XX constTerm, const ULARGELARGE_INTEGER__XX* x, ULARGELARGE_INTEGER__XX& ret ) const - { - ULARGE_INTEGER__XX ctr0, ctr1, ctr2; - ctr0.QuadPart = constTerm.QuadPart; - ctr1.QuadPart = 0; - ctr2.QuadPart = 0; - ULARGE_INTEGER__XX mulLow, mulHigh; - - for ( uint64_t i=0; i<(PMPML_64_CHUNK_SIZE); i+=32 ) - { - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 0 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 1 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 2 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 3 + i ) + // a call to be done from subsequent levels + FORCE_INLINE void hash_of_num_chunk( const uint64_t * coeff, ULARGE_INTEGER__XX constTerm, + const ULARGELARGE_INTEGER__XX * x, ULARGELARGE_INTEGER__XX & ret ) const { + ULARGE_INTEGER__XX ctr0, ctr1, ctr2; + + ctr0.QuadPart = constTerm.QuadPart; + ctr1.QuadPart = 0; + ctr2.QuadPart = 0; + ULARGE_INTEGER__XX mulLow, mulHigh; + + for (uint64_t i = 0; i < (PMPML_64_CHUNK_SIZE); i += 32) { + PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 0 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 1 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 2 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 3 + i) #if (PMPML_64_CHUNK_SIZE_LOG2 > 2) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 4 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 5 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 6 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 7 + i ) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 4 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 5 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 6 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 7 + i) #endif #if (PMPML_64_CHUNK_SIZE_LOG2 > 3) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 8 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 9 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 10 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 11 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 12 + i ) - 
PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 13 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 14 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 15 + i ) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 8 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 9 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(10 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(11 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(12 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(13 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(14 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(15 + i) #endif #if (PMPML_64_CHUNK_SIZE_LOG2 > 4) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 16 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 17 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 18 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 19 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 20 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 21 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 22 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 23 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 24 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 25 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 26 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 27 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 28 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 29 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 30 + i ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T2( 31 + i ) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(16 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(17 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(18 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(19 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(20 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(21 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(22 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(23 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(24 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(25 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(26 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(27 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(28 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(29 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(30 + i) + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(31 + i) #endif - } + } - PMPML_CHUNK_REDUCE_128_TO_64 - - ret.LowPart = ctr0.QuadPart; - ret.HighPart = ctr1.QuadPart; - } 
+ PMPML_CHUNK_REDUCE_128_TO_64 - // a call to be done from subsequent levels - FORCE_INLINE void hash_of_num_chunk_incomplete( const uint64_t* coeff, uint64_t constTerm, uint64_t prevConstTerm, uint64_t coeffSumLow, uint64_t coeffSumHigh, const ULARGELARGE_INTEGER__XX* x, size_t count, ULARGELARGE_INTEGER__XX& ret ) const - { - ULARGE_INTEGER__XX ctr0, ctr1, ctr2; - ctr0.QuadPart = constTerm; - ctr1.QuadPart = 0; - ctr2.QuadPart = 0; - ULARGE_INTEGER__XX c_ctr0, c_ctr1; - c_ctr0.QuadPart = 0; - c_ctr1.QuadPart = 0; - ULARGE_INTEGER__XX mulLow, mulHigh; - uint64_t i; - if ( count < ( PMPML_64_CHUNK_SIZE >> 1 ) ) - { - for ( i=0; i coeffSumLow ) - c_ctr1.QuadPart = coeffSumHigh - c_ctr1.QuadPart - 1; - else - c_ctr1.QuadPart = coeffSumHigh - c_ctr1.QuadPart; - c_ctr0.QuadPart = coeffSumLow - c_ctr0.QuadPart; - } - else - { - for ( i=0; i> 1)) { + for (i = 0; i < count; i++) { + PMPML_CHUNK_LOOP_BODY_ULI_T2_AND_ADD_COEFF_64(i); + } + if (c_ctr0.QuadPart > coeffSumLow) { + c_ctr1.QuadPart = coeffSumHigh - c_ctr1.QuadPart - 1; + } else { + c_ctr1.QuadPart = coeffSumHigh - c_ctr1.QuadPart; + } + c_ctr0.QuadPart = coeffSumLow - c_ctr0.QuadPart; + } else { + for (i = 0; i < count; i++) { + PMPML_64_CHUNK_LOOP_BODY_ULI_T2(i) + } + for (; i < PMPML_64_CHUNK_SIZE; i++) { + PMPML_64_CHUNK_LOOP_BODY_ULI_ADD_COEFF(i) + } + } + PMPML_CHUNK_LOOP_BODY_ULI_T2_AND_ADD_SUM_OF_COEFF_64 + + PMPML_CHUNK_REDUCE_128_TO_64 + + ret.LowPart = ctr0.QuadPart; + ret.HighPart = ctr1.QuadPart; + } - FORCE_INLINE _ULARGELARGE_INTEGER__XX& finalize( int level, _ULARGELARGE_INTEGER__XX * allValues, std::size_t * cnts, std::size_t& flag ) const - { - ULARGELARGE_INTEGER__XX value; - for ( int i=level;;i++ ) - { -// ASSERT ( level != PMPML_LEVELS ) - if ( ( ( flag & ( 1 << i ) ) == 0 ) && cnts[ i ] == 1 ) - { - return allValues[ i << PMPML_64_CHUNK_SIZE_LOG2 ]; - } - if ( cnts[ i ] ) - { - if ( ( flag & ( 1 << i ) ) == 0 ) - { - cnts[ i + 1] = 0; - flag |= 1 << i; - } - 
hash_of_num_chunk_incomplete(curr_rd[ i ].random_coeff, - curr_rd[i].const_term, curr_rd[i].const_term, - curr_rd[i].cachedSumLow, curr_rd[i].cachedSumHigh, - allValues + (i << PMPML_64_CHUNK_SIZE_LOG2), cnts[i], value ); - procesNextValue( i + 1, value, allValues, cnts, flag ); - } - } - } + FORCE_INLINE void procesNextValue( int level, _ULARGELARGE_INTEGER__XX & value, _ULARGELARGE_INTEGER__XX * allValues, + std::size_t * cnts, std::size_t & flag ) const { + for (int i = level;; i++) { + // NOTE: it's not necessary to check whether ( i < PMPML_64_LEVELS ), + // if it is guaranteed that the string size is less than 1 << USHF_MACHINE_WORD_SIZE_BITS + allValues[(i << PMPML_64_CHUNK_SIZE_LOG2) + cnts[i]] = value; + (cnts[i])++; + if (cnts[i] != PMPML_64_CHUNK_SIZE) { + break; + } + cnts[i] = 0; + hash_of_num_chunk(curr_rd[i].random_coeff, *(ULARGE_INTEGER__XX *)(&(curr_rd[i].const_term)), + allValues + (i << PMPML_64_CHUNK_SIZE_LOG2), value); + if ((flag & (1 << i)) == 0) { + cnts[i + 1] = 0; + flag |= 1 << i; + } + } + } - template < bool bswap > - NEVER_INLINE uint64_t _hash_noRecursionNoInline_SingleChunk( const uint8_t* chars, std::size_t cnt ) const - { - _ULARGELARGE_INTEGER__XX tmp_hash; - hash_of_beginning_of_string_chunk_type2( curr_rd[0].random_coeff, *(ULARGE_INTEGER__XX*)(&(curr_rd[0].const_term)), chars, cnt, tmp_hash ); - if ( tmp_hash.HighPart == 0 ) { - return fmix64_short( tmp_hash.LowPart ); + FORCE_INLINE _ULARGELARGE_INTEGER__XX & finalize( int level, _ULARGELARGE_INTEGER__XX * allValues, + std::size_t * cnts, std::size_t & flag ) const { + ULARGELARGE_INTEGER__XX value; + + for (int i = level;; i++) { +// ASSERT ( level != PMPML_LEVELS ) + if (((flag & (1 << i)) == 0) && (cnts[i] == 1)) { + return allValues[i << PMPML_64_CHUNK_SIZE_LOG2]; + } + if (cnts[i]) { + if ((flag & (1 << i)) == 0) { + cnts[i + 1] = 0; + flag |= 1 << i; + } + hash_of_num_chunk_incomplete(curr_rd[i].random_coeff, curr_rd[i].const_term, curr_rd[i].const_term, + 
curr_rd[i].cachedSumLow, curr_rd[i].cachedSumHigh, allValues + (i << PMPML_64_CHUNK_SIZE_LOG2), + cnts[i], value); + procesNextValue(i + 1, value, allValues, cnts, flag); + } + } } - return tmp_hash.LowPart; - } - template < bool bswap > - NEVER_INLINE uint64_t _hash_noRecursionNoInline_type2( const uint8_t* chars, std::size_t cnt ) const - { - _ULARGELARGE_INTEGER__XX allValues[ PMPML_64_LEVELS * PMPML_64_CHUNK_SIZE ]; - std::size_t cnts[ PMPML_64_LEVELS ]; - std::size_t flag; - cnts[ 1 ] = 0; - flag = 0; - - std::size_t i; - _ULARGELARGE_INTEGER__XX tmp_hash; - // process full chunks - for ( i=0; i<(cnt>>PMPML_64_CHUNK_SIZE_BYTES_LOG2); i++ ) { - hash_of_string_chunk_compact(curr_rd[0].random_coeff, *(ULARGE_INTEGER__XX*)(&(curr_rd[0].const_term)), - ((const uint64_t*)(chars)) + ( i << PMPML_64_CHUNK_SIZE_LOG2 ), tmp_hash ); - procesNextValue( 1, tmp_hash, allValues, cnts, flag ); + template + NEVER_INLINE uint64_t _hash_noRecursionNoInline_SingleChunk( const uint8_t * chars, std::size_t cnt ) const { + _ULARGELARGE_INTEGER__XX tmp_hash; + + hash_of_beginning_of_string_chunk_type2(curr_rd[0].random_coeff, *(ULARGE_INTEGER__XX *)(&(curr_rd[0].const_term)), + chars, cnt, tmp_hash); + if (tmp_hash.HighPart == 0) { + return fmix64_short(tmp_hash.LowPart); + } + return tmp_hash.LowPart; } - // process remaining incomplete chunk(s) - // note: if string size is a multiple of chunk size, we create a new chunk (1,0,0,...0), - // so THIS PROCESSING IS ALWAYS PERFORMED - std::size_t tailCnt = cnt & ( PMPML_64_CHUNK_SIZE_BYTES - 1 ); - const uint8_t* tail = chars + ( (cnt>>PMPML_64_CHUNK_SIZE_BYTES_LOG2) << PMPML_64_CHUNK_SIZE_BYTES_LOG2 ); - hash_of_beginning_of_string_chunk_type2( curr_rd[0].random_coeff, *(ULARGE_INTEGER__XX*)(&(curr_rd[0].const_term)), - tail, tailCnt, tmp_hash ); - procesNextValue( 1, tmp_hash, allValues, cnts, flag ); - _ULARGELARGE_INTEGER__XX finRet = finalize( 1, allValues, cnts, flag ); - if ( finRet.HighPart == 0 ) { //LIKELY - return 
fmix64_short( finRet.LowPart ); + + template + NEVER_INLINE uint64_t _hash_noRecursionNoInline_type2( const uint8_t * chars, std::size_t cnt ) const { + _ULARGELARGE_INTEGER__XX allValues[PMPML_64_LEVELS * PMPML_64_CHUNK_SIZE]; + std::size_t cnts[PMPML_64_LEVELS]; + std::size_t flag; + + cnts[1] = 0; + flag = 0; + + std::size_t i; + _ULARGELARGE_INTEGER__XX tmp_hash; + // process full chunks + for (i = 0; i < (cnt >> PMPML_64_CHUNK_SIZE_BYTES_LOG2); i++) { + hash_of_string_chunk_compact(curr_rd[0].random_coeff, *(ULARGE_INTEGER__XX *)(&(curr_rd[0].const_term)), + ((const uint64_t *)(chars)) + (i << PMPML_64_CHUNK_SIZE_LOG2), tmp_hash); + procesNextValue(1, tmp_hash, allValues, cnts, flag); + } + // process remaining incomplete chunk(s) + // note: if string size is a multiple of chunk size, we create a new chunk (1,0,0,...0), + // so THIS PROCESSING IS ALWAYS PERFORMED + std::size_t tailCnt = cnt & (PMPML_64_CHUNK_SIZE_BYTES - 1); + const uint8_t * tail = chars + ((cnt >> PMPML_64_CHUNK_SIZE_BYTES_LOG2) << PMPML_64_CHUNK_SIZE_BYTES_LOG2); + hash_of_beginning_of_string_chunk_type2(curr_rd[0].random_coeff, *(ULARGE_INTEGER__XX *)(&(curr_rd[0].const_term)), + tail, tailCnt, tmp_hash); + procesNextValue(1, tmp_hash, allValues, cnts, flag); + _ULARGELARGE_INTEGER__XX finRet = finalize(1, allValues, cnts, flag); + if (finRet.HighPart == 0) { // LIKELY + return fmix64_short(finRet.LowPart); + } + return finRet.LowPart; } - return finRet.LowPart; - } -public: - template < bool bswap > - FORCE_INLINE uint64_t hash( const uint8_t* chars, std::size_t cnt ) const - { - if (likely(cnt < 64)) { - const uint64_t* coeff = curr_rd[0].random_coeff; - ULARGE_INTEGER__XX constTerm = *(ULARGE_INTEGER__XX*)(&(curr_rd[0].const_term)); - PMPML_CHUNK_LOOP_INTRO_L0_64 - std::size_t size = cnt >> PMPML_64_WORD_SIZE_BYTES_LOG2; - const uint64_t* x = (const uint64_t*)chars; - - switch (size) { - case 1: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 ) } break; - case 2: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 
0 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 ) } break; - case 3: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 ) } break; - case 4: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 3 ) } break; - case 5: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 3 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 4 ) } break; - case 6: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 3 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 5 ) } break; - case 7: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 0 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 1 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 2 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 3 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 4 ) PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 5 ) - PMPML_64_CHUNK_LOOP_BODY_ULI_T1( 6 ) } break; - } - - uint64_t xLast = ReadTail(chars, cnt); - - PMPML_64_CHUNK_LOOP_BODY_ULI_T1_LAST(size); - - PMPML_CHUNK_LOOP_PRE_REDUCE_L0_64; - PMPML_CHUNK_REDUCE_128_TO_64_AND_RETURN; - } else if (cnt < PMPML_64_CHUNK_SIZE) { - return _hash_noRecursionNoInline_SingleChunk( chars, cnt ); - } else { - return _hash_noRecursionNoInline_type2( chars, cnt ); + public: + + template + FORCE_INLINE uint64_t hash( const uint8_t * chars, std::size_t cnt ) const { + if (likely(cnt < 64)) { + const uint64_t * coeff = curr_rd[0].random_coeff; + ULARGE_INTEGER__XX constTerm = *(ULARGE_INTEGER__XX *)(&(curr_rd[0].const_term)); + PMPML_CHUNK_LOOP_INTRO_L0_64 + std::size_t size = cnt >> PMPML_64_WORD_SIZE_BYTES_LOG2; + const uint64_t * x = (const uint64_t *)chars; + + switch (size) { + case 1: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0) } + break; + case 2: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0) 
PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1) } + break; + case 3: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2) } + break; + case 4: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(3) } + break; + case 5: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(3) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(4) } + break; + case 6: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(3) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(4) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(5) } + break; + case 7: { PMPML_64_CHUNK_LOOP_BODY_ULI_T1(0) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(1) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(2) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(3) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(4) PMPML_64_CHUNK_LOOP_BODY_ULI_T1(5) + PMPML_64_CHUNK_LOOP_BODY_ULI_T1(6) } + break; + } + + uint64_t xLast = ReadTail(chars, cnt); + + PMPML_64_CHUNK_LOOP_BODY_ULI_T1_LAST(size); + + PMPML_CHUNK_LOOP_PRE_REDUCE_L0_64; + PMPML_CHUNK_REDUCE_128_TO_64_AND_RETURN; + } else if (cnt < PMPML_64_CHUNK_SIZE) { + return _hash_noRecursionNoInline_SingleChunk(chars, cnt); + } else { + return _hash_noRecursionNoInline_type2(chars, cnt); + } } - } public: - PMP_Multilinear_Hasher_64() - { - curr_rd = (random_data_for_PMPML_64*)rd_for_PMPML_64; - coeff0 = curr_rd[0].random_coeff[0]; - } - void seed( uint64_t seed ) - { - curr_rd[0].random_coeff[0] = coeff0 ^ seed; - } -}; + + PMP_Multilinear_Hasher_64() { + curr_rd = (random_data_for_PMPML_64 *)rd_for_PMPML_64; + coeff0 = curr_rd[0].random_coeff[0]; + } + + void seed( uint64_t seed ) { + curr_rd[0].random_coeff[0] = coeff0 ^ seed; + } +}; // class PMP_Multilinear_Hasher_64 //------------------------------------------------------------- // SMHasher3 API functions @@ -2561,63 
+2716,65 @@ class PMP_Multilinear_Hasher_64 static thread_local PMP_Multilinear_Hasher_32 pmpml_hasher_32; static thread_local PMP_Multilinear_Hasher_64 pmpml_hasher_64; -static uintptr_t PMPML_32_seed(const seed_t seed) { - pmpml_hasher_32.seed((uint64_t)seed); - return (uintptr_t)(&pmpml_hasher_32); +static uintptr_t PMPML_32_seed( const seed_t seed ) { + pmpml_hasher_32.seed((uint64_t)seed); + return (uintptr_t)(&pmpml_hasher_32); } -static uintptr_t PMPML_64_seed(const seed_t seed) { - pmpml_hasher_64.seed((uint64_t)seed); - return (uintptr_t)(&pmpml_hasher_64); +static uintptr_t PMPML_64_seed( const seed_t seed ) { + pmpml_hasher_64.seed((uint64_t)seed); + return (uintptr_t)(&pmpml_hasher_64); } -template < bool bswap > -static void PMPML_32(const void * in, const size_t len, const seed_t seed, void * out) { - PMP_Multilinear_Hasher_32 * p = (PMP_Multilinear_Hasher_32 *)(uintptr_t)seed; - uint32_t h = p->hash((const uint8_t *)in, len); - PUT_U32(h, (uint8_t *)out, 0); +template +static void PMPML_32( const void * in, const size_t len, const seed_t seed, void * out ) { + PMP_Multilinear_Hasher_32 * p = (PMP_Multilinear_Hasher_32 *)(uintptr_t)seed; + uint32_t h = p->hash((const uint8_t *)in, len); + + PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void PMPML_64(const void * in, const size_t len, const seed_t seed, void * out) { - PMP_Multilinear_Hasher_64 * p = (PMP_Multilinear_Hasher_64 *)(uintptr_t)seed; - uint64_t h = p->hash((const uint8_t *)in, len); - PUT_U64(h, (uint8_t *)out, 0); +template +static void PMPML_64( const void * in, const size_t len, const seed_t seed, void * out ) { + PMP_Multilinear_Hasher_64 * p = (PMP_Multilinear_Hasher_64 *)(uintptr_t)seed; + uint64_t h = p->hash((const uint8_t *)in, len); + + PUT_U64(h, (uint8_t *)out, 0); } REGISTER_FAMILY(PMP_mutilinear, - $.src_url = "https://github.com/lemire/StronglyUniversalStringHashing", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = 
"https://github.com/lemire/StronglyUniversalStringHashing", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(PMP_Multilinear_32, - $.desc = "PMP_Multilinear 32-bit", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE , - $.impl_flags = - FLAG_IMPL_TYPE_PUNNING | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_BSD, - $.bits = 32, - $.verification_LE = 0xF3199670, - $.verification_BE = 0xF602E963, - $.seedfn = PMPML_32_seed, - $.hashfn_native = PMPML_32, - $.hashfn_bswap = PMPML_32 -); + $.desc = "PMP_Multilinear 32-bit", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE, + $.impl_flags = + FLAG_IMPL_TYPE_PUNNING | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_BSD, + $.bits = 32, + $.verification_LE = 0xF3199670, + $.verification_BE = 0xF602E963, + $.seedfn = PMPML_32_seed, + $.hashfn_native = PMPML_32, + $.hashfn_bswap = PMPML_32 + ); REGISTER_HASH(PMP_Multilinear_64, - $.desc = "PMP_Multilinear 64-bit", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE , - $.impl_flags = - FLAG_IMPL_TYPE_PUNNING | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_LICENSE_BSD, - $.bits = 64, - $.verification_LE = 0xB776D2B9, - $.verification_BE = 0x8E1E0CDF, - $.seedfn = PMPML_64_seed, - $.hashfn_native = PMPML_64, - $.hashfn_bswap = PMPML_64 -); + $.desc = "PMP_Multilinear 64-bit", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE, + $.impl_flags = + FLAG_IMPL_TYPE_PUNNING | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_LICENSE_BSD, + $.bits = 64, + $.verification_LE = 0xB776D2B9, + $.verification_BE = 0x8E1E0CDF, + $.seedfn = PMPML_64_seed, + $.hashfn_native = PMPML_64, + $.hashfn_bswap = PMPML_64 + ); diff --git a/hashes/poly_mersenne.cpp b/hashes/poly_mersenne.cpp index f27160c9..b6fe5444 100644 --- a/hashes/poly_mersenne.cpp +++ b/hashes/poly_mersenne.cpp @@ -4,7 +4,7 @@ * Copyright (c) 2020-2021 Reini Urban * Copyright (c) 2020 Thomas Dybdahl Ahle * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. + * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -48,11 +48,11 @@ // test it with the RNG you plan on using to seed it. static uint64_t BSD_nextrand; -static void BSD_srand(uint64_t seed) { +static void BSD_srand( uint64_t seed ) { BSD_nextrand = seed; } -static uint32_t BSD_rand(void) { +static uint32_t BSD_rand( void ) { /* * Compute x = (7^5 * x) mod (2^31 - 1) * without overflowing 31 bits: @@ -61,41 +61,43 @@ static uint32_t BSD_rand(void) { * Park and Miller, Communications of the ACM, vol. 31, no. 10, * October 1988, p. 1195. */ - uint64_t hi, lo, x; - - x = (BSD_nextrand % 0x7ffffffe) + 1; - hi = x / 127773; - lo = x % 127773; - x = 16807 * lo - 2836 * hi; - if (x < 0) - x += 0x7fffffff; + uint64_t hi, lo, x; + + x = (BSD_nextrand % 0x7ffffffe) + 1; + hi = x / 127773; + lo = x % 127773; + x = 16807 * lo - 2836 * hi; + if (x < 0) { + x += 0x7fffffff; + } BSD_nextrand = --x; - return x; + return x; } -const static uint64_t MERSENNE_61 = (1ull << 61) - 1; +const static uint64_t MERSENNE_61 = (1ull << 61) - 1; const static uint32_t POLY_MERSENNE_MAX_K = 4; -static uint64_t poly_mersenne_random[POLY_MERSENNE_MAX_K+1]; -static uint64_t poly_mersenne_a; -static uint64_t poly_mersenne_b; +static uint64_t poly_mersenne_random[POLY_MERSENNE_MAX_K + 1]; +static uint64_t poly_mersenne_a; +static uint64_t poly_mersenne_b; -static uint128_t rand_u128(void) { +static uint128_t rand_u128( void ) { // We don't know how many bits we get from rand(), // but it is at least 16, so we concattenate a couple. uint128_t r = BSD_rand(); + for (int i = 0; i < 7; i++) { r <<= 16; - r ^= BSD_rand(); + r ^= BSD_rand(); } return r; } -static uintptr_t poly_mersenne_seed_init(const seed_t seed) { +static uintptr_t poly_mersenne_seed_init( const seed_t seed ) { BSD_srand(seed); // a has be at most 2^60, or the lazy modular reduction won't work. 
- poly_mersenne_a = rand_u128() % (MERSENNE_61/2); + poly_mersenne_a = rand_u128() % (MERSENNE_61 / 2); poly_mersenne_b = rand_u128() % MERSENNE_61; - for (int i = 0; i < POLY_MERSENNE_MAX_K+1; i++) { + for (int i = 0; i < POLY_MERSENNE_MAX_K + 1; i++) { // The random values should be at most 2^61-2, or the lazy // modular reduction won't work. poly_mersenne_random[i] = rand_u128() % MERSENNE_61; @@ -103,20 +105,21 @@ static uintptr_t poly_mersenne_seed_init(const seed_t seed) { return 0; } -static uint64_t mult_combine61(uint64_t h, uint64_t x, uint64_t a) { +static uint64_t mult_combine61( uint64_t h, uint64_t x, uint64_t a ) { uint64_t rhi = 0, rlo = a; + fma64_128(rlo, rhi, h, x); - rhi <<= (64 - 61); - rhi |= (rlo >> 61); - rlo &= MERSENNE_61; + rhi <<= (64 - 61); + rhi |= (rlo >> 61); + rlo &= MERSENNE_61; return rlo + rhi; } // This function ignores the seed, because it uses a separate seeding function. -template < uint32_t K, bool bswap > -static void Poly_Mersenne(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void Poly_Mersenne( const void * in, const size_t len, const seed_t seed, void * out ) { const uint8_t * buf = (const uint8_t *)in; // We first combine hashes using a polynomial in `a`: @@ -128,7 +131,7 @@ static void Poly_Mersenne(const void * in, const size_t len, const seed_t seed, // We use the length as the first character. uint64_t h = len; - for (size_t i = 0; i < len/4; i++, buf += 4) { + for (size_t i = 0; i < len / 4; i++, buf += 4) { // Partial modular reduction. Since each round adds 32 bits, and this // subtracts (up to) 61 bits, we make sure to never overflow. 
h = mult_combine61(h, a, GET_U32(buf, 0)); @@ -138,8 +141,8 @@ static void Poly_Mersenne(const void * in, const size_t len, const seed_t seed, int remaining_bytes = len % 4; if (remaining_bytes) { uint32_t last = 0; - if (remaining_bytes & 2) {last = GET_U16(buf, 0); buf += 2;} - if (remaining_bytes & 1) {last = (last << 8) | (*buf);} + if (remaining_bytes & 2) { last = GET_U16(buf, 0); buf += 2; } + if (remaining_bytes & 1) { last = (last << 8) | (*buf); } h = mult_combine61(h, a, last); } @@ -161,81 +164,81 @@ static void Poly_Mersenne(const void * in, const size_t len, const seed_t seed, } REGISTER_FAMILY(poly_mersenne, - $.src_url = "https://github.com/rurban/smhasher/blob/master/Hashes.cpp", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/rurban/smhasher/blob/master/Hashes.cpp", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(poly_mersenne__deg1, - $.desc = "Degree 1 Hashing mod 2^61-1", - $.hash_flags = - FLAG_HASH_SYSTEM_SPECIFIC, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | // Implementation not yet thread-safe - FLAG_IMPL_128BIT | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_LICENSE_BSD, - $.bits = 32, - $.verification_LE = 0x50526DA4, - $.verification_BE = 0xBB8CF709, - $.seedfn = poly_mersenne_seed_init, - $.hashfn_native = Poly_Mersenne<1, false>, - $.hashfn_bswap = Poly_Mersenne<1, true> -); + $.desc = "Degree 1 Hashing mod 2^61-1", + $.hash_flags = + FLAG_HASH_SYSTEM_SPECIFIC, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS |// Implementation not yet thread-safe + FLAG_IMPL_128BIT | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_LICENSE_BSD, + $.bits = 32, + $.verification_LE = 0x50526DA4, + $.verification_BE = 0xBB8CF709, + $.seedfn = poly_mersenne_seed_init, + $.hashfn_native = Poly_Mersenne<1, false>, + $.hashfn_bswap = Poly_Mersenne<1, true> + ); REGISTER_HASH(poly_mersenne__deg2, - $.desc = "Degree 2 Hashing mod 2^61-1", - $.hash_flags = - FLAG_HASH_SYSTEM_SPECIFIC, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | // 
Implementation not yet thread-safe - FLAG_IMPL_128BIT | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_LICENSE_BSD, - $.bits = 32, - $.verification_LE = 0xCDDDA91B, - $.verification_BE = 0x9507D811, - $.seedfn = poly_mersenne_seed_init, - $.hashfn_native = Poly_Mersenne<2, false>, - $.hashfn_bswap = Poly_Mersenne<2, true>, - $.badseeds = {0x60e8512c}, - $.seedfixfn = excludeBadseeds -); + $.desc = "Degree 2 Hashing mod 2^61-1", + $.hash_flags = + FLAG_HASH_SYSTEM_SPECIFIC, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS |// Implementation not yet thread-safe + FLAG_IMPL_128BIT | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_LICENSE_BSD, + $.bits = 32, + $.verification_LE = 0xCDDDA91B, + $.verification_BE = 0x9507D811, + $.seedfn = poly_mersenne_seed_init, + $.hashfn_native = Poly_Mersenne<2, false>, + $.hashfn_bswap = Poly_Mersenne<2, true>, + $.badseeds = { 0x60e8512c }, + $.seedfixfn = excludeBadseeds + ); REGISTER_HASH(poly_mersenne__deg3, - $.desc = "Degree 3 Hashing mod 2^61-1", - $.hash_flags = - FLAG_HASH_SYSTEM_SPECIFIC, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | // Implementation not yet thread-safe - FLAG_IMPL_128BIT | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_LICENSE_BSD, - $.bits = 32, - $.verification_LE = 0x7D822707, - $.verification_BE = 0x7273EB0A, - $.seedfn = poly_mersenne_seed_init, - $.hashfn_native = Poly_Mersenne<3, false>, - $.hashfn_bswap = Poly_Mersenne<3, true>, - $.badseeds = {0x3d25f745}, - $.seedfixfn = excludeBadseeds -); + $.desc = "Degree 3 Hashing mod 2^61-1", + $.hash_flags = + FLAG_HASH_SYSTEM_SPECIFIC, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS |// Implementation not yet thread-safe + FLAG_IMPL_128BIT | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_LICENSE_BSD, + $.bits = 32, + $.verification_LE = 0x7D822707, + $.verification_BE = 0x7273EB0A, + $.seedfn = poly_mersenne_seed_init, + $.hashfn_native = Poly_Mersenne<3, false>, + $.hashfn_bswap = Poly_Mersenne<3, true>, + $.badseeds = { 0x3d25f745 }, + $.seedfixfn = excludeBadseeds + ); 
REGISTER_HASH(poly_mersenne__deg4, - $.desc = "Degree 4 Hashing mod 2^61-1", - $.hash_flags = - FLAG_HASH_SYSTEM_SPECIFIC, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | // Implementation not yet thread-safe - FLAG_IMPL_128BIT | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_LICENSE_BSD, - $.bits = 32, - $.verification_LE = 0xBF0273E6, - $.verification_BE = 0xAA526413, - $.seedfn = poly_mersenne_seed_init, - $.hashfn_native = Poly_Mersenne<4, false>, - $.hashfn_bswap = Poly_Mersenne<4, true> -); + $.desc = "Degree 4 Hashing mod 2^61-1", + $.hash_flags = + FLAG_HASH_SYSTEM_SPECIFIC, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS |// Implementation not yet thread-safe + FLAG_IMPL_128BIT | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_LICENSE_BSD, + $.bits = 32, + $.verification_LE = 0xBF0273E6, + $.verification_BE = 0xAA526413, + $.seedfn = poly_mersenne_seed_init, + $.hashfn_native = Poly_Mersenne<4, false>, + $.hashfn_bswap = Poly_Mersenne<4, true> + ); #else REGISTER_FAMILY(poly_mersenne); diff --git a/hashes/prvhash.cpp b/hashes/prvhash.cpp index e514b1e8..072f9a87 100644 --- a/hashes/prvhash.cpp +++ b/hashes/prvhash.cpp @@ -36,19 +36,19 @@ * @param MsgEnd Message's end pointer. * @param fb Final byte used for padding. 
*/ -template < bool bswap > -static inline uint64_t prvhash_lpu64ec( const uint8_t * const Msg, - const uint8_t * const MsgEnd, uint64_t fb ) { -const int l = (int)(MsgEnd - Msg); - fb <<= ( l << 3 ); +template +static inline uint64_t prvhash_lpu64ec( const uint8_t * const Msg, const uint8_t * const MsgEnd, uint64_t fb ) { + const int l = (int)(MsgEnd - Msg); - if( l > 3 ) { + fb <<= (l << 3); + + if (l > 3) { fb |= (uint64_t)GET_U32(Msg, 0); - if( l > 4 ) { + if (l > 4) { fb |= (uint64_t)Msg[4] << 32; - if( l > 5 ) { + if (l > 5) { fb |= (uint64_t)Msg[5] << 40; - if( l > 6 ) { + if (l > 6) { fb |= (uint64_t)Msg[6] << 48; } } @@ -56,11 +56,11 @@ const int l = (int)(MsgEnd - Msg); return fb; } - if( l != 0 ) { + if (l != 0) { fb |= Msg[0]; - if( l > 1 ) { + if (l > 1) { fb |= (uint64_t)Msg[1] << 8; - if( l > 2 ) { + if (l > 2) { fb |= (uint64_t)Msg[2] << 16; } } @@ -69,11 +69,11 @@ const int l = (int)(MsgEnd - Msg); return fb; } -static inline uint64_t prvhash_core64(uint64_t & Seed, uint64_t & lcg, uint64_t & Hash) { +static inline uint64_t prvhash_core64( uint64_t & Seed, uint64_t & lcg, uint64_t & Hash ) { Seed *= lcg * 2 + 1; const uint64_t rs = Seed >> 32 | Seed << 32; - Hash += rs + UINT64_C(0xAAAAAAAAAAAAAAAA); - lcg += Seed + UINT64_C(0x5555555555555555); + Hash += rs + UINT64_C(0xAAAAAAAAAAAAAAAA); + lcg += Seed + UINT64_C(0x5555555555555555); Seed ^= Hash; const uint64_t out = lcg ^ rs; @@ -98,17 +98,17 @@ static inline uint64_t prvhash_core64(uint64_t & Seed, uint64_t & lcg, uint64_t * @param Hash2p Location to write the second 8-byte hash result to, * if width128 == true. 
*/ -template < bool bswap, bool width128 > -static inline uint64_t prvhash64_64m(const void * const Msg0, - const size_t MsgLen, const uint64_t UseSeed, uint64_t * Hash2p = NULL) { - const uint8_t * Msg = (const uint8_t *)Msg0; - const uint8_t* const MsgEnd = Msg + MsgLen; - - uint64_t Seed = UINT64_C(0x217992B44669F46A); // The state after 5 PRVHASH rounds - uint64_t lcg = UINT64_C(0xB5E2CC2FE9F0B35B); // from the "zero-state". - uint64_t Hash = UINT64_C(0x949B5E0A608D76D5); +template +static inline uint64_t prvhash64_64m( const void * const Msg0, const size_t MsgLen, + const uint64_t UseSeed, uint64_t * Hash2p = NULL ) { + const uint8_t * Msg = (const uint8_t *)Msg0; + const uint8_t * const MsgEnd = Msg + MsgLen; + + uint64_t Seed = UINT64_C(0x217992B44669F46A); // The state after 5 PRVHASH rounds + uint64_t lcg = UINT64_C(0xB5E2CC2FE9F0B35B); // from the "zero-state". + uint64_t Hash = UINT64_C(0x949B5E0A608D76D5); uint64_t Hash2 = 0; - bool hc = true; + bool hc = true; Hash ^= UseSeed; @@ -120,15 +120,15 @@ static inline uint64_t prvhash64_64m(const void * const Msg0, while (1) { if (Msg < (MsgEnd - (sizeof(uint64_t) - 1))) { - const uint64_t msgw = GET_U64(Msg, 0); + const uint64_t msgw = GET_U64 (Msg, 0); Seed ^= msgw; - lcg ^= msgw; + lcg ^= msgw; } else if (Msg <= MsgEnd) { const uint64_t msgw = prvhash_lpu64ec(Msg, MsgEnd, fb); Seed ^= msgw; - lcg ^= msgw; + lcg ^= msgw; } else { break; } @@ -156,11 +156,11 @@ static inline uint64_t prvhash64_64m(const void * const Msg0, uint64_t h; if (hc) { - h = prvhash_core64(Seed, lcg, Hash); + h = prvhash_core64(Seed, lcg, Hash ); *Hash2p = prvhash_core64(Seed, lcg, Hash2); } else { *Hash2p = prvhash_core64(Seed, lcg, Hash2); - h = prvhash_core64(Seed, lcg, Hash); + h = prvhash_core64(Seed, lcg, Hash ); } return h; } @@ -173,21 +173,22 @@ static inline uint64_t prvhash64_64m(const void * const Msg0, * (with a Seed0 of 0) to the official "prvhash64s_oneshot" function * with HashLen == 8 or 16, but returns an immediate 
result. */ -#define PRVHASH_INIT_COUNT 5 // Common number of initialization rounds. -#define PRH64S_PAR 4 // PRVHASH parallelism +#define PRVHASH_INIT_COUNT 5 // Common number of initialization rounds. +#define PRH64S_PAR 4 // PRVHASH parallelism #define PRH64S_LEN (sizeof(uint64_t) * PRH64S_PAR) // Intermediate block's length. -template < bool bswap, bool width128 > -static inline void prvhash64s_oneshot(const void * const Msg0, - size_t MsgLen0, uint64_t Seed0, uint8_t * const HashOut) { + +template +static inline void prvhash64s_oneshot( const void * const Msg0, size_t MsgLen0, + uint64_t Seed0, uint8_t * const HashOut ) { uint64_t Seed[PRH64S_PAR]; uint64_t lcg[PRH64S_PAR]; uint64_t Hash[2]; - bool hc = true; + bool hc = true; memset(Hash, 0, sizeof(Hash)); for (int i = 0; i < PRH64S_PAR; i++) { Seed[i] = Seed0; - lcg[i] = 0; + lcg[i] = 0; } for (int i = 0; i < PRVHASH_INIT_COUNT; i++) { for (int j = 0; j < PRH64S_PAR; j++) { @@ -195,8 +196,8 @@ static inline void prvhash64s_oneshot(const void * const Msg0, } } - const uint8_t * Msg = (const uint8_t *)Msg0; - size_t MsgLen = MsgLen0; + const uint8_t * Msg = (const uint8_t *)Msg0; + size_t MsgLen = MsgLen0; while (MsgLen >= PRH64S_LEN) { for (int j = 0; j < PRH64S_PAR; j++) { @@ -204,7 +205,7 @@ static inline void prvhash64s_oneshot(const void * const Msg0, Seed[j] ^= m; lcg[j] ^= m; prvhash_core64(Seed[j], lcg[j], hc ? Hash[0] : Hash[1]); - Msg += sizeof(uint64_t); + Msg += sizeof(uint64_t); } if (width128) { hc = !hc; @@ -212,12 +213,11 @@ static inline void prvhash64s_oneshot(const void * const Msg0, MsgLen -= PRH64S_LEN; } - uint8_t fb = (MsgLen0 == 0) ? 1 : - (uint8_t)(1 << (*(Msg + MsgLen - 1) >> 7)); + uint8_t fb = (MsgLen0 == 0) ? 
1 : (uint8_t)(1 << (*(Msg + MsgLen - 1) >> 7)); - uint8_t fbytes[PRH64S_LEN * 2 + 24]; - uint8_t * ptr = fbytes; - size_t MsgExtra = 0; + uint8_t fbytes[PRH64S_LEN * 2 + 24]; + uint8_t * ptr = fbytes; + size_t MsgExtra = 0; memcpy(ptr, Msg, MsgLen); ptr += MsgLen; @@ -225,18 +225,17 @@ static inline void prvhash64s_oneshot(const void * const Msg0, memset(ptr, 0, sizeof(fbytes) - MsgLen); ptr[sizeof(uint64_t) - 1] = fb; - ptr += sizeof(uint64_t); + ptr += sizeof(uint64_t); MsgExtra += sizeof(uint64_t); PUT_U64(MsgLen0 + sizeof(uint64_t), ptr, 0); - ptr += sizeof(uint64_t); + ptr += sizeof(uint64_t); MsgExtra += sizeof(uint64_t); - fb = (MsgLen0 == 0) ? 1 : - (uint8_t)(1 << (*(ptr - 1) >> 7)); + fb = (MsgLen0 == 0) ? 1 : (uint8_t)(1 << (*(ptr - 1) >> 7)); ptr[sizeof(uint64_t) - 1] = fb; - ptr += sizeof(uint64_t); + ptr += sizeof(uint64_t); MsgExtra += sizeof(uint64_t); if (((ptr - fbytes) % PRH64S_LEN) != 0) { @@ -244,12 +243,12 @@ static inline void prvhash64s_oneshot(const void * const Msg0, } MsgLen += MsgExtra; - ptr = fbytes; + ptr = fbytes; while (MsgLen >= PRH64S_LEN) { for (int j = 0; j < PRH64S_PAR; j++) { const uint64_t m = GET_U64(ptr, 0); - ptr += sizeof(uint64_t); + ptr += sizeof(uint64_t); Seed[j] ^= m; lcg[j] ^= m; prvhash_core64(Seed[j], lcg[j], hc ? Hash[0] : Hash[1]); @@ -260,8 +259,8 @@ static inline void prvhash64s_oneshot(const void * const Msg0, MsgLen -= PRH64S_LEN; } - const size_t fc = 8 + (!width128 ? 0 : - (16 + (((((MsgLen0 + MsgExtra) < (16 * PRH64S_PAR)) && !hc)) ? 8 : 0))); + const size_t fc = 8 + (!width128 ? + 0 : (16 + (((((MsgLen0 + MsgExtra) < (16 * PRH64S_PAR)) && !hc)) ? 8 : 0))); for (size_t k = 0; k <= fc; k += sizeof(uint64_t)) { for (int j = 0; j < PRH64S_PAR; j++) { prvhash_core64(Seed[j], lcg[j], hc ? 
Hash[0] : Hash[1]); @@ -287,97 +286,99 @@ static inline void prvhash64s_oneshot(const void * const Msg0, } } -template < bool bswap > -static void prvhash64(const void * in, const size_t len, const seed_t seed, void * out) { - uint64_t h = prvhash64_64m(in, len, (uint64_t)seed); +template +static void prvhash64( const void * in, const size_t len, const seed_t seed, void * out ) { + uint64_t h = prvhash64_64m(in, len, (uint64_t)seed); + PUT_U64(h, (uint8_t *)out, 0); } -template < bool bswap > -static void prvhash128(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void prvhash128( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h1, h2; - h1 = prvhash64_64m(in, len, (uint64_t)seed, &h2); + + h1 = prvhash64_64m(in, len, (uint64_t)seed, &h2); PUT_U64(h1, (uint8_t *)out, 0); PUT_U64(h2, (uint8_t *)out, 8); } -template < bool bswap > -static void prvhash64s(const void * in, const size_t len, const seed_t seed, void * out) { - prvhash64s_oneshot(in, len, (uint64_t)seed, (uint8_t *)out); +template +static void prvhash64s( const void * in, const size_t len, const seed_t seed, void * out ) { + prvhash64s_oneshot(in, len, (uint64_t)seed, (uint8_t *)out); } -template < bool bswap > -static void prvhash128s(const void * in, const size_t len, const seed_t seed, void * out) { - prvhash64s_oneshot(in, len, (uint64_t)seed, (uint8_t *)out); +template +static void prvhash128s( const void * in, const size_t len, const seed_t seed, void * out ) { + prvhash64s_oneshot(in, len, (uint64_t)seed, (uint8_t *)out); } REGISTER_FAMILY(prvhash, - $.src_url = "https://github.com/avaneev/prvhash", - $.src_status = HashFamilyInfo::SRC_ACTIVE -); + $.src_url = "https://github.com/avaneev/prvhash", + $.src_status = HashFamilyInfo::SRC_ACTIVE + ); REGISTER_HASH(prvhash_64, - $.desc = "prvhash64 v4.3 64-bit output", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_SHIFT_VARIABLE | - 
FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xD37C7E74, - $.verification_BE = 0xBAD02709, - $.hashfn_native = prvhash64, - $.hashfn_bswap = prvhash64 -); + $.desc = "prvhash64 v4.3 64-bit output", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_SHIFT_VARIABLE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xD37C7E74, + $.verification_BE = 0xBAD02709, + $.hashfn_native = prvhash64, + $.hashfn_bswap = prvhash64 + ); REGISTER_HASH(prvhash_128, - $.desc = "prvhash64 v4.3 128-bit output", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_SHIFT_VARIABLE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0xB447480F, - $.verification_BE = 0xF93A26FC, - $.hashfn_native = prvhash128, - $.hashfn_bswap = prvhash128 -); + $.desc = "prvhash64 v4.3 128-bit output", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_SHIFT_VARIABLE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0xB447480F, + $.verification_BE = 0xF93A26FC, + $.hashfn_native = prvhash128, + $.hashfn_bswap = prvhash128 + ); REGISTER_HASH(prvhash_64__incr, - $.desc = "prvhash64 v4.3 streaming mode 64-bit output", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_SLOW | - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_SHIFT_VARIABLE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x891521D6, - $.verification_BE = 0xD41B8DB5, - $.hashfn_native = prvhash64s, - $.hashfn_bswap = prvhash64s -); + $.desc = "prvhash64 v4.3 streaming mode 64-bit output", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_SHIFT_VARIABLE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x891521D6, + $.verification_BE = 0xD41B8DB5, + $.hashfn_native = prvhash64s, + $.hashfn_bswap = prvhash64s + ); 
REGISTER_HASH(prvhash_128__incr, - $.desc = "prvhash64 v4.3 streaming mode 128-bit output", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_SLOW | - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_SHIFT_VARIABLE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x0199728A, - $.verification_BE = 0xD2B2DE25, - $.hashfn_native = prvhash128s, - $.hashfn_bswap = prvhash128s -); + $.desc = "prvhash64 v4.3 streaming mode 128-bit output", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_SHIFT_VARIABLE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x0199728A, + $.verification_BE = 0xD2B2DE25, + $.hashfn_native = prvhash128s, + $.hashfn_bswap = prvhash128s + ); diff --git a/hashes/rmd.cpp b/hashes/rmd.cpp index b343c6d4..20eb959c 100644 --- a/hashes/rmd.cpp +++ b/hashes/rmd.cpp @@ -31,9 +31,9 @@ #include "Hashlib.h" typedef struct { - uint64_t length; - uint8_t buf[64]; - uint32_t curlen, state[8]; + uint64_t length; + uint8_t buf[64]; + uint32_t curlen, state[8]; } rmd_ctx; /* the five basic functions */ @@ -43,448 +43,436 @@ typedef struct { #define I(x, y, z) (((x) & (z)) | ((y) & ~(z))) #define J(x, y, z) ((x) ^ ((y) | ~(z))) -#define OP4(f, a, b, c, d, x, s, k) \ - (a) += f((b), (c), (d)) + (x) + (k); \ +#define OP4(f, a, b, c, d, x, s, k) \ + (a) += f((b), (c), (d)) + (x) + (k); \ (a) = ROTL32((a), (s)); -#define OP5(f, a, b, c, d, e, x, s, k) \ - (a) += f((b), (c), (d)) + (x) + (k); \ - (a) = ROTL32((a), (s)) + (e); \ +#define OP5(f, a, b, c, d, e, x, s, k) \ + (a) += f((b), (c), (d)) + (x) + (k); \ + (a) = ROTL32((a), (s)) + (e); \ (c) = ROTL32((c), 10); -template < uint32_t hashwidth, bool bswap > -static void rmd_compress(rmd_ctx * ctx, const uint8_t * buf) { - uint32_t aa,bb,cc,dd,ee,aaa,bbb,ccc,ddd,eee,X[16]; - int i; - const uint32_t k0 = 0; - const uint32_t k1 = 0x50a28be6; - const uint32_t k2 = 0x5a827999; - const uint32_t k3 = 0x5c4dd124; - 
const uint32_t k4 = 0x6ed9eba1; - const uint32_t k5 = 0x6d703ef3; - const uint32_t k6 = 0x8f1bbcdc; - const uint32_t k7 = 0; - const uint32_t k8 = 0xa953fd4e; - const uint32_t k9 = 0x7a6d76e9; - - /* load words X */ - for (i = 0; i < 16; i++){ - X[i] = GET_U32(buf, (4 * i)); - } - - /* load state */ - aa = aaa = ctx->state[0]; - bb = bbb = ctx->state[1]; - cc = ccc = ctx->state[2]; - dd = ddd = ctx->state[3]; - if (hashwidth == 160) { - ee = eee = ctx->state[4]; - } else if (hashwidth == 256) { - aaa = ctx->state[4]; - bbb = ctx->state[5]; - ccc = ctx->state[6]; - ddd = ctx->state[7]; - } - - /* round 1 */ - if (hashwidth == 160) { - - OP5(F, aa, bb, cc, dd, ee, X[ 0], 11, k0); - OP5(F, ee, aa, bb, cc, dd, X[ 1], 14, k0); - OP5(F, dd, ee, aa, bb, cc, X[ 2], 15, k0); - OP5(F, cc, dd, ee, aa, bb, X[ 3], 12, k0); - OP5(F, bb, cc, dd, ee, aa, X[ 4], 5, k0); - OP5(F, aa, bb, cc, dd, ee, X[ 5], 8, k0); - OP5(F, ee, aa, bb, cc, dd, X[ 6], 7, k0); - OP5(F, dd, ee, aa, bb, cc, X[ 7], 9, k0); - OP5(F, cc, dd, ee, aa, bb, X[ 8], 11, k0); - OP5(F, bb, cc, dd, ee, aa, X[ 9], 13, k0); - OP5(F, aa, bb, cc, dd, ee, X[10], 14, k0); - OP5(F, ee, aa, bb, cc, dd, X[11], 15, k0); - OP5(F, dd, ee, aa, bb, cc, X[12], 6, k0); - OP5(F, cc, dd, ee, aa, bb, X[13], 7, k0); - OP5(F, bb, cc, dd, ee, aa, X[14], 9, k0); - OP5(F, aa, bb, cc, dd, ee, X[15], 8, k0); - - OP5(J, aaa, bbb, ccc, ddd, eee, X[ 5], 8, k1); - OP5(J, eee, aaa, bbb, ccc, ddd, X[14], 9, k1); - OP5(J, ddd, eee, aaa, bbb, ccc, X[ 7], 9, k1); - OP5(J, ccc, ddd, eee, aaa, bbb, X[ 0], 11, k1); - OP5(J, bbb, ccc, ddd, eee, aaa, X[ 9], 13, k1); - OP5(J, aaa, bbb, ccc, ddd, eee, X[ 2], 15, k1); - OP5(J, eee, aaa, bbb, ccc, ddd, X[11], 15, k1); - OP5(J, ddd, eee, aaa, bbb, ccc, X[ 4], 5, k1); - OP5(J, ccc, ddd, eee, aaa, bbb, X[13], 7, k1); - OP5(J, bbb, ccc, ddd, eee, aaa, X[ 6], 7, k1); - OP5(J, aaa, bbb, ccc, ddd, eee, X[15], 8, k1); - OP5(J, eee, aaa, bbb, ccc, ddd, X[ 8], 11, k1); - OP5(J, ddd, eee, aaa, bbb, ccc, X[ 1], 14, k1); 
- OP5(J, ccc, ddd, eee, aaa, bbb, X[10], 14, k1); - OP5(J, bbb, ccc, ddd, eee, aaa, X[ 3], 12, k1); - OP5(J, aaa, bbb, ccc, ddd, eee, X[12], 6, k1); - - } else { - - OP4(F, aa, bb, cc, dd, X[ 0], 11, k0); - OP4(F, dd, aa, bb, cc, X[ 1], 14, k0); - OP4(F, cc, dd, aa, bb, X[ 2], 15, k0); - OP4(F, bb, cc, dd, aa, X[ 3], 12, k0); - OP4(F, aa, bb, cc, dd, X[ 4], 5, k0); - OP4(F, dd, aa, bb, cc, X[ 5], 8, k0); - OP4(F, cc, dd, aa, bb, X[ 6], 7, k0); - OP4(F, bb, cc, dd, aa, X[ 7], 9, k0); - OP4(F, aa, bb, cc, dd, X[ 8], 11, k0); - OP4(F, dd, aa, bb, cc, X[ 9], 13, k0); - OP4(F, cc, dd, aa, bb, X[10], 14, k0); - OP4(F, bb, cc, dd, aa, X[11], 15, k0); - OP4(F, aa, bb, cc, dd, X[12], 6, k0); - OP4(F, dd, aa, bb, cc, X[13], 7, k0); - OP4(F, cc, dd, aa, bb, X[14], 9, k0); - OP4(F, bb, cc, dd, aa, X[15], 8, k0); - - OP4(I, aaa, bbb, ccc, ddd, X[ 5], 8, k1); - OP4(I, ddd, aaa, bbb, ccc, X[14], 9, k1); - OP4(I, ccc, ddd, aaa, bbb, X[ 7], 9, k1); - OP4(I, bbb, ccc, ddd, aaa, X[ 0], 11, k1); - OP4(I, aaa, bbb, ccc, ddd, X[ 9], 13, k1); - OP4(I, ddd, aaa, bbb, ccc, X[ 2], 15, k1); - OP4(I, ccc, ddd, aaa, bbb, X[11], 15, k1); - OP4(I, bbb, ccc, ddd, aaa, X[ 4], 5, k1); - OP4(I, aaa, bbb, ccc, ddd, X[13], 7, k1); - OP4(I, ddd, aaa, bbb, ccc, X[ 6], 7, k1); - OP4(I, ccc, ddd, aaa, bbb, X[15], 8, k1); - OP4(I, bbb, ccc, ddd, aaa, X[ 8], 11, k1); - OP4(I, aaa, bbb, ccc, ddd, X[ 1], 14, k1); - OP4(I, ddd, aaa, bbb, ccc, X[10], 14, k1); - OP4(I, ccc, ddd, aaa, bbb, X[ 3], 12, k1); - OP4(I, bbb, ccc, ddd, aaa, X[12], 6, k1); - - if (hashwidth == 256) { - uint64_t tmp = aa; aa = aaa; aaa = tmp; - } - } - - /* round 2 */ - if (hashwidth == 160) { - - OP5(G, ee, aa, bb, cc, dd, X[ 7], 7, k2); - OP5(G, dd, ee, aa, bb, cc, X[ 4], 6, k2); - OP5(G, cc, dd, ee, aa, bb, X[13], 8, k2); - OP5(G, bb, cc, dd, ee, aa, X[ 1], 13, k2); - OP5(G, aa, bb, cc, dd, ee, X[10], 11, k2); - OP5(G, ee, aa, bb, cc, dd, X[ 6], 9, k2); - OP5(G, dd, ee, aa, bb, cc, X[15], 7, k2); - OP5(G, cc, dd, ee, aa, bb, X[ 3], 15, 
k2); - OP5(G, bb, cc, dd, ee, aa, X[12], 7, k2); - OP5(G, aa, bb, cc, dd, ee, X[ 0], 12, k2); - OP5(G, ee, aa, bb, cc, dd, X[ 9], 15, k2); - OP5(G, dd, ee, aa, bb, cc, X[ 5], 9, k2); - OP5(G, cc, dd, ee, aa, bb, X[ 2], 11, k2); - OP5(G, bb, cc, dd, ee, aa, X[14], 7, k2); - OP5(G, aa, bb, cc, dd, ee, X[11], 13, k2); - OP5(G, ee, aa, bb, cc, dd, X[ 8], 12, k2); - - OP5(I, eee, aaa, bbb, ccc, ddd, X[ 6], 9, k3); - OP5(I, ddd, eee, aaa, bbb, ccc, X[11], 13, k3); - OP5(I, ccc, ddd, eee, aaa, bbb, X[ 3], 15, k3); - OP5(I, bbb, ccc, ddd, eee, aaa, X[ 7], 7, k3); - OP5(I, aaa, bbb, ccc, ddd, eee, X[ 0], 12, k3); - OP5(I, eee, aaa, bbb, ccc, ddd, X[13], 8, k3); - OP5(I, ddd, eee, aaa, bbb, ccc, X[ 5], 9, k3); - OP5(I, ccc, ddd, eee, aaa, bbb, X[10], 11, k3); - OP5(I, bbb, ccc, ddd, eee, aaa, X[14], 7, k3); - OP5(I, aaa, bbb, ccc, ddd, eee, X[15], 7, k3); - OP5(I, eee, aaa, bbb, ccc, ddd, X[ 8], 12, k3); - OP5(I, ddd, eee, aaa, bbb, ccc, X[12], 7, k3); - OP5(I, ccc, ddd, eee, aaa, bbb, X[ 4], 6, k3); - OP5(I, bbb, ccc, ddd, eee, aaa, X[ 9], 15, k3); - OP5(I, aaa, bbb, ccc, ddd, eee, X[ 1], 13, k3); - OP5(I, eee, aaa, bbb, ccc, ddd, X[ 2], 11, k3); - - } else { - - OP4(G, aa, bb, cc, dd, X[ 7], 7, k2); - OP4(G, dd, aa, bb, cc, X[ 4], 6, k2); - OP4(G, cc, dd, aa, bb, X[13], 8, k2); - OP4(G, bb, cc, dd, aa, X[ 1], 13, k2); - OP4(G, aa, bb, cc, dd, X[10], 11, k2); - OP4(G, dd, aa, bb, cc, X[ 6], 9, k2); - OP4(G, cc, dd, aa, bb, X[15], 7, k2); - OP4(G, bb, cc, dd, aa, X[ 3], 15, k2); - OP4(G, aa, bb, cc, dd, X[12], 7, k2); - OP4(G, dd, aa, bb, cc, X[ 0], 12, k2); - OP4(G, cc, dd, aa, bb, X[ 9], 15, k2); - OP4(G, bb, cc, dd, aa, X[ 5], 9, k2); - OP4(G, aa, bb, cc, dd, X[ 2], 11, k2); - OP4(G, dd, aa, bb, cc, X[14], 7, k2); - OP4(G, cc, dd, aa, bb, X[11], 13, k2); - OP4(G, bb, cc, dd, aa, X[ 8], 12, k2); - - OP4(H, aaa, bbb, ccc, ddd, X[ 6], 9, k3); - OP4(H, ddd, aaa, bbb, ccc, X[11], 13, k3); - OP4(H, ccc, ddd, aaa, bbb, X[ 3], 15, k3); - OP4(H, bbb, ccc, ddd, aaa, X[ 7], 7, k3); 
- OP4(H, aaa, bbb, ccc, ddd, X[ 0], 12, k3); - OP4(H, ddd, aaa, bbb, ccc, X[13], 8, k3); - OP4(H, ccc, ddd, aaa, bbb, X[ 5], 9, k3); - OP4(H, bbb, ccc, ddd, aaa, X[10], 11, k3); - OP4(H, aaa, bbb, ccc, ddd, X[14], 7, k3); - OP4(H, ddd, aaa, bbb, ccc, X[15], 7, k3); - OP4(H, ccc, ddd, aaa, bbb, X[ 8], 12, k3); - OP4(H, bbb, ccc, ddd, aaa, X[12], 7, k3); - OP4(H, aaa, bbb, ccc, ddd, X[ 4], 6, k3); - OP4(H, ddd, aaa, bbb, ccc, X[ 9], 15, k3); - OP4(H, ccc, ddd, aaa, bbb, X[ 1], 13, k3); - OP4(H, bbb, ccc, ddd, aaa, X[ 2], 11, k3); - - if (hashwidth == 256) { - uint64_t tmp = bb; bb = bbb; bbb = tmp; - } - } - - /* round 3 */ - if (hashwidth == 160) { - - OP5(H, dd, ee, aa, bb, cc, X[ 3], 11, k4); - OP5(H, cc, dd, ee, aa, bb, X[10], 13, k4); - OP5(H, bb, cc, dd, ee, aa, X[14], 6, k4); - OP5(H, aa, bb, cc, dd, ee, X[ 4], 7, k4); - OP5(H, ee, aa, bb, cc, dd, X[ 9], 14, k4); - OP5(H, dd, ee, aa, bb, cc, X[15], 9, k4); - OP5(H, cc, dd, ee, aa, bb, X[ 8], 13, k4); - OP5(H, bb, cc, dd, ee, aa, X[ 1], 15, k4); - OP5(H, aa, bb, cc, dd, ee, X[ 2], 14, k4); - OP5(H, ee, aa, bb, cc, dd, X[ 7], 8, k4); - OP5(H, dd, ee, aa, bb, cc, X[ 0], 13, k4); - OP5(H, cc, dd, ee, aa, bb, X[ 6], 6, k4); - OP5(H, bb, cc, dd, ee, aa, X[13], 5, k4); - OP5(H, aa, bb, cc, dd, ee, X[11], 12, k4); - OP5(H, ee, aa, bb, cc, dd, X[ 5], 7, k4); - OP5(H, dd, ee, aa, bb, cc, X[12], 5, k4); - - OP5(H, ddd, eee, aaa, bbb, ccc, X[15], 9, k5); - OP5(H, ccc, ddd, eee, aaa, bbb, X[ 5], 7, k5); - OP5(H, bbb, ccc, ddd, eee, aaa, X[ 1], 15, k5); - OP5(H, aaa, bbb, ccc, ddd, eee, X[ 3], 11, k5); - OP5(H, eee, aaa, bbb, ccc, ddd, X[ 7], 8, k5); - OP5(H, ddd, eee, aaa, bbb, ccc, X[14], 6, k5); - OP5(H, ccc, ddd, eee, aaa, bbb, X[ 6], 6, k5); - OP5(H, bbb, ccc, ddd, eee, aaa, X[ 9], 14, k5); - OP5(H, aaa, bbb, ccc, ddd, eee, X[11], 12, k5); - OP5(H, eee, aaa, bbb, ccc, ddd, X[ 8], 13, k5); - OP5(H, ddd, eee, aaa, bbb, ccc, X[12], 5, k5); - OP5(H, ccc, ddd, eee, aaa, bbb, X[ 2], 14, k5); - OP5(H, bbb, ccc, ddd, eee, aaa, 
X[10], 13, k5); - OP5(H, aaa, bbb, ccc, ddd, eee, X[ 0], 13, k5); - OP5(H, eee, aaa, bbb, ccc, ddd, X[ 4], 7, k5); - OP5(H, ddd, eee, aaa, bbb, ccc, X[13], 5, k5); - - } else { - - OP4(H, aa, bb, cc, dd, X[ 3], 11, k4); - OP4(H, dd, aa, bb, cc, X[10], 13, k4); - OP4(H, cc, dd, aa, bb, X[14], 6, k4); - OP4(H, bb, cc, dd, aa, X[ 4], 7, k4); - OP4(H, aa, bb, cc, dd, X[ 9], 14, k4); - OP4(H, dd, aa, bb, cc, X[15], 9, k4); - OP4(H, cc, dd, aa, bb, X[ 8], 13, k4); - OP4(H, bb, cc, dd, aa, X[ 1], 15, k4); - OP4(H, aa, bb, cc, dd, X[ 2], 14, k4); - OP4(H, dd, aa, bb, cc, X[ 7], 8, k4); - OP4(H, cc, dd, aa, bb, X[ 0], 13, k4); - OP4(H, bb, cc, dd, aa, X[ 6], 6, k4); - OP4(H, aa, bb, cc, dd, X[13], 5, k4); - OP4(H, dd, aa, bb, cc, X[11], 12, k4); - OP4(H, cc, dd, aa, bb, X[ 5], 7, k4); - OP4(H, bb, cc, dd, aa, X[12], 5, k4); - - OP4(G, aaa, bbb, ccc, ddd, X[15], 9, k5); - OP4(G, ddd, aaa, bbb, ccc, X[ 5], 7, k5); - OP4(G, ccc, ddd, aaa, bbb, X[ 1], 15, k5); - OP4(G, bbb, ccc, ddd, aaa, X[ 3], 11, k5); - OP4(G, aaa, bbb, ccc, ddd, X[ 7], 8, k5); - OP4(G, ddd, aaa, bbb, ccc, X[14], 6, k5); - OP4(G, ccc, ddd, aaa, bbb, X[ 6], 6, k5); - OP4(G, bbb, ccc, ddd, aaa, X[ 9], 14, k5); - OP4(G, aaa, bbb, ccc, ddd, X[11], 12, k5); - OP4(G, ddd, aaa, bbb, ccc, X[ 8], 13, k5); - OP4(G, ccc, ddd, aaa, bbb, X[12], 5, k5); - OP4(G, bbb, ccc, ddd, aaa, X[ 2], 14, k5); - OP4(G, aaa, bbb, ccc, ddd, X[10], 13, k5); - OP4(G, ddd, aaa, bbb, ccc, X[ 0], 13, k5); - OP4(G, ccc, ddd, aaa, bbb, X[ 4], 7, k5); - OP4(G, bbb, ccc, ddd, aaa, X[13], 5, k5); - - if (hashwidth == 256) { - uint64_t tmp = cc; cc = ccc; ccc = tmp; - } - } - - /* round 4 */ - if (hashwidth == 160) { - - OP5(I, cc, dd, ee, aa, bb, X[ 1], 11, k6); - OP5(I, bb, cc, dd, ee, aa, X[ 9], 12, k6); - OP5(I, aa, bb, cc, dd, ee, X[11], 14, k6); - OP5(I, ee, aa, bb, cc, dd, X[10], 15, k6); - OP5(I, dd, ee, aa, bb, cc, X[ 0], 14, k6); - OP5(I, cc, dd, ee, aa, bb, X[ 8], 15, k6); - OP5(I, bb, cc, dd, ee, aa, X[12], 9, k6); - OP5(I, aa, bb, cc, 
dd, ee, X[ 4], 8, k6); - OP5(I, ee, aa, bb, cc, dd, X[13], 9, k6); - OP5(I, dd, ee, aa, bb, cc, X[ 3], 14, k6); - OP5(I, cc, dd, ee, aa, bb, X[ 7], 5, k6); - OP5(I, bb, cc, dd, ee, aa, X[15], 6, k6); - OP5(I, aa, bb, cc, dd, ee, X[14], 8, k6); - OP5(I, ee, aa, bb, cc, dd, X[ 5], 6, k6); - OP5(I, dd, ee, aa, bb, cc, X[ 6], 5, k6); - OP5(I, cc, dd, ee, aa, bb, X[ 2], 12, k6); - - OP5(G, ccc, ddd, eee, aaa, bbb, X[ 8], 15, k9); - OP5(G, bbb, ccc, ddd, eee, aaa, X[ 6], 5, k9); - OP5(G, aaa, bbb, ccc, ddd, eee, X[ 4], 8, k9); - OP5(G, eee, aaa, bbb, ccc, ddd, X[ 1], 11, k9); - OP5(G, ddd, eee, aaa, bbb, ccc, X[ 3], 14, k9); - OP5(G, ccc, ddd, eee, aaa, bbb, X[11], 14, k9); - OP5(G, bbb, ccc, ddd, eee, aaa, X[15], 6, k9); - OP5(G, aaa, bbb, ccc, ddd, eee, X[ 0], 14, k9); - OP5(G, eee, aaa, bbb, ccc, ddd, X[ 5], 6, k9); - OP5(G, ddd, eee, aaa, bbb, ccc, X[12], 9, k9); - OP5(G, ccc, ddd, eee, aaa, bbb, X[ 2], 12, k9); - OP5(G, bbb, ccc, ddd, eee, aaa, X[13], 9, k9); - OP5(G, aaa, bbb, ccc, ddd, eee, X[ 9], 12, k9); - OP5(G, eee, aaa, bbb, ccc, ddd, X[ 7], 5, k9); - OP5(G, ddd, eee, aaa, bbb, ccc, X[10], 15, k9); - OP5(G, ccc, ddd, eee, aaa, bbb, X[14], 8, k9); - - } else { - - OP4(I, aa, bb, cc, dd, X[ 1], 11, k6); - OP4(I, dd, aa, bb, cc, X[ 9], 12, k6); - OP4(I, cc, dd, aa, bb, X[11], 14, k6); - OP4(I, bb, cc, dd, aa, X[10], 15, k6); - OP4(I, aa, bb, cc, dd, X[ 0], 14, k6); - OP4(I, dd, aa, bb, cc, X[ 8], 15, k6); - OP4(I, cc, dd, aa, bb, X[12], 9, k6); - OP4(I, bb, cc, dd, aa, X[ 4], 8, k6); - OP4(I, aa, bb, cc, dd, X[13], 9, k6); - OP4(I, dd, aa, bb, cc, X[ 3], 14, k6); - OP4(I, cc, dd, aa, bb, X[ 7], 5, k6); - OP4(I, bb, cc, dd, aa, X[15], 6, k6); - OP4(I, aa, bb, cc, dd, X[14], 8, k6); - OP4(I, dd, aa, bb, cc, X[ 5], 6, k6); - OP4(I, cc, dd, aa, bb, X[ 6], 5, k6); - OP4(I, bb, cc, dd, aa, X[ 2], 12, k6); - - OP4(F, aaa, bbb, ccc, ddd, X[ 8], 15, k7); - OP4(F, ddd, aaa, bbb, ccc, X[ 6], 5, k7); - OP4(F, ccc, ddd, aaa, bbb, X[ 4], 8, k7); - OP4(F, bbb, ccc, ddd, aaa, 
X[ 1], 11, k7); - OP4(F, aaa, bbb, ccc, ddd, X[ 3], 14, k7); - OP4(F, ddd, aaa, bbb, ccc, X[11], 14, k7); - OP4(F, ccc, ddd, aaa, bbb, X[15], 6, k7); - OP4(F, bbb, ccc, ddd, aaa, X[ 0], 14, k7); - OP4(F, aaa, bbb, ccc, ddd, X[ 5], 6, k7); - OP4(F, ddd, aaa, bbb, ccc, X[12], 9, k7); - OP4(F, ccc, ddd, aaa, bbb, X[ 2], 12, k7); - OP4(F, bbb, ccc, ddd, aaa, X[13], 9, k7); - OP4(F, aaa, bbb, ccc, ddd, X[ 9], 12, k7); - OP4(F, ddd, aaa, bbb, ccc, X[ 7], 5, k7); - OP4(F, ccc, ddd, aaa, bbb, X[10], 15, k7); - OP4(F, bbb, ccc, ddd, aaa, X[14], 8, k7); - - if (hashwidth == 256) { - uint64_t tmp = dd; dd = ddd; ddd = tmp; - } - } - - /* round 5 */ - if (hashwidth == 160) { - OP5(J, bb, cc, dd, ee, aa, X[ 4], 9, k8); - OP5(J, aa, bb, cc, dd, ee, X[ 0], 15, k8); - OP5(J, ee, aa, bb, cc, dd, X[ 5], 5, k8); - OP5(J, dd, ee, aa, bb, cc, X[ 9], 11, k8); - OP5(J, cc, dd, ee, aa, bb, X[ 7], 6, k8); - OP5(J, bb, cc, dd, ee, aa, X[12], 8, k8); - OP5(J, aa, bb, cc, dd, ee, X[ 2], 13, k8); - OP5(J, ee, aa, bb, cc, dd, X[10], 12, k8); - OP5(J, dd, ee, aa, bb, cc, X[14], 5, k8); - OP5(J, cc, dd, ee, aa, bb, X[ 1], 12, k8); - OP5(J, bb, cc, dd, ee, aa, X[ 3], 13, k8); - OP5(J, aa, bb, cc, dd, ee, X[ 8], 14, k8); - OP5(J, ee, aa, bb, cc, dd, X[11], 11, k8); - OP5(J, dd, ee, aa, bb, cc, X[ 6], 8, k8); - OP5(J, cc, dd, ee, aa, bb, X[15], 5, k8); - OP5(J, bb, cc, dd, ee, aa, X[13], 6, k8); - - OP5(F, bbb, ccc, ddd, eee, aaa, X[12], 8, k7); - OP5(F, aaa, bbb, ccc, ddd, eee, X[15], 5, k7); - OP5(F, eee, aaa, bbb, ccc, ddd, X[10], 12, k7); - OP5(F, ddd, eee, aaa, bbb, ccc, X[ 4], 9, k7); - OP5(F, ccc, ddd, eee, aaa, bbb, X[ 1], 12, k7); - OP5(F, bbb, ccc, ddd, eee, aaa, X[ 5], 5, k7); - OP5(F, aaa, bbb, ccc, ddd, eee, X[ 8], 14, k7); - OP5(F, eee, aaa, bbb, ccc, ddd, X[ 7], 6, k7); - OP5(F, ddd, eee, aaa, bbb, ccc, X[ 6], 8, k7); - OP5(F, ccc, ddd, eee, aaa, bbb, X[ 2], 13, k7); - OP5(F, bbb, ccc, ddd, eee, aaa, X[13], 6, k7); - OP5(F, aaa, bbb, ccc, ddd, eee, X[14], 5, k7); - OP5(F, eee, aaa, 
bbb, ccc, ddd, X[ 0], 15, k7); - OP5(F, ddd, eee, aaa, bbb, ccc, X[ 3], 13, k7); - OP5(F, ccc, ddd, eee, aaa, bbb, X[ 9], 11, k7); - OP5(F, bbb, ccc, ddd, eee, aaa, X[11], 11, k7); - } - - /* combine results */ - if (hashwidth == 128) { - ddd += cc + ctx->state[1]; /* final result for MDbuf[0] */ - ctx->state[1] = ctx->state[2] + dd + aaa; - ctx->state[2] = ctx->state[3] + aa + bbb; - ctx->state[3] = ctx->state[0] + bb + ccc; - ctx->state[0] = ddd; - } else if (hashwidth == 160) { - ddd += cc + ctx->state[1]; /* final result for MDbuf[0] */ - ctx->state[1] = ctx->state[2] + dd + eee; - ctx->state[2] = ctx->state[3] + ee + aaa; - ctx->state[3] = ctx->state[4] + aa + bbb; - ctx->state[4] = ctx->state[0] + bb + ccc; - ctx->state[0] = ddd; - } else if (hashwidth == 256) { - ctx->state[0] += aa; - ctx->state[1] += bb; - ctx->state[2] += cc; - ctx->state[3] += dd; - ctx->state[4] += aaa; - ctx->state[5] += bbb; - ctx->state[6] += ccc; - ctx->state[7] += ddd; - } - - return; +template +static void rmd_compress( rmd_ctx * ctx, const uint8_t * buf ) { + uint32_t aa, bb, cc, dd, ee, aaa, bbb, ccc, ddd, eee, X[16]; + int i; + const uint32_t k0 = 0; + const uint32_t k1 = 0x50a28be6; + const uint32_t k2 = 0x5a827999; + const uint32_t k3 = 0x5c4dd124; + const uint32_t k4 = 0x6ed9eba1; + const uint32_t k5 = 0x6d703ef3; + const uint32_t k6 = 0x8f1bbcdc; + const uint32_t k7 = 0; + const uint32_t k8 = 0xa953fd4e; + const uint32_t k9 = 0x7a6d76e9; + + /* load words X */ + for (i = 0; i < 16; i++) { + X[i] = GET_U32(buf, (4 * i)); + } + + /* load state */ + aa = aaa = ctx->state[0]; + bb = bbb = ctx->state[1]; + cc = ccc = ctx->state[2]; + dd = ddd = ctx->state[3]; + if (hashwidth == 160) { + ee = eee = ctx->state[4]; + } else if (hashwidth == 256) { + aaa = ctx->state[4]; + bbb = ctx->state[5]; + ccc = ctx->state[6]; + ddd = ctx->state[7]; + } + + /* round 1 */ + if (hashwidth == 160) { + OP5(F, aa , bb , cc , dd , ee , X[0] , 11, k0); + OP5(F, ee , aa , bb , cc , dd , X[1] , 14, 
k0); + OP5(F, dd , ee , aa , bb , cc , X[2] , 15, k0); + OP5(F, cc , dd , ee , aa , bb , X[3] , 12, k0); + OP5(F, bb , cc , dd , ee , aa , X[4] , 5, k0); + OP5(F, aa , bb , cc , dd , ee , X[5] , 8, k0); + OP5(F, ee , aa , bb , cc , dd , X[6] , 7, k0); + OP5(F, dd , ee , aa , bb , cc , X[7] , 9, k0); + OP5(F, cc , dd , ee , aa , bb , X[8] , 11, k0); + OP5(F, bb , cc , dd , ee , aa , X[9] , 13, k0); + OP5(F, aa , bb , cc , dd , ee , X[10], 14, k0); + OP5(F, ee , aa , bb , cc , dd , X[11], 15, k0); + OP5(F, dd , ee , aa , bb , cc , X[12], 6, k0); + OP5(F, cc , dd , ee , aa , bb , X[13], 7, k0); + OP5(F, bb , cc , dd , ee , aa , X[14], 9, k0); + OP5(F, aa , bb , cc , dd , ee , X[15], 8, k0); + + OP5(J, aaa, bbb, ccc, ddd, eee, X[5] , 8, k1); + OP5(J, eee, aaa, bbb, ccc, ddd, X[14], 9, k1); + OP5(J, ddd, eee, aaa, bbb, ccc, X[7] , 9, k1); + OP5(J, ccc, ddd, eee, aaa, bbb, X[0] , 11, k1); + OP5(J, bbb, ccc, ddd, eee, aaa, X[9] , 13, k1); + OP5(J, aaa, bbb, ccc, ddd, eee, X[2] , 15, k1); + OP5(J, eee, aaa, bbb, ccc, ddd, X[11], 15, k1); + OP5(J, ddd, eee, aaa, bbb, ccc, X[4] , 5, k1); + OP5(J, ccc, ddd, eee, aaa, bbb, X[13], 7, k1); + OP5(J, bbb, ccc, ddd, eee, aaa, X[6] , 7, k1); + OP5(J, aaa, bbb, ccc, ddd, eee, X[15], 8, k1); + OP5(J, eee, aaa, bbb, ccc, ddd, X[8] , 11, k1); + OP5(J, ddd, eee, aaa, bbb, ccc, X[1] , 14, k1); + OP5(J, ccc, ddd, eee, aaa, bbb, X[10], 14, k1); + OP5(J, bbb, ccc, ddd, eee, aaa, X[3] , 12, k1); + OP5(J, aaa, bbb, ccc, ddd, eee, X[12], 6, k1); + } else { + OP4(F, aa , bb , cc , dd , X[0] , 11, k0); + OP4(F, dd , aa , bb , cc , X[1] , 14, k0); + OP4(F, cc , dd , aa , bb , X[2] , 15, k0); + OP4(F, bb , cc , dd , aa , X[3] , 12, k0); + OP4(F, aa , bb , cc , dd , X[4] , 5, k0); + OP4(F, dd , aa , bb , cc , X[5] , 8, k0); + OP4(F, cc , dd , aa , bb , X[6] , 7, k0); + OP4(F, bb , cc , dd , aa , X[7] , 9, k0); + OP4(F, aa , bb , cc , dd , X[8] , 11, k0); + OP4(F, dd , aa , bb , cc , X[9] , 13, k0); + OP4(F, cc , dd , aa , bb , X[10], 14, k0); + 
OP4(F, bb , cc , dd , aa , X[11], 15, k0); + OP4(F, aa , bb , cc , dd , X[12], 6, k0); + OP4(F, dd , aa , bb , cc , X[13], 7, k0); + OP4(F, cc , dd , aa , bb , X[14], 9, k0); + OP4(F, bb , cc , dd , aa , X[15], 8, k0); + + OP4(I, aaa, bbb, ccc, ddd, X[5] , 8, k1); + OP4(I, ddd, aaa, bbb, ccc, X[14], 9, k1); + OP4(I, ccc, ddd, aaa, bbb, X[7] , 9, k1); + OP4(I, bbb, ccc, ddd, aaa, X[0] , 11, k1); + OP4(I, aaa, bbb, ccc, ddd, X[9] , 13, k1); + OP4(I, ddd, aaa, bbb, ccc, X[2] , 15, k1); + OP4(I, ccc, ddd, aaa, bbb, X[11], 15, k1); + OP4(I, bbb, ccc, ddd, aaa, X[4] , 5, k1); + OP4(I, aaa, bbb, ccc, ddd, X[13], 7, k1); + OP4(I, ddd, aaa, bbb, ccc, X[6] , 7, k1); + OP4(I, ccc, ddd, aaa, bbb, X[15], 8, k1); + OP4(I, bbb, ccc, ddd, aaa, X[8] , 11, k1); + OP4(I, aaa, bbb, ccc, ddd, X[1] , 14, k1); + OP4(I, ddd, aaa, bbb, ccc, X[10], 14, k1); + OP4(I, ccc, ddd, aaa, bbb, X[3] , 12, k1); + OP4(I, bbb, ccc, ddd, aaa, X[12], 6, k1); + + if (hashwidth == 256) { + uint64_t tmp = aa; aa = aaa; aaa = tmp; + } + } + + /* round 2 */ + if (hashwidth == 160) { + OP5(G, ee , aa , bb , cc , dd , X[7] , 7, k2); + OP5(G, dd , ee , aa , bb , cc , X[4] , 6, k2); + OP5(G, cc , dd , ee , aa , bb , X[13], 8, k2); + OP5(G, bb , cc , dd , ee , aa , X[1] , 13, k2); + OP5(G, aa , bb , cc , dd , ee , X[10], 11, k2); + OP5(G, ee , aa , bb , cc , dd , X[6] , 9, k2); + OP5(G, dd , ee , aa , bb , cc , X[15], 7, k2); + OP5(G, cc , dd , ee , aa , bb , X[3] , 15, k2); + OP5(G, bb , cc , dd , ee , aa , X[12], 7, k2); + OP5(G, aa , bb , cc , dd , ee , X[0] , 12, k2); + OP5(G, ee , aa , bb , cc , dd , X[9] , 15, k2); + OP5(G, dd , ee , aa , bb , cc , X[5] , 9, k2); + OP5(G, cc , dd , ee , aa , bb , X[2] , 11, k2); + OP5(G, bb , cc , dd , ee , aa , X[14], 7, k2); + OP5(G, aa , bb , cc , dd , ee , X[11], 13, k2); + OP5(G, ee , aa , bb , cc , dd , X[8] , 12, k2); + + OP5(I, eee, aaa, bbb, ccc, ddd, X[6] , 9, k3); + OP5(I, ddd, eee, aaa, bbb, ccc, X[11], 13, k3); + OP5(I, ccc, ddd, eee, aaa, bbb, X[3] , 15, k3); + 
OP5(I, bbb, ccc, ddd, eee, aaa, X[7] , 7, k3); + OP5(I, aaa, bbb, ccc, ddd, eee, X[0] , 12, k3); + OP5(I, eee, aaa, bbb, ccc, ddd, X[13], 8, k3); + OP5(I, ddd, eee, aaa, bbb, ccc, X[5] , 9, k3); + OP5(I, ccc, ddd, eee, aaa, bbb, X[10], 11, k3); + OP5(I, bbb, ccc, ddd, eee, aaa, X[14], 7, k3); + OP5(I, aaa, bbb, ccc, ddd, eee, X[15], 7, k3); + OP5(I, eee, aaa, bbb, ccc, ddd, X[8] , 12, k3); + OP5(I, ddd, eee, aaa, bbb, ccc, X[12], 7, k3); + OP5(I, ccc, ddd, eee, aaa, bbb, X[4] , 6, k3); + OP5(I, bbb, ccc, ddd, eee, aaa, X[9] , 15, k3); + OP5(I, aaa, bbb, ccc, ddd, eee, X[1] , 13, k3); + OP5(I, eee, aaa, bbb, ccc, ddd, X[2] , 11, k3); + } else { + OP4(G, aa , bb , cc , dd , X[7] , 7, k2); + OP4(G, dd , aa , bb , cc , X[4] , 6, k2); + OP4(G, cc , dd , aa , bb , X[13], 8, k2); + OP4(G, bb , cc , dd , aa , X[1] , 13, k2); + OP4(G, aa , bb , cc , dd , X[10], 11, k2); + OP4(G, dd , aa , bb , cc , X[6] , 9, k2); + OP4(G, cc , dd , aa , bb , X[15], 7, k2); + OP4(G, bb , cc , dd , aa , X[3] , 15, k2); + OP4(G, aa , bb , cc , dd , X[12], 7, k2); + OP4(G, dd , aa , bb , cc , X[0] , 12, k2); + OP4(G, cc , dd , aa , bb , X[9] , 15, k2); + OP4(G, bb , cc , dd , aa , X[5] , 9, k2); + OP4(G, aa , bb , cc , dd , X[2] , 11, k2); + OP4(G, dd , aa , bb , cc , X[14], 7, k2); + OP4(G, cc , dd , aa , bb , X[11], 13, k2); + OP4(G, bb , cc , dd , aa , X[8] , 12, k2); + + OP4(H, aaa, bbb, ccc, ddd, X[6] , 9, k3); + OP4(H, ddd, aaa, bbb, ccc, X[11], 13, k3); + OP4(H, ccc, ddd, aaa, bbb, X[3] , 15, k3); + OP4(H, bbb, ccc, ddd, aaa, X[7] , 7, k3); + OP4(H, aaa, bbb, ccc, ddd, X[0] , 12, k3); + OP4(H, ddd, aaa, bbb, ccc, X[13], 8, k3); + OP4(H, ccc, ddd, aaa, bbb, X[5] , 9, k3); + OP4(H, bbb, ccc, ddd, aaa, X[10], 11, k3); + OP4(H, aaa, bbb, ccc, ddd, X[14], 7, k3); + OP4(H, ddd, aaa, bbb, ccc, X[15], 7, k3); + OP4(H, ccc, ddd, aaa, bbb, X[8] , 12, k3); + OP4(H, bbb, ccc, ddd, aaa, X[12], 7, k3); + OP4(H, aaa, bbb, ccc, ddd, X[4] , 6, k3); + OP4(H, ddd, aaa, bbb, ccc, X[9] , 15, k3); + OP4(H, 
ccc, ddd, aaa, bbb, X[1] , 13, k3); + OP4(H, bbb, ccc, ddd, aaa, X[2] , 11, k3); + + if (hashwidth == 256) { + uint64_t tmp = bb; bb = bbb; bbb = tmp; + } + } + + /* round 3 */ + if (hashwidth == 160) { + OP5(H, dd , ee , aa , bb , cc , X[3] , 11, k4); + OP5(H, cc , dd , ee , aa , bb , X[10], 13, k4); + OP5(H, bb , cc , dd , ee , aa , X[14], 6, k4); + OP5(H, aa , bb , cc , dd , ee , X[4] , 7, k4); + OP5(H, ee , aa , bb , cc , dd , X[9] , 14, k4); + OP5(H, dd , ee , aa , bb , cc , X[15], 9, k4); + OP5(H, cc , dd , ee , aa , bb , X[8] , 13, k4); + OP5(H, bb , cc , dd , ee , aa , X[1] , 15, k4); + OP5(H, aa , bb , cc , dd , ee , X[2] , 14, k4); + OP5(H, ee , aa , bb , cc , dd , X[7] , 8, k4); + OP5(H, dd , ee , aa , bb , cc , X[0] , 13, k4); + OP5(H, cc , dd , ee , aa , bb , X[6] , 6, k4); + OP5(H, bb , cc , dd , ee , aa , X[13], 5, k4); + OP5(H, aa , bb , cc , dd , ee , X[11], 12, k4); + OP5(H, ee , aa , bb , cc , dd , X[5] , 7, k4); + OP5(H, dd , ee , aa , bb , cc , X[12], 5, k4); + + OP5(H, ddd, eee, aaa, bbb, ccc, X[15], 9, k5); + OP5(H, ccc, ddd, eee, aaa, bbb, X[5] , 7, k5); + OP5(H, bbb, ccc, ddd, eee, aaa, X[1] , 15, k5); + OP5(H, aaa, bbb, ccc, ddd, eee, X[3] , 11, k5); + OP5(H, eee, aaa, bbb, ccc, ddd, X[7] , 8, k5); + OP5(H, ddd, eee, aaa, bbb, ccc, X[14], 6, k5); + OP5(H, ccc, ddd, eee, aaa, bbb, X[6] , 6, k5); + OP5(H, bbb, ccc, ddd, eee, aaa, X[9] , 14, k5); + OP5(H, aaa, bbb, ccc, ddd, eee, X[11], 12, k5); + OP5(H, eee, aaa, bbb, ccc, ddd, X[8] , 13, k5); + OP5(H, ddd, eee, aaa, bbb, ccc, X[12], 5, k5); + OP5(H, ccc, ddd, eee, aaa, bbb, X[2] , 14, k5); + OP5(H, bbb, ccc, ddd, eee, aaa, X[10], 13, k5); + OP5(H, aaa, bbb, ccc, ddd, eee, X[0] , 13, k5); + OP5(H, eee, aaa, bbb, ccc, ddd, X[4] , 7, k5); + OP5(H, ddd, eee, aaa, bbb, ccc, X[13], 5, k5); + } else { + OP4(H, aa , bb , cc , dd , X[3] , 11, k4); + OP4(H, dd , aa , bb , cc , X[10], 13, k4); + OP4(H, cc , dd , aa , bb , X[14], 6, k4); + OP4(H, bb , cc , dd , aa , X[4] , 7, k4); + OP4(H, aa , bb , cc 
, dd , X[9] , 14, k4); + OP4(H, dd , aa , bb , cc , X[15], 9, k4); + OP4(H, cc , dd , aa , bb , X[8] , 13, k4); + OP4(H, bb , cc , dd , aa , X[1] , 15, k4); + OP4(H, aa , bb , cc , dd , X[2] , 14, k4); + OP4(H, dd , aa , bb , cc , X[7] , 8, k4); + OP4(H, cc , dd , aa , bb , X[0] , 13, k4); + OP4(H, bb , cc , dd , aa , X[6] , 6, k4); + OP4(H, aa , bb , cc , dd , X[13], 5, k4); + OP4(H, dd , aa , bb , cc , X[11], 12, k4); + OP4(H, cc , dd , aa , bb , X[5] , 7, k4); + OP4(H, bb , cc , dd , aa , X[12], 5, k4); + + OP4(G, aaa, bbb, ccc, ddd, X[15], 9, k5); + OP4(G, ddd, aaa, bbb, ccc, X[5] , 7, k5); + OP4(G, ccc, ddd, aaa, bbb, X[1] , 15, k5); + OP4(G, bbb, ccc, ddd, aaa, X[3] , 11, k5); + OP4(G, aaa, bbb, ccc, ddd, X[7] , 8, k5); + OP4(G, ddd, aaa, bbb, ccc, X[14], 6, k5); + OP4(G, ccc, ddd, aaa, bbb, X[6] , 6, k5); + OP4(G, bbb, ccc, ddd, aaa, X[9] , 14, k5); + OP4(G, aaa, bbb, ccc, ddd, X[11], 12, k5); + OP4(G, ddd, aaa, bbb, ccc, X[8] , 13, k5); + OP4(G, ccc, ddd, aaa, bbb, X[12], 5, k5); + OP4(G, bbb, ccc, ddd, aaa, X[2] , 14, k5); + OP4(G, aaa, bbb, ccc, ddd, X[10], 13, k5); + OP4(G, ddd, aaa, bbb, ccc, X[0] , 13, k5); + OP4(G, ccc, ddd, aaa, bbb, X[4] , 7, k5); + OP4(G, bbb, ccc, ddd, aaa, X[13], 5, k5); + + if (hashwidth == 256) { + uint64_t tmp = cc; cc = ccc; ccc = tmp; + } + } + + /* round 4 */ + if (hashwidth == 160) { + OP5(I, cc , dd , ee , aa , bb , X[1] , 11, k6); + OP5(I, bb , cc , dd , ee , aa , X[9] , 12, k6); + OP5(I, aa , bb , cc , dd , ee , X[11], 14, k6); + OP5(I, ee , aa , bb , cc , dd , X[10], 15, k6); + OP5(I, dd , ee , aa , bb , cc , X[0] , 14, k6); + OP5(I, cc , dd , ee , aa , bb , X[8] , 15, k6); + OP5(I, bb , cc , dd , ee , aa , X[12], 9, k6); + OP5(I, aa , bb , cc , dd , ee , X[4] , 8, k6); + OP5(I, ee , aa , bb , cc , dd , X[13], 9, k6); + OP5(I, dd , ee , aa , bb , cc , X[3] , 14, k6); + OP5(I, cc , dd , ee , aa , bb , X[7] , 5, k6); + OP5(I, bb , cc , dd , ee , aa , X[15], 6, k6); + OP5(I, aa , bb , cc , dd , ee , X[14], 8, k6); + 
OP5(I, ee , aa , bb , cc , dd , X[5] , 6, k6); + OP5(I, dd , ee , aa , bb , cc , X[6] , 5, k6); + OP5(I, cc , dd , ee , aa , bb , X[2] , 12, k6); + + OP5(G, ccc, ddd, eee, aaa, bbb, X[8] , 15, k9); + OP5(G, bbb, ccc, ddd, eee, aaa, X[6] , 5, k9); + OP5(G, aaa, bbb, ccc, ddd, eee, X[4] , 8, k9); + OP5(G, eee, aaa, bbb, ccc, ddd, X[1] , 11, k9); + OP5(G, ddd, eee, aaa, bbb, ccc, X[3] , 14, k9); + OP5(G, ccc, ddd, eee, aaa, bbb, X[11], 14, k9); + OP5(G, bbb, ccc, ddd, eee, aaa, X[15], 6, k9); + OP5(G, aaa, bbb, ccc, ddd, eee, X[0] , 14, k9); + OP5(G, eee, aaa, bbb, ccc, ddd, X[5] , 6, k9); + OP5(G, ddd, eee, aaa, bbb, ccc, X[12], 9, k9); + OP5(G, ccc, ddd, eee, aaa, bbb, X[2] , 12, k9); + OP5(G, bbb, ccc, ddd, eee, aaa, X[13], 9, k9); + OP5(G, aaa, bbb, ccc, ddd, eee, X[9] , 12, k9); + OP5(G, eee, aaa, bbb, ccc, ddd, X[7] , 5, k9); + OP5(G, ddd, eee, aaa, bbb, ccc, X[10], 15, k9); + OP5(G, ccc, ddd, eee, aaa, bbb, X[14], 8, k9); + } else { + OP4(I, aa , bb , cc , dd , X[1] , 11, k6); + OP4(I, dd , aa , bb , cc , X[9] , 12, k6); + OP4(I, cc , dd , aa , bb , X[11], 14, k6); + OP4(I, bb , cc , dd , aa , X[10], 15, k6); + OP4(I, aa , bb , cc , dd , X[0] , 14, k6); + OP4(I, dd , aa , bb , cc , X[8] , 15, k6); + OP4(I, cc , dd , aa , bb , X[12], 9, k6); + OP4(I, bb , cc , dd , aa , X[4] , 8, k6); + OP4(I, aa , bb , cc , dd , X[13], 9, k6); + OP4(I, dd , aa , bb , cc , X[3] , 14, k6); + OP4(I, cc , dd , aa , bb , X[7] , 5, k6); + OP4(I, bb , cc , dd , aa , X[15], 6, k6); + OP4(I, aa , bb , cc , dd , X[14], 8, k6); + OP4(I, dd , aa , bb , cc , X[5] , 6, k6); + OP4(I, cc , dd , aa , bb , X[6] , 5, k6); + OP4(I, bb , cc , dd , aa , X[2] , 12, k6); + + OP4(F, aaa, bbb, ccc, ddd, X[8] , 15, k7); + OP4(F, ddd, aaa, bbb, ccc, X[6] , 5, k7); + OP4(F, ccc, ddd, aaa, bbb, X[4] , 8, k7); + OP4(F, bbb, ccc, ddd, aaa, X[1] , 11, k7); + OP4(F, aaa, bbb, ccc, ddd, X[3] , 14, k7); + OP4(F, ddd, aaa, bbb, ccc, X[11], 14, k7); + OP4(F, ccc, ddd, aaa, bbb, X[15], 6, k7); + OP4(F, bbb, ccc, 
ddd, aaa, X[0] , 14, k7); + OP4(F, aaa, bbb, ccc, ddd, X[5] , 6, k7); + OP4(F, ddd, aaa, bbb, ccc, X[12], 9, k7); + OP4(F, ccc, ddd, aaa, bbb, X[2] , 12, k7); + OP4(F, bbb, ccc, ddd, aaa, X[13], 9, k7); + OP4(F, aaa, bbb, ccc, ddd, X[9] , 12, k7); + OP4(F, ddd, aaa, bbb, ccc, X[7] , 5, k7); + OP4(F, ccc, ddd, aaa, bbb, X[10], 15, k7); + OP4(F, bbb, ccc, ddd, aaa, X[14], 8, k7); + + if (hashwidth == 256) { + uint64_t tmp = dd; dd = ddd; ddd = tmp; + } + } + + /* round 5 */ + if (hashwidth == 160) { + OP5(J, bb , cc , dd , ee , aa , X[4] , 9, k8); + OP5(J, aa , bb , cc , dd , ee , X[0] , 15, k8); + OP5(J, ee , aa , bb , cc , dd , X[5] , 5, k8); + OP5(J, dd , ee , aa , bb , cc , X[9] , 11, k8); + OP5(J, cc , dd , ee , aa , bb , X[7] , 6, k8); + OP5(J, bb , cc , dd , ee , aa , X[12], 8, k8); + OP5(J, aa , bb , cc , dd , ee , X[2] , 13, k8); + OP5(J, ee , aa , bb , cc , dd , X[10], 12, k8); + OP5(J, dd , ee , aa , bb , cc , X[14], 5, k8); + OP5(J, cc , dd , ee , aa , bb , X[1] , 12, k8); + OP5(J, bb , cc , dd , ee , aa , X[3] , 13, k8); + OP5(J, aa , bb , cc , dd , ee , X[8] , 14, k8); + OP5(J, ee , aa , bb , cc , dd , X[11], 11, k8); + OP5(J, dd , ee , aa , bb , cc , X[6] , 8, k8); + OP5(J, cc , dd , ee , aa , bb , X[15], 5, k8); + OP5(J, bb , cc , dd , ee , aa , X[13], 6, k8); + + OP5(F, bbb, ccc, ddd, eee, aaa, X[12], 8, k7); + OP5(F, aaa, bbb, ccc, ddd, eee, X[15], 5, k7); + OP5(F, eee, aaa, bbb, ccc, ddd, X[10], 12, k7); + OP5(F, ddd, eee, aaa, bbb, ccc, X[4] , 9, k7); + OP5(F, ccc, ddd, eee, aaa, bbb, X[1] , 12, k7); + OP5(F, bbb, ccc, ddd, eee, aaa, X[5] , 5, k7); + OP5(F, aaa, bbb, ccc, ddd, eee, X[8] , 14, k7); + OP5(F, eee, aaa, bbb, ccc, ddd, X[7] , 6, k7); + OP5(F, ddd, eee, aaa, bbb, ccc, X[6] , 8, k7); + OP5(F, ccc, ddd, eee, aaa, bbb, X[2] , 13, k7); + OP5(F, bbb, ccc, ddd, eee, aaa, X[13], 6, k7); + OP5(F, aaa, bbb, ccc, ddd, eee, X[14], 5, k7); + OP5(F, eee, aaa, bbb, ccc, ddd, X[0] , 15, k7); + OP5(F, ddd, eee, aaa, bbb, ccc, X[3] , 13, k7); + OP5(F, 
ccc, ddd, eee, aaa, bbb, X[9] , 11, k7); + OP5(F, bbb, ccc, ddd, eee, aaa, X[11], 11, k7); + } + + /* combine results */ + if (hashwidth == 128) { + ddd += cc + ctx->state[1]; /* final result for MDbuf[0] */ + ctx->state[1] = ctx->state[2] + dd + aaa; + ctx->state[2] = ctx->state[3] + aa + bbb; + ctx->state[3] = ctx->state[0] + bb + ccc; + ctx->state[0] = ddd; + } else if (hashwidth == 160) { + ddd += cc + ctx->state[1]; /* final result for MDbuf[0] */ + ctx->state[1] = ctx->state[2] + dd + eee; + ctx->state[2] = ctx->state[3] + ee + aaa; + ctx->state[3] = ctx->state[4] + aa + bbb; + ctx->state[4] = ctx->state[0] + bb + ccc; + ctx->state[0] = ddd; + } else if (hashwidth == 256) { + ctx->state[0] += aa; + ctx->state[1] += bb; + ctx->state[2] += cc; + ctx->state[3] += dd; + ctx->state[4] += aaa; + ctx->state[5] += bbb; + ctx->state[6] += ccc; + ctx->state[7] += ddd; + } + + return; } -template < uint32_t hashwidth > -static void rmd_init(rmd_ctx * ctx) { - ctx->state[0] = 0x67452301; - ctx->state[1] = 0xefcdab89; - ctx->state[2] = 0x98badcfe; - ctx->state[3] = 0x10325476; - if (hashwidth >= 160) { - ctx->state[4] = 0xc3d2e1f0; - } - if (hashwidth == 256) { - ctx->state[4] = 0x76543210; - ctx->state[5] = 0xfedcba98; - ctx->state[6] = 0x89abcdef; - ctx->state[7] = 0x01234567; - } - ctx->curlen = 0; - ctx->length = 0; - return; +template +static void rmd_init( rmd_ctx * ctx ) { + ctx->state[0] = 0x67452301; + ctx->state[1] = 0xefcdab89; + ctx->state[2] = 0x98badcfe; + ctx->state[3] = 0x10325476; + if (hashwidth >= 160) { + ctx->state[4] = 0xc3d2e1f0; + } + if (hashwidth == 256) { + ctx->state[4] = 0x76543210; + ctx->state[5] = 0xfedcba98; + ctx->state[6] = 0x89abcdef; + ctx->state[7] = 0x01234567; + } + ctx->curlen = 0; + ctx->length = 0; + return; } -template < uint32_t hashwidth, bool bswap > -static void rmd_done(rmd_ctx * ctx, uint8_t * out) { +template +static void rmd_done( rmd_ctx * ctx, uint8_t * out ) { int i; /* increase the length of the message */ @@ -502,7 
+490,7 @@ static void rmd_done(rmd_ctx * ctx, uint8_t * out) { while (ctx->curlen < 64) { ctx->buf[ctx->curlen++] = (unsigned char)0; } - rmd_compress(ctx, ctx->buf); + rmd_compress(ctx, ctx->buf); ctx->curlen = 0; } @@ -513,249 +501,291 @@ static void rmd_done(rmd_ctx * ctx, uint8_t * out) { /* store length */ if (isBE()) { - PUT_U64(ctx->length, ctx->buf+56, 0); + PUT_U64(ctx->length, ctx->buf + 56, 0); } else { - PUT_U64(ctx->length, ctx->buf+56, 0); + PUT_U64(ctx->length, ctx->buf + 56, 0); } - rmd_compress(ctx, ctx->buf); + rmd_compress(ctx, ctx->buf); /* copy output */ - for (i = 0; i < (hashwidth/32); i++) { - PUT_U32(ctx->state[i], (uint8_t*)out, 4*i); + for (i = 0; i < (hashwidth / 32); i++) { + PUT_U32(ctx->state[i], (uint8_t *)out, 4 * i); } } -template < uint32_t hashwidth, bool bswap > -static void rmd_update(rmd_ctx * ctx, const uint8_t * data, size_t len) { - while (len > 0) { - if ((ctx->length == 0) && (len >= sizeof(ctx->buf))) { - rmd_compress(ctx, data); - ctx->length += 64*8; - len -= 64; - data += 64; - } else { - size_t n = 64 - ctx->curlen; - if (n > len) { n = len; } - memcpy(&ctx->buf[ctx->curlen], data, n); - ctx->curlen += n; - len -= n; - data += n; - if (ctx->curlen == sizeof(ctx->buf)) { - rmd_compress(ctx, ctx->buf); - ctx->curlen = 0; - ctx->length += 64*8; - } +template +static void rmd_update( rmd_ctx * ctx, const uint8_t * data, size_t len ) { + while (len > 0) { + if ((ctx->length == 0) && (len >= sizeof(ctx->buf))) { + rmd_compress(ctx, data); + ctx->length += 64 * 8; + len -= 64; + data += 64; + } else { + size_t n = 64 - ctx->curlen; + if (n > len) { n = len; } + memcpy(&ctx->buf[ctx->curlen], data, n); + ctx->curlen += n; + len -= n; + data += n; + if (ctx->curlen == sizeof(ctx->buf)) { + rmd_compress(ctx, ctx->buf); + ctx->curlen = 0; + ctx->length += 64 * 8; + } + } } - } } /* Homegrown RMD seeding */ -static void rmd_seed(rmd_ctx * ctx, uint64_t seed) { - const uint32_t seedlo = seed & 0xFFFFFFFF; - const uint32_t seedhi 
= (seed >> 32) & 0xFFFFFFFF; - - ctx->state[0] ^= seedlo; - ctx->state[1] ^= seedlo + seedhi; - ctx->state[2] ^= seedhi; - ctx->state[3] ^= seedlo + seedhi; +static void rmd_seed( rmd_ctx * ctx, uint64_t seed ) { + const uint32_t seedlo = seed & 0xFFFFFFFF; + const uint32_t seedhi = (seed >> 32) & 0xFFFFFFFF; + + ctx->state[0] ^= seedlo; + ctx->state[1] ^= seedlo + seedhi; + ctx->state[2] ^= seedhi; + ctx->state[3] ^= seedlo + seedhi; } -template < bool bswap > -static void rmd128(const void * in, const size_t len, const seed_t seed, void * out) { - rmd_ctx ctx; +template +static void rmd128( const void * in, const size_t len, const seed_t seed, void * out ) { + rmd_ctx ctx; - rmd_init<128> (&ctx); - rmd_seed (&ctx, (uint64_t)seed); - rmd_update<128, bswap> (&ctx, (const uint8_t *)in, len); - rmd_done<128, bswap> (&ctx, (uint8_t*)out); + rmd_init<128>(&ctx); + rmd_seed(&ctx, (uint64_t)seed); + rmd_update<128, bswap>(&ctx, (const uint8_t *)in, len); + rmd_done<128, bswap>(&ctx, (uint8_t *)out); } -template < bool bswap > -static void rmd160(const void * in, const size_t len, const seed_t seed, void * out) { - rmd_ctx ctx; +template +static void rmd160( const void * in, const size_t len, const seed_t seed, void * out ) { + rmd_ctx ctx; - rmd_init<160> (&ctx); - rmd_seed (&ctx, (uint64_t)seed); - rmd_update<160, bswap> (&ctx, (const uint8_t *)in, len); - rmd_done<160, bswap> (&ctx, (uint8_t*)out); + rmd_init<160>(&ctx); + rmd_seed(&ctx, (uint64_t)seed); + rmd_update<160, bswap>(&ctx, (const uint8_t *)in, len); + rmd_done<160, bswap>(&ctx, (uint8_t *)out); } -template < bool bswap > -static void rmd256(const void * in, const size_t len, const seed_t seed, void * out) { - rmd_ctx ctx; +template +static void rmd256( const void * in, const size_t len, const seed_t seed, void * out ) { + rmd_ctx ctx; - rmd_init<256> (&ctx); - rmd_seed (&ctx, (uint64_t)seed); - rmd_update<256, bswap> (&ctx, (const uint8_t *)in, len); - rmd_done<256, bswap> (&ctx, (uint8_t*)out); + 
rmd_init<256>(&ctx); + rmd_seed(&ctx, (uint64_t)seed); + rmd_update<256, bswap>(&ctx, (const uint8_t *)in, len); + rmd_done<256, bswap>(&ctx, (uint8_t *)out); } -static bool rmd_test(void) { - static const struct { - const char *msg; - unsigned char hash128[16]; - unsigned char hash160[20]; - unsigned char hash256[32]; - } tests[] = { - { "", - { 0xcd, 0xf2, 0x62, 0x13, 0xa1, 0x50, 0xdc, 0x3e, - 0xcb, 0x61, 0x0f, 0x18, 0xf6, 0xb3, 0x8b, 0x46 }, - { 0x9c, 0x11, 0x85, 0xa5, 0xc5, 0xe9, 0xfc, 0x54, 0x61, 0x28, - 0x08, 0x97, 0x7e, 0xe8, 0xf5, 0x48, 0xb2, 0x25, 0x8d, 0x31 }, - { 0x02, 0xba, 0x4c, 0x4e, 0x5f, 0x8e, 0xcd, 0x18, - 0x77, 0xfc, 0x52, 0xd6, 0x4d, 0x30, 0xe3, 0x7a, - 0x2d, 0x97, 0x74, 0xfb, 0x1e, 0x5d, 0x02, 0x63, - 0x80, 0xae, 0x01, 0x68, 0xe3, 0xc5, 0x52, 0x2d } - }, - { "a", - { 0x86, 0xbe, 0x7a, 0xfa, 0x33, 0x9d, 0x0f, 0xc7, - 0xcf, 0xc7, 0x85, 0xe7, 0x2f, 0x57, 0x8d, 0x33 }, - { 0x0b, 0xdc, 0x9d, 0x2d, 0x25, 0x6b, 0x3e, 0xe9, 0xda, 0xae, - 0x34, 0x7b, 0xe6, 0xf4, 0xdc, 0x83, 0x5a, 0x46, 0x7f, 0xfe }, - { 0xf9, 0x33, 0x3e, 0x45, 0xd8, 0x57, 0xf5, 0xd9, - 0x0a, 0x91, 0xba, 0xb7, 0x0a, 0x1e, 0xba, 0x0c, - 0xfb, 0x1b, 0xe4, 0xb0, 0x78, 0x3c, 0x9a, 0xcf, - 0xcd, 0x88, 0x3a, 0x91, 0x34, 0x69, 0x29, 0x25 } - }, - { "abc", - { 0xc1, 0x4a, 0x12, 0x19, 0x9c, 0x66, 0xe4, 0xba, - 0x84, 0x63, 0x6b, 0x0f, 0x69, 0x14, 0x4c, 0x77 }, - { 0x8e, 0xb2, 0x08, 0xf7, 0xe0, 0x5d, 0x98, 0x7a, 0x9b, 0x04, - 0x4a, 0x8e, 0x98, 0xc6, 0xb0, 0x87, 0xf1, 0x5a, 0x0b, 0xfc }, - { 0xaf, 0xbd, 0x6e, 0x22, 0x8b, 0x9d, 0x8c, 0xbb, - 0xce, 0xf5, 0xca, 0x2d, 0x03, 0xe6, 0xdb, 0xa1, - 0x0a, 0xc0, 0xbc, 0x7d, 0xcb, 0xe4, 0x68, 0x0e, - 0x1e, 0x42, 0xd2, 0xe9, 0x75, 0x45, 0x9b, 0x65 } - }, - { "message digest", - { 0x9e, 0x32, 0x7b, 0x3d, 0x6e, 0x52, 0x30, 0x62, - 0xaf, 0xc1, 0x13, 0x2d, 0x7d, 0xf9, 0xd1, 0xb8 }, - { 0x5d, 0x06, 0x89, 0xef, 0x49, 0xd2, 0xfa, 0xe5, 0x72, 0xb8, - 0x81, 0xb1, 0x23, 0xa8, 0x5f, 0xfa, 0x21, 0x59, 0x5f, 0x36 }, - { 0x87, 0xe9, 0x71, 0x75, 0x9a, 0x1c, 0xe4, 0x7a, - 0x51, 
0x4d, 0x5c, 0x91, 0x4c, 0x39, 0x2c, 0x90, - 0x18, 0xc7, 0xc4, 0x6b, 0xc1, 0x44, 0x65, 0x55, - 0x4a, 0xfc, 0xdf, 0x54, 0xa5, 0x07, 0x0c, 0x0e } - }, - { "abcdefghijklmnopqrstuvwxyz", - { 0xfd, 0x2a, 0xa6, 0x07, 0xf7, 0x1d, 0xc8, 0xf5, - 0x10, 0x71, 0x49, 0x22, 0xb3, 0x71, 0x83, 0x4e }, - { 0xf7, 0x1c, 0x27, 0x10, 0x9c, 0x69, 0x2c, 0x1b, 0x56, 0xbb, - 0xdc, 0xeb, 0x5b, 0x9d, 0x28, 0x65, 0xb3, 0x70, 0x8d, 0xbc }, - { 0x64, 0x9d, 0x30, 0x34, 0x75, 0x1e, 0xa2, 0x16, - 0x77, 0x6b, 0xf9, 0xa1, 0x8a, 0xcc, 0x81, 0xbc, - 0x78, 0x96, 0x11, 0x8a, 0x51, 0x97, 0x96, 0x87, - 0x82, 0xdd, 0x1f, 0xd9, 0x7d, 0x8d, 0x51, 0x33 } - }, - { "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", - { 0xd1, 0xe9, 0x59, 0xeb, 0x17, 0x9c, 0x91, 0x1f, - 0xae, 0xa4, 0x62, 0x4c, 0x60, 0xc5, 0xc7, 0x02 }, - { 0xb0, 0xe2, 0x0b, 0x6e, 0x31, 0x16, 0x64, 0x02, 0x86, 0xed, - 0x3a, 0x87, 0xa5, 0x71, 0x30, 0x79, 0xb2, 0x1f, 0x51, 0x89 }, - { 0x57, 0x40, 0xa4, 0x08, 0xac, 0x16, 0xb7, 0x20, - 0xb8, 0x44, 0x24, 0xae, 0x93, 0x1c, 0xbb, 0x1f, - 0xe3, 0x63, 0xd1, 0xd0, 0xbf, 0x40, 0x17, 0xf1, - 0xa8, 0x9f, 0x7e, 0xa6, 0xde, 0x77, 0xa0, 0xb8 } - } - }; - - int i; - unsigned char tmp[32]; - bool result = true; - - for (i = 0; i < (int)(sizeof(tests)/sizeof(tests[0])); i++) { - if (isLE()) { - rmd128(tests[i].msg, strlen(tests[i].msg), 0, tmp); - } else { - rmd128(tests[i].msg, strlen(tests[i].msg), 0, tmp); - } - if (memcmp(tmp, tests[i].hash128, 16) != 0) { - //printf("128 failure test %d\n", i); - result = false; - } - if (isLE()) { - rmd160(tests[i].msg, strlen(tests[i].msg), 0, tmp); - } else { - rmd160(tests[i].msg, strlen(tests[i].msg), 0, tmp); - } - if (memcmp(tmp, tests[i].hash160, 20) != 0) { - //printf("160 failure test %d\n", i); - result = false; - } - if (isLE()) { - rmd256(tests[i].msg, strlen(tests[i].msg), 0, tmp); - } else { - rmd256(tests[i].msg, strlen(tests[i].msg), 0, tmp); - } - if (memcmp(tmp, tests[i].hash256, 32) != 0) { - //printf("256 failure test %d\n", i); - result = 
false; - } - } - return result; +static bool rmd_test( void ) { + static conststruct { + const char * msg; + unsigned char hash128[16]; + unsigned char hash160[20]; + unsigned char hash256[32]; + } tests[] = { + { + "", + { + 0xcd, 0xf2, 0x62, 0x13, 0xa1, 0x50, 0xdc, 0x3e, + 0xcb, 0x61, 0x0f, 0x18, 0xf6, 0xb3, 0x8b, 0x46 + }, + { + 0x9c, 0x11, 0x85, 0xa5, 0xc5, 0xe9, 0xfc, 0x54, 0x61, 0x28, + 0x08, 0x97, 0x7e, 0xe8, 0xf5, 0x48, 0xb2, 0x25, 0x8d, 0x31 + }, + { + 0x02, 0xba, 0x4c, 0x4e, 0x5f, 0x8e, 0xcd, 0x18, + 0x77, 0xfc, 0x52, 0xd6, 0x4d, 0x30, 0xe3, 0x7a, + 0x2d, 0x97, 0x74, 0xfb, 0x1e, 0x5d, 0x02, 0x63, + 0x80, 0xae, 0x01, 0x68, 0xe3, 0xc5, 0x52, 0x2d + } + }, + { + "a", + { + 0x86, 0xbe, 0x7a, 0xfa, 0x33, 0x9d, 0x0f, 0xc7, + 0xcf, 0xc7, 0x85, 0xe7, 0x2f, 0x57, 0x8d, 0x33 + }, + { + 0x0b, 0xdc, 0x9d, 0x2d, 0x25, 0x6b, 0x3e, 0xe9, 0xda, 0xae, + 0x34, 0x7b, 0xe6, 0xf4, 0xdc, 0x83, 0x5a, 0x46, 0x7f, 0xfe + }, + { + 0xf9, 0x33, 0x3e, 0x45, 0xd8, 0x57, 0xf5, 0xd9, + 0x0a, 0x91, 0xba, 0xb7, 0x0a, 0x1e, 0xba, 0x0c, + 0xfb, 0x1b, 0xe4, 0xb0, 0x78, 0x3c, 0x9a, 0xcf, + 0xcd, 0x88, 0x3a, 0x91, 0x34, 0x69, 0x29, 0x25 + } + }, + { + "abc", + { + 0xc1, 0x4a, 0x12, 0x19, 0x9c, 0x66, 0xe4, 0xba, + 0x84, 0x63, 0x6b, 0x0f, 0x69, 0x14, 0x4c, 0x77 + }, + { + 0x8e, 0xb2, 0x08, 0xf7, 0xe0, 0x5d, 0x98, 0x7a, 0x9b, 0x04, + 0x4a, 0x8e, 0x98, 0xc6, 0xb0, 0x87, 0xf1, 0x5a, 0x0b, 0xfc + }, + { + 0xaf, 0xbd, 0x6e, 0x22, 0x8b, 0x9d, 0x8c, 0xbb, + 0xce, 0xf5, 0xca, 0x2d, 0x03, 0xe6, 0xdb, 0xa1, + 0x0a, 0xc0, 0xbc, 0x7d, 0xcb, 0xe4, 0x68, 0x0e, + 0x1e, 0x42, 0xd2, 0xe9, 0x75, 0x45, 0x9b, 0x65 + } + }, + { + "message digest", + { + 0x9e, 0x32, 0x7b, 0x3d, 0x6e, 0x52, 0x30, 0x62, + 0xaf, 0xc1, 0x13, 0x2d, 0x7d, 0xf9, 0xd1, 0xb8 + }, + { + 0x5d, 0x06, 0x89, 0xef, 0x49, 0xd2, 0xfa, 0xe5, 0x72, 0xb8, + 0x81, 0xb1, 0x23, 0xa8, 0x5f, 0xfa, 0x21, 0x59, 0x5f, 0x36 + }, + { + 0x87, 0xe9, 0x71, 0x75, 0x9a, 0x1c, 0xe4, 0x7a, + 0x51, 0x4d, 0x5c, 0x91, 0x4c, 0x39, 0x2c, 0x90, + 0x18, 0xc7, 0xc4, 0x6b, 0xc1, 
0x44, 0x65, 0x55, + 0x4a, 0xfc, 0xdf, 0x54, 0xa5, 0x07, 0x0c, 0x0e + } + }, + { + "abcdefghijklmnopqrstuvwxyz", + { + 0xfd, 0x2a, 0xa6, 0x07, 0xf7, 0x1d, 0xc8, 0xf5, + 0x10, 0x71, 0x49, 0x22, 0xb3, 0x71, 0x83, 0x4e + }, + { + 0xf7, 0x1c, 0x27, 0x10, 0x9c, 0x69, 0x2c, 0x1b, 0x56, 0xbb, + 0xdc, 0xeb, 0x5b, 0x9d, 0x28, 0x65, 0xb3, 0x70, 0x8d, 0xbc + }, + { + 0x64, 0x9d, 0x30, 0x34, 0x75, 0x1e, 0xa2, 0x16, + 0x77, 0x6b, 0xf9, 0xa1, 0x8a, 0xcc, 0x81, 0xbc, + 0x78, 0x96, 0x11, 0x8a, 0x51, 0x97, 0x96, 0x87, + 0x82, 0xdd, 0x1f, 0xd9, 0x7d, 0x8d, 0x51, 0x33 + } + }, + { + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", + { + 0xd1, 0xe9, 0x59, 0xeb, 0x17, 0x9c, 0x91, 0x1f, + 0xae, 0xa4, 0x62, 0x4c, 0x60, 0xc5, 0xc7, 0x02 + }, + { + 0xb0, 0xe2, 0x0b, 0x6e, 0x31, 0x16, 0x64, 0x02, 0x86, 0xed, + 0x3a, 0x87, 0xa5, 0x71, 0x30, 0x79, 0xb2, 0x1f, 0x51, 0x89 + }, + { + 0x57, 0x40, 0xa4, 0x08, 0xac, 0x16, 0xb7, 0x20, + 0xb8, 0x44, 0x24, 0xae, 0x93, 0x1c, 0xbb, 0x1f, + 0xe3, 0x63, 0xd1, 0xd0, 0xbf, 0x40, 0x17, 0xf1, + 0xa8, 0x9f, 0x7e, 0xa6, 0xde, 0x77, 0xa0, 0xb8 + } + } + }; + + int i; + unsigned char tmp[32]; + bool result = true; + + for (i = 0; i < (int)(sizeof(tests) / sizeof(tests[0])); i++) { + if (isLE()) { + rmd128(tests[i].msg, strlen(tests[i].msg), 0, tmp); + } else { + rmd128(tests[i].msg, strlen(tests[i].msg), 0, tmp); + } + if (memcmp(tmp, tests[i].hash128, 16) != 0) { + // printf("128 failure test %d\n", i); + result = false; + } + if (isLE()) { + rmd160(tests[i].msg, strlen(tests[i].msg), 0, tmp); + } else { + rmd160(tests[i].msg, strlen(tests[i].msg), 0, tmp); + } + if (memcmp(tmp, tests[i].hash160, 20) != 0) { + // printf("160 failure test %d\n", i); + result = false; + } + if (isLE()) { + rmd256(tests[i].msg, strlen(tests[i].msg), 0, tmp); + } else { + rmd256(tests[i].msg, strlen(tests[i].msg), 0, tmp); + } + if (memcmp(tmp, tests[i].hash256, 32) != 0) { + // printf("256 failure test %d\n", i); + result = false; + } + } + return result; } 
REGISTER_FAMILY(ripemd, - $.src_url = "https://github.com/libtom/libtomcrypt/blob/develop/src/hashes/rmd128.c", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/libtom/libtomcrypt/blob/develop/src/hashes/rmd128.c", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(RIPEMD_128, - $.desc = "RIPE-MD 128", - $.hash_flags = - FLAG_HASH_NO_SEED | - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0xC9B0B675, - $.verification_BE = 0xD1DB09B5, - $.initfn = rmd_test, - $.hashfn_native = rmd128, - $.hashfn_bswap = rmd128 -); + $.desc = "RIPE-MD 128", + $.hash_flags = + FLAG_HASH_NO_SEED | + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0xC9B0B675, + $.verification_BE = 0xD1DB09B5, + $.initfn = rmd_test, + $.hashfn_native = rmd128, + $.hashfn_bswap = rmd128 + ); REGISTER_HASH(RIPEMD_160, - $.desc = "RIPE-MD 160", - $.hash_flags = - FLAG_HASH_NO_SEED | - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 160, - $.verification_LE = 0x8613F5B2, - $.verification_BE = 0x2265C3AA, - $.initfn = rmd_test, - $.hashfn_native = rmd160, - $.hashfn_bswap = rmd160 -); + $.desc = "RIPE-MD 160", + $.hash_flags = + FLAG_HASH_NO_SEED | + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 160, + $.verification_LE = 0x8613F5B2, + $.verification_BE = 0x2265C3AA, + $.initfn 
= rmd_test, + $.hashfn_native = rmd160, + $.hashfn_bswap = rmd160 + ); REGISTER_HASH(RIPEMD_256, - $.desc = "RIPE-MD 256", - $.hash_flags = - FLAG_HASH_NO_SEED | - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 256, - $.verification_LE = 0x870A973A, - $.verification_BE = 0xF2A877EE, - $.initfn = rmd_test, - $.hashfn_native = rmd256, - $.hashfn_bswap = rmd256 -); + $.desc = "RIPE-MD 256", + $.hash_flags = + FLAG_HASH_NO_SEED | + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 256, + $.verification_LE = 0x870A973A, + $.verification_BE = 0xF2A877EE, + $.initfn = rmd_test, + $.hashfn_native = rmd256, + $.hashfn_bswap = rmd256 + ); diff --git a/hashes/seahash.cpp b/hashes/seahash.cpp index c7565290..2bfd998a 100644 --- a/hashes/seahash.cpp +++ b/hashes/seahash.cpp @@ -31,128 +31,130 @@ #include -static inline uint64_t diffuse(uint64_t val){ - uint64_t a, b; - val *= UINT64_C(0x6eed0e9da4d94a4f); - a = val >> 32; - b = val >> 60; - val ^= a >> b; - val *= UINT64_C(0x6eed0e9da4d94a4f); - return val; +static inline uint64_t diffuse( uint64_t val ) { + uint64_t a, b; + + val *= UINT64_C(0x6eed0e9da4d94a4f); + a = val >> 32; + b = val >> 60; + val ^= a >> b; + val *= UINT64_C(0x6eed0e9da4d94a4f); + return val; } -template < bool bswap > -static uint64_t seahash(const uint8_t * key, size_t len, uint64_t seed) { - uint64_t a, b, c, d; - uint8_t pad[8] = {0}; - const uint64_t orig_len = (uint64_t)len; +template +static uint64_t seahash( const uint8_t * key, size_t len, uint64_t seed ) { + uint64_t a, b, c, d; + uint8_t pad[8] = { 0 }; + const uint64_t orig_len = (uint64_t)len; - a = UINT64_C(0x16f11fe89b0d677c) ^ seed; - b = UINT64_C(0xb480a793d8e6c86c); - c = 
UINT64_C(0x6fe2e5aaf078ebc9); - d = UINT64_C(0x14f994a4c5259381); + a = UINT64_C(0x16f11fe89b0d677c) ^ seed; + b = UINT64_C(0xb480a793d8e6c86c); + c = UINT64_C(0x6fe2e5aaf078ebc9); + d = UINT64_C(0x14f994a4c5259381); - while (len >= 32) { - a ^= GET_U64(key, 0); - b ^= GET_U64(key, 8); - c ^= GET_U64(key, 16); - d ^= GET_U64(key, 24); - a = diffuse(a); - b = diffuse(b); - c = diffuse(c); - d = diffuse(d); - len -= 32; - key += 32; - } + while (len >= 32) { + a ^= GET_U64(key, 0); + b ^= GET_U64(key, 8); + c ^= GET_U64(key, 16); + d ^= GET_U64(key, 24); + a = diffuse(a); + b = diffuse(b); + c = diffuse(c); + d = diffuse(d); + len -= 32; + key += 32; + } - switch (len) { - case 31: case 30: case 29: case 28: case 27: case 26: case 25: - a ^= GET_U64(key, 0); - b ^= GET_U64(key, 8); - c ^= GET_U64(key, 16); - memcpy(pad, key + 24, len - 24); - d ^= GET_U64(pad, 0); - a = diffuse(a); - b = diffuse(b); - c = diffuse(c); - d = diffuse(d); - break; - case 24: - a ^= GET_U64(key, 0); - b ^= GET_U64(key, 8); - c ^= GET_U64(key, 16); - a = diffuse(a); - b = diffuse(b); - c = diffuse(c); - break; - case 23: case 22: case 21: case 20: case 19: case 18: case 17: - a ^= GET_U64(key, 0); - b ^= GET_U64(key, 8); - memcpy(pad, key + 16, len - 16); - c ^= GET_U64(pad, 0); - a = diffuse(a); - b = diffuse(b); - c = diffuse(c); - break; - case 16: - a ^= GET_U64(key, 0); - b ^= GET_U64(key, 8); - a = diffuse(a); - b = diffuse(b); - break; - case 15: case 14: case 13: case 12: case 11: case 10: case 9: - a ^= GET_U64(key, 0); - memcpy(pad, key + 8, len - 8); - b ^= GET_U64(pad, 0); - a = diffuse(a); - b = diffuse(b); - break; - case 8: - a ^= GET_U64(key, 0); - a = diffuse(a); - break; - case 7: case 6: case 5: case 4: case 3: case 2: case 1: - memcpy(pad, key, len); - a ^= GET_U64(pad, 0); - a = diffuse(a); - break; - case 0: - break; - default: - unreachable(); - assert(0); - } + switch (len) { + case 31: case 30: case 29: case 28: case 27: case 26: case 25: + a ^= GET_U64(key, 0); + 
b ^= GET_U64(key, 8); + c ^= GET_U64(key, 16); + memcpy(pad, key + 24, len - 24); + d ^= GET_U64(pad, 0); + a = diffuse(a); + b = diffuse(b); + c = diffuse(c); + d = diffuse(d); + break; + case 24: + a ^= GET_U64(key, 0); + b ^= GET_U64(key, 8); + c ^= GET_U64(key, 16); + a = diffuse(a); + b = diffuse(b); + c = diffuse(c); + break; + case 23: case 22: case 21: case 20: case 19: case 18: case 17: + a ^= GET_U64(key, 0); + b ^= GET_U64(key, 8); + memcpy(pad, key + 16, len - 16); + c ^= GET_U64(pad, 0); + a = diffuse(a); + b = diffuse(b); + c = diffuse(c); + break; + case 16: + a ^= GET_U64(key, 0); + b ^= GET_U64(key, 8); + a = diffuse(a); + b = diffuse(b); + break; + case 15: case 14: case 13: case 12: case 11: case 10: case 9: + a ^= GET_U64(key, 0); + memcpy(pad, key + 8, len - 8); + b ^= GET_U64(pad, 0); + a = diffuse(a); + b = diffuse(b); + break; + case 8: + a ^= GET_U64(key, 0); + a = diffuse(a); + break; + case 7: case 6: case 5: case 4: case 3: case 2: case 1: + memcpy(pad, key, len); + a ^= GET_U64(pad, 0); + a = diffuse(a); + break; + case 0: + break; + default: + unreachable(); + assert(0); + } - a ^= b; - c ^= d; - a ^= c; - a ^= orig_len; - return BSWAP(diffuse(a)); + a ^= b; + c ^= d; + a ^= c; + a ^= orig_len; + return BSWAP(diffuse(a)); } -template < bool bswap > -static void SeaHash(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void SeaHash( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h = seahash((const uint8_t *)in, len, (uint64_t)seed); + PUT_U64(h, (uint8_t *)out, 0); } REGISTER_FAMILY(seahash, - $.src_url = "https://gist.github.com/vstakhov/b58b855532a424cd634b6c7ea7baa1b9", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://gist.github.com/vstakhov/b58b855532a424cd634b6c7ea7baa1b9", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(seahash, - $.desc = "seahash", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_64 | - 
FLAG_IMPL_ROTATE | - FLAG_IMPL_SHIFT_VARIABLE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0xF0374078, - $.verification_BE = 0x5BD66274, - $.hashfn_native = SeaHash, - $.hashfn_bswap = SeaHash -); + $.desc = "seahash", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_SHIFT_VARIABLE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0xF0374078, + $.verification_BE = 0x5BD66274, + $.hashfn_native = SeaHash, + $.hashfn_bswap = SeaHash + ); diff --git a/hashes/sha1.cpp b/hashes/sha1.cpp index e042b99f..a4c50ede 100644 --- a/hashes/sha1.cpp +++ b/hashes/sha1.cpp @@ -40,352 +40,355 @@ #include "Hashlib.h" #if defined(HAVE_X86_64_SHA1) || defined(HAVE_ARM_SHA1) -#include "Intrinsics.h" + #include "Intrinsics.h" #endif //----------------------------------------------------------------------------- // Raw SHA-1 implementation typedef struct { - uint32_t state[5]; - uint32_t count[2]; - uint8_t buffer[64]; + uint32_t state[5]; + uint32_t count[2]; + uint8_t buffer[64]; } SHA1_CTX; #define SHA1_DIGEST_SIZE 20 /* SHA1_Init - Initialize new context */ -static void SHA1_Init(SHA1_CTX * context) { - /* SHA1 initialization constants */ - context->state[0] = 0x67452301; - context->state[1] = 0xEFCDAB89; - context->state[2] = 0x98BADCFE; - context->state[3] = 0x10325476; - context->state[4] = 0xC3D2E1F0; - context->count[0] = context->count[1] = 0; +static void SHA1_Init( SHA1_CTX * context ) { + /* SHA1 initialization constants */ + context->state[0] = 0x67452301; + context->state[1] = 0xEFCDAB89; + context->state[2] = 0x98BADCFE; + context->state[3] = 0x10325476; + context->state[4] = 0xC3D2E1F0; + context->count[0] = context->count[1] = 0; } /* Hash a single 512-bit block. This is the core of the algorithm. 
*/ -template < bool bswap > -static void SHA1_Transform_portable(uint32_t state[5], const uint8_t buffer[64]) { - uint32_t a, b, c, d, e; - uint32_t l[16]; +template +static void SHA1_Transform_portable( uint32_t state[5], const uint8_t buffer[64] ) { + uint32_t a, b, c, d, e; + uint32_t l[16]; /* blk0() and blk() perform the initial expand. */ /* I got the idea of expanding during the round function from SSLeay */ -#define blk0(i) (l[i] = GET_U32(buffer, 4*(i))) -#define blk(i) (l[i & 15] = ROTL32( \ - l[(i + 13) & 15] ^ \ - l[(i + 8) & 15] ^ \ - l[(i + 2) & 15] ^ \ - l[i & 15] \ +#define blk0(i) (l[i] = GET_U32(buffer, 4 * (i))) +#define blk(i) (l[i & 15] = ROTL32( \ + l[(i + 13) & 15] ^ \ + l[(i + 8) & 15] ^ \ + l[(i + 2) & 15] ^ \ + l[i & 15] \ , 1)) /* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */ -#define R0(v, w, x, y, z, i) \ - z += ((w & (x ^ y)) ^ y) + blk0(i) + 0x5A827999 + ROTL32(v, 5); \ +#define R0(v, w, x, y, z, i) \ + z += ((w & (x ^ y)) ^ y) + blk0(i) + 0x5A827999 + ROTL32(v, 5); \ w = ROTL32(w, 30); -#define R1(v, w, x, y, z, i) \ - z += ((w & (x ^ y)) ^ y) + blk(i) + 0x5A827999 + ROTL32(v, 5); \ +#define R1(v, w, x, y, z, i) \ + z += ((w & (x ^ y)) ^ y) + blk(i) + 0x5A827999 + ROTL32(v, 5); \ w = ROTL32(w, 30); -#define R2(v, w, x, y, z, i) \ - z += (w ^ x ^ y) + blk(i) + 0x6ED9EBA1 + ROTL32(v, 5); \ +#define R2(v, w, x, y, z, i) \ + z += (w ^ x ^ y) + blk(i) + 0x6ED9EBA1 + ROTL32(v, 5); \ w = ROTL32(w, 30); -#define R3(v, w, x, y, z, i) \ - z += (((w | x) & y) | (w & x)) + blk(i) + 0x8F1BBCDC + ROTL32(v, 5); \ +#define R3(v, w, x, y, z, i) \ + z += (((w | x) & y) | (w & x)) + blk(i) + 0x8F1BBCDC + ROTL32(v, 5); \ w = ROTL32(w, 30); -#define R4(v, w, x, y, z, i) \ - z += (w ^ x ^ y) + blk(i) + 0xCA62C1D6 + ROTL32(v, 5); \ +#define R4(v, w, x, y, z, i) \ + z += (w ^ x ^ y) + blk(i) + 0xCA62C1D6 + ROTL32(v, 5); \ w = ROTL32(w, 30); - /* Copy context->state[] to working vars */ - a = state[0]; - b = state[1]; - c = state[2]; - d = 
state[3]; - e = state[4]; - - /* 4 rounds of 20 operations each. Loop unrolled. */ - R0(a, b, c, d, e, 0); - R0(e, a, b, c, d, 1); - R0(d, e, a, b, c, 2); - R0(c, d, e, a, b, 3); - R0(b, c, d, e, a, 4); - R0(a, b, c, d, e, 5); - R0(e, a, b, c, d, 6); - R0(d, e, a, b, c, 7); - R0(c, d, e, a, b, 8); - R0(b, c, d, e, a, 9); - R0(a, b, c, d, e, 10); - R0(e, a, b, c, d, 11); - R0(d, e, a, b, c, 12); - R0(c, d, e, a, b, 13); - R0(b, c, d, e, a, 14); - R0(a, b, c, d, e, 15); - - R1(e, a, b, c, d, 16); - R1(d, e, a, b, c, 17); - R1(c, d, e, a, b, 18); - R1(b, c, d, e, a, 19); - - R2(a, b, c, d, e, 20); - R2(e, a, b, c, d, 21); - R2(d, e, a, b, c, 22); - R2(c, d, e, a, b, 23); - R2(b, c, d, e, a, 24); - R2(a, b, c, d, e, 25); - R2(e, a, b, c, d, 26); - R2(d, e, a, b, c, 27); - R2(c, d, e, a, b, 28); - R2(b, c, d, e, a, 29); - R2(a, b, c, d, e, 30); - R2(e, a, b, c, d, 31); - R2(d, e, a, b, c, 32); - R2(c, d, e, a, b, 33); - R2(b, c, d, e, a, 34); - R2(a, b, c, d, e, 35); - R2(e, a, b, c, d, 36); - R2(d, e, a, b, c, 37); - R2(c, d, e, a, b, 38); - R2(b, c, d, e, a, 39); - - R3(a, b, c, d, e, 40); - R3(e, a, b, c, d, 41); - R3(d, e, a, b, c, 42); - R3(c, d, e, a, b, 43); - R3(b, c, d, e, a, 44); - R3(a, b, c, d, e, 45); - R3(e, a, b, c, d, 46); - R3(d, e, a, b, c, 47); - R3(c, d, e, a, b, 48); - R3(b, c, d, e, a, 49); - R3(a, b, c, d, e, 50); - R3(e, a, b, c, d, 51); - R3(d, e, a, b, c, 52); - R3(c, d, e, a, b, 53); - R3(b, c, d, e, a, 54); - R3(a, b, c, d, e, 55); - R3(e, a, b, c, d, 56); - R3(d, e, a, b, c, 57); - R3(c, d, e, a, b, 58); - R3(b, c, d, e, a, 59); - - R4(a, b, c, d, e, 60); - R4(e, a, b, c, d, 61); - R4(d, e, a, b, c, 62); - R4(c, d, e, a, b, 63); - R4(b, c, d, e, a, 64); - R4(a, b, c, d, e, 65); - R4(e, a, b, c, d, 66); - R4(d, e, a, b, c, 67); - R4(c, d, e, a, b, 68); - R4(b, c, d, e, a, 69); - R4(a, b, c, d, e, 70); - R4(e, a, b, c, d, 71); - R4(d, e, a, b, c, 72); - R4(c, d, e, a, b, 73); - R4(b, c, d, e, a, 74); - R4(a, b, c, d, e, 75); - R4(e, a, b, c, 
d, 76); - R4(d, e, a, b, c, 77); - R4(c, d, e, a, b, 78); - R4(b, c, d, e, a, 79); - - /* Add the working vars back into context.state[] */ - state[0] += a; - state[1] += b; - state[2] += c; - state[3] += d; - state[4] += e; + /* Copy context->state[] to working vars */ + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + + /* 4 rounds of 20 operations each. Loop unrolled. */ + R0(a, b, c, d, e, 0); + R0(e, a, b, c, d, 1); + R0(d, e, a, b, c, 2); + R0(c, d, e, a, b, 3); + R0(b, c, d, e, a, 4); + R0(a, b, c, d, e, 5); + R0(e, a, b, c, d, 6); + R0(d, e, a, b, c, 7); + R0(c, d, e, a, b, 8); + R0(b, c, d, e, a, 9); + R0(a, b, c, d, e, 10); + R0(e, a, b, c, d, 11); + R0(d, e, a, b, c, 12); + R0(c, d, e, a, b, 13); + R0(b, c, d, e, a, 14); + R0(a, b, c, d, e, 15); + + R1(e, a, b, c, d, 16); + R1(d, e, a, b, c, 17); + R1(c, d, e, a, b, 18); + R1(b, c, d, e, a, 19); + + R2(a, b, c, d, e, 20); + R2(e, a, b, c, d, 21); + R2(d, e, a, b, c, 22); + R2(c, d, e, a, b, 23); + R2(b, c, d, e, a, 24); + R2(a, b, c, d, e, 25); + R2(e, a, b, c, d, 26); + R2(d, e, a, b, c, 27); + R2(c, d, e, a, b, 28); + R2(b, c, d, e, a, 29); + R2(a, b, c, d, e, 30); + R2(e, a, b, c, d, 31); + R2(d, e, a, b, c, 32); + R2(c, d, e, a, b, 33); + R2(b, c, d, e, a, 34); + R2(a, b, c, d, e, 35); + R2(e, a, b, c, d, 36); + R2(d, e, a, b, c, 37); + R2(c, d, e, a, b, 38); + R2(b, c, d, e, a, 39); + + R3(a, b, c, d, e, 40); + R3(e, a, b, c, d, 41); + R3(d, e, a, b, c, 42); + R3(c, d, e, a, b, 43); + R3(b, c, d, e, a, 44); + R3(a, b, c, d, e, 45); + R3(e, a, b, c, d, 46); + R3(d, e, a, b, c, 47); + R3(c, d, e, a, b, 48); + R3(b, c, d, e, a, 49); + R3(a, b, c, d, e, 50); + R3(e, a, b, c, d, 51); + R3(d, e, a, b, c, 52); + R3(c, d, e, a, b, 53); + R3(b, c, d, e, a, 54); + R3(a, b, c, d, e, 55); + R3(e, a, b, c, d, 56); + R3(d, e, a, b, c, 57); + R3(c, d, e, a, b, 58); + R3(b, c, d, e, a, 59); + + R4(a, b, c, d, e, 60); + R4(e, a, b, c, d, 61); + R4(d, e, a, b, c, 62); + R4(c, d, e, a, 
b, 63); + R4(b, c, d, e, a, 64); + R4(a, b, c, d, e, 65); + R4(e, a, b, c, d, 66); + R4(d, e, a, b, c, 67); + R4(c, d, e, a, b, 68); + R4(b, c, d, e, a, 69); + R4(a, b, c, d, e, 70); + R4(e, a, b, c, d, 71); + R4(d, e, a, b, c, 72); + R4(c, d, e, a, b, 73); + R4(b, c, d, e, a, 74); + R4(a, b, c, d, e, 75); + R4(e, a, b, c, d, 76); + R4(d, e, a, b, c, 77); + R4(c, d, e, a, b, 78); + R4(b, c, d, e, a, 79); + + /* Add the working vars back into context.state[] */ + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; } #if defined(HAVE_X86_64_SHA1) -template < bool bswap > -static void SHA1_Transform_sha1NI(uint32_t state[5], const uint8_t buffer[64]) { - __m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1; - __m128i MSG0, MSG1, MSG2, MSG3; + +template +static void SHA1_Transform_sha1NI( uint32_t state[5], const uint8_t buffer[64] ) { + __m128i ABCD, ABCD_SAVE, E0, E0_SAVE, E1; + __m128i MSG0, MSG1, MSG2, MSG3; const __m128i MASK = bswap ? - _mm_set_epi64x(UINT64_C(0x0001020304050607), UINT64_C(0x08090a0b0c0d0e0f)) : - _mm_set_epi64x(UINT64_C(0x0302010007060504), UINT64_C(0x0b0a09080f0e0d0c)) ; + _mm_set_epi64x(UINT64_C(0x0001020304050607), UINT64_C(0x08090a0b0c0d0e0f)) : + _mm_set_epi64x(UINT64_C(0x0302010007060504), UINT64_C(0x0b0a09080f0e0d0c)); /* Load initial values */ - ABCD = _mm_loadu_si128((const __m128i*) state); - E0 = _mm_set_epi32(state[4], 0, 0, 0); + ABCD = _mm_loadu_si128((const __m128i *)state); + E0 = _mm_set_epi32(state[4], 0, 0, 0); ABCD = _mm_shuffle_epi32(ABCD, 0x1B); /* Save current state */ ABCD_SAVE = ABCD; - E0_SAVE = E0; + E0_SAVE = E0; /* Rounds 0-3 */ - MSG0 = _mm_loadu_si128((const __m128i*)(buffer + 0)); + MSG0 = _mm_loadu_si128((const __m128i *)(buffer + 0)); MSG0 = _mm_shuffle_epi8(MSG0, MASK); - E0 = _mm_add_epi32(E0, MSG0); - E1 = ABCD; + E0 = _mm_add_epi32(E0, MSG0); + E1 = ABCD; ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); /* Rounds 4-7 */ - MSG1 = _mm_loadu_si128((const __m128i*)(buffer + 16)); + MSG1 = 
_mm_loadu_si128((const __m128i *)(buffer + 16)); MSG1 = _mm_shuffle_epi8(MSG1, MASK); - E1 = _mm_sha1nexte_epu32(E1, MSG1); - E0 = ABCD; + E1 = _mm_sha1nexte_epu32(E1, MSG1); + E0 = ABCD; ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0); MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); /* Rounds 8-11 */ - MSG2 = _mm_loadu_si128((const __m128i*)(buffer + 32)); + MSG2 = _mm_loadu_si128((const __m128i *)(buffer + 32)); MSG2 = _mm_shuffle_epi8(MSG2, MASK); - E0 = _mm_sha1nexte_epu32(E0, MSG2); - E1 = ABCD; + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); MSG0 = _mm_xor_si128(MSG0, MSG2); /* Rounds 12-15 */ - MSG3 = _mm_loadu_si128((const __m128i*)(buffer + 48)); + MSG3 = _mm_loadu_si128((const __m128i *)(buffer + 48)); MSG3 = _mm_shuffle_epi8(MSG3, MASK); - E1 = _mm_sha1nexte_epu32(E1, MSG3); - E0 = ABCD; + E1 = _mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 0); MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); MSG1 = _mm_xor_si128(MSG1, MSG3); /* Rounds 16-19 */ - E0 = _mm_sha1nexte_epu32(E0, MSG0); - E1 = ABCD; + E0 = _mm_sha1nexte_epu32(E0, MSG0); + E1 = ABCD; MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 0); MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); MSG2 = _mm_xor_si128(MSG2, MSG0); /* Rounds 20-23 */ - E1 = _mm_sha1nexte_epu32(E1, MSG1); - E0 = ABCD; + E1 = _mm_sha1nexte_epu32(E1, MSG1); + E0 = ABCD; MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); MSG3 = _mm_xor_si128(MSG3, MSG1); /* Rounds 24-27 */ - E0 = _mm_sha1nexte_epu32(E0, MSG2); - E1 = ABCD; + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1); MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); MSG0 = _mm_xor_si128(MSG0, MSG2); /* Rounds 28-31 */ - E1 = _mm_sha1nexte_epu32(E1, MSG3); - E0 = ABCD; + E1 = 
_mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); MSG1 = _mm_xor_si128(MSG1, MSG3); /* Rounds 32-35 */ - E0 = _mm_sha1nexte_epu32(E0, MSG0); - E1 = ABCD; + E0 = _mm_sha1nexte_epu32(E0, MSG0); + E1 = ABCD; MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 1); MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); MSG2 = _mm_xor_si128(MSG2, MSG0); /* Rounds 36-39 */ - E1 = _mm_sha1nexte_epu32(E1, MSG1); - E0 = ABCD; + E1 = _mm_sha1nexte_epu32(E1, MSG1); + E0 = ABCD; MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 1); MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); MSG3 = _mm_xor_si128(MSG3, MSG1); /* Rounds 40-43 */ - E0 = _mm_sha1nexte_epu32(E0, MSG2); - E1 = ABCD; + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); MSG0 = _mm_xor_si128(MSG0, MSG2); /* Rounds 44-47 */ - E1 = _mm_sha1nexte_epu32(E1, MSG3); - E0 = ABCD; + E1 = _mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2); MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); MSG1 = _mm_xor_si128(MSG1, MSG3); /* Rounds 48-51 */ - E0 = _mm_sha1nexte_epu32(E0, MSG0); - E1 = ABCD; + E0 = _mm_sha1nexte_epu32(E0, MSG0); + E1 = ABCD; MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); MSG2 = _mm_xor_si128(MSG2, MSG0); /* Rounds 52-55 */ - E1 = _mm_sha1nexte_epu32(E1, MSG1); - E0 = ABCD; + E1 = _mm_sha1nexte_epu32(E1, MSG1); + E0 = ABCD; MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 2); MSG0 = _mm_sha1msg1_epu32(MSG0, MSG1); MSG3 = _mm_xor_si128(MSG3, MSG1); /* Rounds 56-59 */ - E0 = _mm_sha1nexte_epu32(E0, MSG2); - E1 = ABCD; + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; MSG3 = _mm_sha1msg2_epu32(MSG3, 
MSG2); ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 2); MSG1 = _mm_sha1msg1_epu32(MSG1, MSG2); MSG0 = _mm_xor_si128(MSG0, MSG2); /* Rounds 60-63 */ - E1 = _mm_sha1nexte_epu32(E1, MSG3); - E0 = ABCD; + E1 = _mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; MSG0 = _mm_sha1msg2_epu32(MSG0, MSG3); ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); MSG2 = _mm_sha1msg1_epu32(MSG2, MSG3); MSG1 = _mm_xor_si128(MSG1, MSG3); /* Rounds 64-67 */ - E0 = _mm_sha1nexte_epu32(E0, MSG0); - E1 = ABCD; + E0 = _mm_sha1nexte_epu32(E0, MSG0); + E1 = ABCD; MSG1 = _mm_sha1msg2_epu32(MSG1, MSG0); ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3); MSG3 = _mm_sha1msg1_epu32(MSG3, MSG0); MSG2 = _mm_xor_si128(MSG2, MSG0); /* Rounds 68-71 */ - E1 = _mm_sha1nexte_epu32(E1, MSG1); - E0 = ABCD; + E1 = _mm_sha1nexte_epu32(E1, MSG1); + E0 = ABCD; MSG2 = _mm_sha1msg2_epu32(MSG2, MSG1); ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); MSG3 = _mm_xor_si128(MSG3, MSG1); /* Rounds 72-75 */ - E0 = _mm_sha1nexte_epu32(E0, MSG2); - E1 = ABCD; + E0 = _mm_sha1nexte_epu32(E0, MSG2); + E1 = ABCD; MSG3 = _mm_sha1msg2_epu32(MSG3, MSG2); ABCD = _mm_sha1rnds4_epu32(ABCD, E0, 3); /* Rounds 76-79 */ - E1 = _mm_sha1nexte_epu32(E1, MSG3); - E0 = ABCD; + E1 = _mm_sha1nexte_epu32(E1, MSG3); + E0 = ABCD; ABCD = _mm_sha1rnds4_epu32(ABCD, E1, 3); /* Combine state */ - E0 = _mm_sha1nexte_epu32(E0, E0_SAVE); + E0 = _mm_sha1nexte_epu32(E0, E0_SAVE); ABCD = _mm_add_epi32(ABCD, ABCD_SAVE); /* Save state */ - ABCD = _mm_shuffle_epi32(ABCD, 0x1B); - _mm_storeu_si128((__m128i*) state, ABCD); + ABCD = _mm_shuffle_epi32(ABCD, 0x1B); + _mm_storeu_si128((__m128i *)state, ABCD); state[4] = _mm_extract_epi32(E0, 3); } + #endif #if defined(HAVE_ARM_SHA1) -template < bool bswap > -static void SHA1_Transform_neon(uint32_t state[5], const uint8_t buffer[64]) { + +template +static void SHA1_Transform_neon( uint32_t state[5], const uint8_t buffer[64] ) { uint32x4_t ABCD, ABCD_SAVED; uint32x4_t TMP0, TMP1; uint32x4_t MSG0, MSG1, MSG2, MSG3; @@ -393,17 +396,17 @@ static void 
SHA1_Transform_neon(uint32_t state[5], const uint8_t buffer[64]) { /* Load state */ ABCD = vld1q_u32(&state[0]); - E0 = state[4]; + E0 = state[4]; /* Save state */ ABCD_SAVED = ABCD; - E0_SAVED = E0; + E0_SAVED = E0; /* Load message */ - MSG0 = vld1q_u32((const uint32_t*)(buffer)); - MSG1 = vld1q_u32((const uint32_t*)(buffer + 16)); - MSG2 = vld1q_u32((const uint32_t*)(buffer + 32)); - MSG3 = vld1q_u32((const uint32_t*)(buffer + 48)); + MSG0 = vld1q_u32((const uint32_t *)(buffer )); + MSG1 = vld1q_u32((const uint32_t *)(buffer + 16)); + MSG2 = vld1q_u32((const uint32_t *)(buffer + 32)); + MSG3 = vld1q_u32((const uint32_t *)(buffer + 48)); if (bswap) { /* Reverse for little endian */ @@ -417,149 +420,150 @@ static void SHA1_Transform_neon(uint32_t state[5], const uint8_t buffer[64]) { TMP1 = vaddq_u32(MSG1, vdupq_n_u32(0x5A827999)); /* Rounds 0-3 */ - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1cq_u32(ABCD, E0, TMP0); TMP0 = vaddq_u32(MSG2, vdupq_n_u32(0x5A827999)); MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2); /* Rounds 4-7 */ - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1cq_u32(ABCD, E1, TMP1); TMP1 = vaddq_u32(MSG3, vdupq_n_u32(0x5A827999)); MSG0 = vsha1su1q_u32(MSG0, MSG3); MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3); /* Rounds 8-11 */ - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1cq_u32(ABCD, E0, TMP0); TMP0 = vaddq_u32(MSG0, vdupq_n_u32(0x5A827999)); MSG1 = vsha1su1q_u32(MSG1, MSG0); MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0); /* Rounds 12-15 */ - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1cq_u32(ABCD, E1, TMP1); TMP1 = vaddq_u32(MSG1, vdupq_n_u32(0x6ED9EBA1)); MSG2 = vsha1su1q_u32(MSG2, MSG1); MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1); /* Rounds 16-19 */ - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = 
vsha1cq_u32(ABCD, E0, TMP0); TMP0 = vaddq_u32(MSG2, vdupq_n_u32(0x6ED9EBA1)); MSG3 = vsha1su1q_u32(MSG3, MSG2); MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2); /* Rounds 20-23 */ - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1pq_u32(ABCD, E1, TMP1); TMP1 = vaddq_u32(MSG3, vdupq_n_u32(0x6ED9EBA1)); MSG0 = vsha1su1q_u32(MSG0, MSG3); MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3); /* Rounds 24-27 */ - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1pq_u32(ABCD, E0, TMP0); TMP0 = vaddq_u32(MSG0, vdupq_n_u32(0x6ED9EBA1)); MSG1 = vsha1su1q_u32(MSG1, MSG0); MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0); /* Rounds 28-31 */ - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1pq_u32(ABCD, E1, TMP1); TMP1 = vaddq_u32(MSG1, vdupq_n_u32(0x6ED9EBA1)); MSG2 = vsha1su1q_u32(MSG2, MSG1); MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1); /* Rounds 32-35 */ - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1pq_u32(ABCD, E0, TMP0); TMP0 = vaddq_u32(MSG2, vdupq_n_u32(0x8F1BBCDC)); MSG3 = vsha1su1q_u32(MSG3, MSG2); MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2); /* Rounds 36-39 */ - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1pq_u32(ABCD, E1, TMP1); TMP1 = vaddq_u32(MSG3, vdupq_n_u32(0x8F1BBCDC)); MSG0 = vsha1su1q_u32(MSG0, MSG3); MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3); /* Rounds 40-43 */ - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1mq_u32(ABCD, E0, TMP0); TMP0 = vaddq_u32(MSG0, vdupq_n_u32(0x8F1BBCDC)); MSG1 = vsha1su1q_u32(MSG1, MSG0); MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0); /* Rounds 44-47 */ - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1mq_u32(ABCD, E1, TMP1); TMP1 = vaddq_u32(MSG1, vdupq_n_u32(0x8F1BBCDC)); MSG2 = vsha1su1q_u32(MSG2, MSG1); MSG3 = 
vsha1su0q_u32(MSG3, MSG0, MSG1); /* Rounds 48-51 */ - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1mq_u32(ABCD, E0, TMP0); TMP0 = vaddq_u32(MSG2, vdupq_n_u32(0x8F1BBCDC)); MSG3 = vsha1su1q_u32(MSG3, MSG2); MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2); /* Rounds 52-55 */ - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1mq_u32(ABCD, E1, TMP1); TMP1 = vaddq_u32(MSG3, vdupq_n_u32(0xCA62C1D6)); MSG0 = vsha1su1q_u32(MSG0, MSG3); MSG1 = vsha1su0q_u32(MSG1, MSG2, MSG3); /* Rounds 56-59 */ - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1mq_u32(ABCD, E0, TMP0); TMP0 = vaddq_u32(MSG0, vdupq_n_u32(0xCA62C1D6)); MSG1 = vsha1su1q_u32(MSG1, MSG0); MSG2 = vsha1su0q_u32(MSG2, MSG3, MSG0); /* Rounds 60-63 */ - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1pq_u32(ABCD, E1, TMP1); TMP1 = vaddq_u32(MSG1, vdupq_n_u32(0xCA62C1D6)); MSG2 = vsha1su1q_u32(MSG2, MSG1); MSG3 = vsha1su0q_u32(MSG3, MSG0, MSG1); /* Rounds 64-67 */ - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1pq_u32(ABCD, E0, TMP0); TMP0 = vaddq_u32(MSG2, vdupq_n_u32(0xCA62C1D6)); MSG3 = vsha1su1q_u32(MSG3, MSG2); MSG0 = vsha1su0q_u32(MSG0, MSG1, MSG2); /* Rounds 68-71 */ - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1pq_u32(ABCD, E1, TMP1); TMP1 = vaddq_u32(MSG3, vdupq_n_u32(0xCA62C1D6)); MSG0 = vsha1su1q_u32(MSG0, MSG3); /* Rounds 72-75 */ - E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E1 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1pq_u32(ABCD, E0, TMP0); /* Rounds 76-79 */ - E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); + E0 = vsha1h_u32(vgetq_lane_u32(ABCD, 0)); ABCD = vsha1pq_u32(ABCD, E1, TMP1); /* Combine state */ - E0 += E0_SAVED; + E0 += E0_SAVED; ABCD = vaddq_u32(ABCD_SAVED, ABCD); /* Save state */ 
vst1q_u32(&state[0], ABCD); state[4] = E0; } + #endif -template < bool bswap > -static void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]) { +template +static void SHA1_Transform( uint32_t state[5], const uint8_t buffer[64] ) { #if defined(HAVE_X86_64_SHA1) return SHA1_Transform_sha1NI(state, buffer); #endif @@ -569,15 +573,16 @@ static void SHA1_Transform(uint32_t state[5], const uint8_t buffer[64]) { return SHA1_Transform_portable(state, buffer); } -template < bool bswap > -static void SHA1_Update(SHA1_CTX * context, const uint8_t * data, const size_t len) { +template +static void SHA1_Update( SHA1_CTX * context, const uint8_t * data, const size_t len ) { size_t i, j; j = context->count[0]; - if ((context->count[0] += len << 3) < j) + if ((context->count[0] += len << 3) < j) { context->count[1]++; + } context->count[1] += (len >> 29); - j = (j >> 3) & 63; + j = (j >> 3) & 63; if ((j + len) > 63) { memcpy(&context->buffer[j], data, (i = 64 - j)); @@ -593,36 +598,37 @@ static void SHA1_Update(SHA1_CTX * context, const uint8_t * data, const size_t l } /* Add padding and return len bytes of the message digest. */ -template < bool bswap > -static void SHA1_Final(SHA1_CTX * context, uint32_t digest_words, uint8_t * digest) { - uint32_t i; - uint8_t finalcount[8]; - uint8_t c; - - for (i = 0; i < 8; i++) { - finalcount[i] = - /* Endian independent */ - (uint8_t)(context->count[(i >= 4 ? 0 : 1)] >> ((3 - (i & 3)) * 8)); - } - c = 0200; - SHA1_Update(context, &c, 1); - while ((context->count[0] & 504) != 448) { - c = 0000; +template +static void SHA1_Final( SHA1_CTX * context, uint32_t digest_words, uint8_t * digest ) { + uint32_t i; + uint8_t finalcount[8]; + uint8_t c; + + for (i = 0; i < 8; i++) { + finalcount[i] = + /* Endian independent */ + (uint8_t)(context->count[(i >= 4 ? 
0 : 1)] >> ((3 - (i & 3)) * 8)); + } + c = 0200; SHA1_Update(context, &c, 1); - } - SHA1_Update(context, finalcount, 8); /* Should cause a SHA1_Transform() */ + while ((context->count[0] & 504) != 448) { + c = 0000; + SHA1_Update(context, &c, 1); + } + SHA1_Update(context, finalcount, 8); /* Should cause a SHA1_Transform() */ - if (digest_words > 5) { digest_words = 5; } - for (i = 0; i < digest_words; i++) { - PUT_U32(context->state[i], digest, 4*i); - } + if (digest_words > 5) { digest_words = 5; } + for (i = 0; i < digest_words; i++) { + PUT_U32(context->state[i], digest, 4 * i); + } } //----------------------------------------------------------------------------- // Homegrown SHA-1 seeding function -static FORCE_INLINE void SHA1_Seed(SHA1_CTX * ctx, const seed_t seed) { +static FORCE_INLINE void SHA1_Seed( SHA1_CTX * ctx, const seed_t seed ) { const uint32_t seedlo = seed & 0xFFFFFFFF; const uint32_t seedhi = (seed >> 32) & 0xFFFFFFFF; + ctx->state[0] ^= seedlo; ctx->state[1] ^= seedhi; ctx->state[2] += seedlo ^ seedhi; @@ -631,14 +637,14 @@ static FORCE_INLINE void SHA1_Seed(SHA1_CTX * ctx, const seed_t seed) { } //----------------------------------------------------------------------------- -template < uint32_t hashbits, bool bswap > -static void SHA1(const void * in, const size_t len, const seed_t seed, void * out) { - SHA1_CTX context; - - SHA1_Init (&context); - SHA1_Seed (&context, seed); - SHA1_Update(&context, (uint8_t*)in, len); - SHA1_Final (&context, (hashbits+31)/32, (uint8_t*)out); +template +static void SHA1( const void * in, const size_t len, const seed_t seed, void * out ) { + SHA1_CTX context; + + SHA1_Init(&context); + SHA1_Seed(&context, seed); + SHA1_Update(&context, (uint8_t *)in, len); + SHA1_Final(&context, (hashbits + 31) / 32, (uint8_t *)out); } //----------------------------------------------------------------------------- @@ -652,16 +658,18 @@ static void SHA1(const void * in, const size_t len, const seed_t seed, void * ou // A 
million repetitions of "a" // 34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F -static const char *const test_data[] = { +static const char * const test_data[] = { "abc", "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", - "A million repetitions of 'a'"}; -static const char *const test_results[] = { + "A million repetitions of 'a'" +}; +static const char * const test_results[] = { "A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D", "84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1", - "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F"}; + "34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F" +}; -static void digest_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char * output) { - int i, j; +static void digest_to_hex( const uint8_t digest[SHA1_DIGEST_SIZE], char * output ) { + int i, j; char * c = output; for (i = 0; i < SHA1_DIGEST_SIZE / 4; i++) { @@ -675,119 +683,119 @@ static void digest_to_hex(const uint8_t digest[SHA1_DIGEST_SIZE], char * output) *(c - 1) = '\0'; } -template < bool bswap > -static bool SHA1_Selftest(void) { - int k; - SHA1_CTX context; - uint8_t digest[20]; - char output[80]; - - for (k = 0; k < 2; k++) { - SHA1_Init (&context); - SHA1_Update(&context, (uint8_t *)test_data[k], strlen(test_data[k])); - SHA1_Final (&context, 5, digest); - digest_to_hex(digest, output); - - if (strcmp(output, test_results[k])) { - fprintf(stdout, "SHA-1 self test FAILED\n"); - fprintf(stderr, "* hash of \"%s\" incorrect:\n", test_data[k]); - fprintf(stderr, "\t%s returned\n", output); - fprintf(stderr, "\t%s is correct\n", test_results[k]); - return false; - } - } - - /* million 'a' vector we feed separately */ - SHA1_Init(&context); - for (k = 0; k < 1000000; k++) { - SHA1_Update(&context, (uint8_t *)"a", 1); - } - SHA1_Final(&context, 5, digest); - digest_to_hex(digest, output); - if (strcmp(output, test_results[2])) { - fprintf(stdout, "SHA-1 self test FAILED\n"); - fprintf(stderr, "* hash of \"%s\" incorrect:\n", test_data[2]); - fprintf(stderr, "\t%s returned\n", 
output); - fprintf(stderr, "\t%s is correct\n", test_results[2]); - return false; - } - - /* success */ - return true; +template +static bool SHA1_Selftest( void ) { + int k; + SHA1_CTX context; + uint8_t digest[20]; + char output[80]; + + for (k = 0; k < 2; k++) { + SHA1_Init(&context); + SHA1_Update(&context, (uint8_t *)test_data[k], strlen(test_data[k])); + SHA1_Final(&context, 5, digest); + digest_to_hex(digest, output); + + if (strcmp(output, test_results[k])) { + fprintf(stdout, "SHA-1 self test FAILED\n" ); + fprintf(stderr, "* hash of \"%s\" incorrect:\n", test_data[k]); + fprintf(stderr, "\t%s returned\n", output); + fprintf(stderr, "\t%s is correct\n", test_results[k]); + return false; + } + } + + /* million 'a' vector we feed separately */ + SHA1_Init(&context); + for (k = 0; k < 1000000; k++) { + SHA1_Update(&context, (uint8_t *)"a", 1); + } + SHA1_Final(&context, 5, digest); + digest_to_hex(digest, output); + if (strcmp(output, test_results[2])) { + fprintf(stdout, "SHA-1 self test FAILED\n" ); + fprintf(stderr, "* hash of \"%s\" incorrect:\n", test_data[2]); + fprintf(stderr, "\t%s returned\n", output); + fprintf(stderr, "\t%s is correct\n", test_results[2]); + return false; + } + + /* success */ + return true; } -static bool SHA1_test(void) { - if (isBE()) { - return SHA1_Selftest(); - } else { - return SHA1_Selftest(); - } +static bool SHA1_test( void ) { + if (isBE()) { + return SHA1_Selftest(); + } else { + return SHA1_Selftest(); + } } REGISTER_FAMILY(sha1, - $.src_url = "https://github.com/noloader/SHA-Intrinsics", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/noloader/SHA-Intrinsics", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(SHA_1__32, - $.desc = "SHA-1, bits 0-31", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_CRYPTOGRAPHIC_WEAK | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_ROTATE | - 
FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 32, - $.verification_LE = 0xF0E4D9E9, - $.verification_BE = 0xE00EF4D6, - $.initfn = SHA1_test, - $.hashfn_native = SHA1<32,false>, - $.hashfn_bswap = SHA1<32,true> -); + $.desc = "SHA-1, bits 0-31", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_CRYPTOGRAPHIC_WEAK | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 32, + $.verification_LE = 0xF0E4D9E9, + $.verification_BE = 0xE00EF4D6, + $.initfn = SHA1_test, + $.hashfn_native = SHA1<32, false>, + $.hashfn_bswap = SHA1<32, true> + ); REGISTER_HASH(SHA_1__64, - $.desc = "SHA-1, bits 0-63", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_CRYPTOGRAPHIC_WEAK | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 64, - $.verification_LE = 0x36801ECB, - $.verification_BE = 0xFC26F4C7, - $.initfn = SHA1_test, - $.hashfn_native = SHA1<64,false>, - $.hashfn_bswap = SHA1<64,true> -); + $.desc = "SHA-1, bits 0-63", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_CRYPTOGRAPHIC_WEAK | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 64, + $.verification_LE = 0x36801ECB, + $.verification_BE = 0xFC26F4C7, + $.initfn = SHA1_test, + $.hashfn_native = SHA1<64, false>, + $.hashfn_bswap = SHA1<64, true> + ); REGISTER_HASH(SHA_1, - $.desc = "SHA-1", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_CRYPTOGRAPHIC_WEAK | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - 
FLAG_IMPL_VERY_SLOW, - $.bits = 128, - $.verification_LE = 0xE444A591, - $.verification_BE = 0x35E00C29, - $.initfn = SHA1_test, - $.hashfn_native = SHA1<128,false>, - $.hashfn_bswap = SHA1<128,true> -); + $.desc = "SHA-1", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_CRYPTOGRAPHIC_WEAK | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 128, + $.verification_LE = 0xE444A591, + $.verification_BE = 0x35E00C29, + $.initfn = SHA1_test, + $.hashfn_native = SHA1<128, false>, + $.hashfn_bswap = SHA1<128, true> + ); diff --git a/hashes/sha2.cpp b/hashes/sha2.cpp index 3cb446ad..46ad7ea6 100644 --- a/hashes/sha2.cpp +++ b/hashes/sha2.cpp @@ -35,42 +35,42 @@ #include "Hashlib.h" #if defined(HAVE_X86_64_SHA2) || defined(HAVE_ARM_SHA2) -#include "Intrinsics.h" + #include "Intrinsics.h" #endif //----------------------------------------------------------------------------- // Raw SHA-2 implementation typedef struct { - uint64_t length; - uint32_t state[8], curlen; - uint8_t buf[64]; + uint64_t length; + uint32_t state[8], curlen; + uint8_t buf[64]; } SHA2_CTX; -static void SHA224_Init(SHA2_CTX * context) { - context->curlen = 0; - context->length = 0; - context->state[0] = 0xc1059ed8; - context->state[1] = 0x367cd507; - context->state[2] = 0x3070dd17; - context->state[3] = 0xf70e5939; - context->state[4] = 0xffc00b31; - context->state[5] = 0x68581511; - context->state[6] = 0x64f98fa7; - context->state[7] = 0xbefa4fa4; +static void SHA224_Init( SHA2_CTX * context ) { + context->curlen = 0; + context->length = 0; + context->state[0] = 0xc1059ed8; + context->state[1] = 0x367cd507; + context->state[2] = 0x3070dd17; + context->state[3] = 0xf70e5939; + context->state[4] = 0xffc00b31; + context->state[5] = 0x68581511; + context->state[6] = 0x64f98fa7; + context->state[7] = 0xbefa4fa4; } /* SHA256_Init - Initialize new 
context */ -static void SHA256_Init(SHA2_CTX * context) { - context->curlen = 0; - context->length = 0; - context->state[0] = 0x6A09E667; - context->state[1] = 0xBB67AE85; - context->state[2] = 0x3C6EF372; - context->state[3] = 0xA54FF53A; - context->state[4] = 0x510E527F; - context->state[5] = 0x9B05688C; - context->state[6] = 0x1F83D9AB; - context->state[7] = 0x5BE0CD19; +static void SHA256_Init( SHA2_CTX * context ) { + context->curlen = 0; + context->length = 0; + context->state[0] = 0x6A09E667; + context->state[1] = 0xBB67AE85; + context->state[2] = 0x3C6EF372; + context->state[3] = 0xA54FF53A; + context->state[4] = 0x510E527F; + context->state[5] = 0x9B05688C; + context->state[6] = 0x1F83D9AB; + context->state[7] = 0x5BE0CD19; } /* Hash a single 512-bit block. This is the core of the algorithm. */ @@ -93,266 +93,268 @@ static const uint32_t K256[] = { 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2 }; -#define ROTATE(x,y) (((x)>>(y)) | ((x)<<(32-(y)))) -#define Sigma0(x) (ROTATE((x), 2) ^ ROTATE((x),13) ^ ROTATE((x),22)) -#define Sigma1(x) (ROTATE((x), 6) ^ ROTATE((x),11) ^ ROTATE((x),25)) -#define sigma0(x) (ROTATE((x), 7) ^ ROTATE((x),18) ^ ((x)>> 3)) -#define sigma1(x) (ROTATE((x),17) ^ ROTATE((x),19) ^ ((x)>>10)) - -#define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z))) -#define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) - -template < bool bswap > -static void SHA256_Transform_portable(uint32_t state[8], const uint8_t buffer[64]) { - uint32_t a, b, c, d, e, f, g, h, s0, s1, T1, T2; - uint32_t X[16], i; - - a = state[0]; - b = state[1]; - c = state[2]; - d = state[3]; - e = state[4]; - f = state[5]; - g = state[6]; - h = state[7]; - - for (i = 0; i < 16; i++) { - X[i] = GET_U32(buffer, i*4); - - T1 = h; - T1 += Sigma1(e); - T1 += Ch(e, f, g); - T1 += K256[i]; - T1 += X[i]; - - T2 = Sigma0(a); - T2 += Maj(a, b, c); - - h = g; - g = f; - f = e; - e = d + T1; - d = c; - c = b; - b = a; - a = T1 + T2; - } - - for (; i < 64; i++) { - s0 = X[(i + 1) & 0x0f]; 
- s0 = sigma0(s0); - s1 = X[(i + 14) & 0x0f]; - s1 = sigma1(s1); - - T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf]; - T1 += h + Sigma1(e) + Ch(e, f, g) + K256[i]; - T2 = Sigma0(a) + Maj(a, b, c); - h = g; - g = f; - f = e; - e = d + T1; - d = c; - c = b; - b = a; - a = T1 + T2; - } - - state[0] += a; - state[1] += b; - state[2] += c; - state[3] += d; - state[4] += e; - state[5] += f; - state[6] += g; - state[7] += h; +#define ROTATE(x, y) (((x) >> (y)) | ((x) << (32 - (y)))) +#define Sigma0(x) (ROTATE((x), 2) ^ ROTATE((x), 13) ^ ROTATE((x), 22)) +#define Sigma1(x) (ROTATE((x), 6) ^ ROTATE((x), 11) ^ ROTATE((x), 25)) +#define sigma0(x) (ROTATE((x), 7) ^ ROTATE((x), 18) ^ ((x) >> 3)) +#define sigma1(x) (ROTATE((x), 17) ^ ROTATE((x), 19) ^ ((x) >> 10)) + +#define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z))) +#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +template +static void SHA256_Transform_portable( uint32_t state[8], const uint8_t buffer[64] ) { + uint32_t a, b, c, d, e, f, g, h, s0, s1, T1, T2; + uint32_t X[16], i; + + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + f = state[5]; + g = state[6]; + h = state[7]; + + for (i = 0; i < 16; i++) { + X[i] = GET_U32(buffer, i * 4); + + T1 = h; + T1 += Sigma1(e); + T1 += Ch(e, f, g); + T1 += K256[i]; + T1 += X[i]; + + T2 = Sigma0(a); + T2 += Maj(a, b, c); + + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + } + + for (; i < 64; i++) { + s0 = X[(i + 1) & 0x0f]; + s0 = sigma0(s0); + s1 = X[(i + 14) & 0x0f]; + s1 = sigma1(s1); + + T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf]; + T1 += h + Sigma1(e) + Ch(e, f, g) + K256[i]; + T2 = Sigma0(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + } + + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + state[5] += f; + state[6] += g; + state[7] += h; } #if defined(HAVE_X86_64_SHA2) -template < bool bswap > -static 
void SHA256_Transform_x64(uint32_t state[8], const uint8_t data[64]) { - __m128i STATE0, STATE1; - __m128i MSG, TMP; - __m128i MSG0, MSG1, MSG2, MSG3; - __m128i ABEF_SAVE, CDGH_SAVE; - - /* Load initial values */ - TMP = _mm_loadu_si128((const __m128i*) &state[0]); - STATE1 = _mm_loadu_si128((const __m128i*) &state[4]); - - TMP = _mm_shuffle_epi32(TMP, 0xB1); /* CDAB */ - STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); /* EFGH */ - STATE0 = _mm_alignr_epi8(TMP, STATE1, 8); /* ABEF */ - STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); /* CDGH */ - - /* Save current state */ - ABEF_SAVE = STATE0; - CDGH_SAVE = STATE1; - - /* Rounds 0-3 */ - MSG0 = _mm_loadu_si128((const __m128i*) (data+0)); - if (bswap) { MSG0 = mm_bswap32(MSG0); } - MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(UINT64_C(0xE9B5DBA5B5C0FBCF), UINT64_C(0x71374491428A2F98))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - - /* Rounds 4-7 */ - MSG1 = _mm_loadu_si128((const __m128i*) (data+16)); - if (bswap) { MSG1 = mm_bswap32(MSG1); } - MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(UINT64_C(0xAB1C5ED5923F82A4), UINT64_C(0x59F111F13956C25B))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1); - - /* Rounds 8-11 */ - MSG2 = _mm_loadu_si128((const __m128i*) (data+32)); - if (bswap) { MSG2 = mm_bswap32(MSG2); } - MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(UINT64_C(0x550C7DC3243185BE), UINT64_C(0x12835B01D807AA98))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2); - - /* Rounds 12-15 */ - MSG3 = _mm_loadu_si128((const __m128i*) (data+48)); - if (bswap) { MSG3 = mm_bswap32(MSG3); } - MSG = _mm_add_epi32(MSG3, 
_mm_set_epi64x(UINT64_C(0xC19BF1749BDC06A7), UINT64_C(0x80DEB1FE72BE5D74))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - TMP = _mm_alignr_epi8(MSG3, MSG2, 4); - MSG0 = _mm_add_epi32(MSG0, TMP); - MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3); - - /* Rounds 16-19 */ - MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(UINT64_C(0x240CA1CC0FC19DC6), UINT64_C(0xEFBE4786E49B69C1))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - TMP = _mm_alignr_epi8(MSG0, MSG3, 4); - MSG1 = _mm_add_epi32(MSG1, TMP); - MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0); - - /* Rounds 20-23 */ - MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(UINT64_C(0x76F988DA5CB0A9DC), UINT64_C(0x4A7484AA2DE92C6F))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - TMP = _mm_alignr_epi8(MSG1, MSG0, 4); - MSG2 = _mm_add_epi32(MSG2, TMP); - MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1); - - /* Rounds 24-27 */ - MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(UINT64_C(0xBF597FC7B00327C8), UINT64_C(0xA831C66D983E5152))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - TMP = _mm_alignr_epi8(MSG2, MSG1, 4); - MSG3 = _mm_add_epi32(MSG3, TMP); - MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2); - - /* Rounds 28-31 */ - MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(UINT64_C(0x1429296706CA6351), UINT64_C(0xD5A79147C6E00BF3))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - TMP = _mm_alignr_epi8(MSG3, MSG2, 4); - MSG0 = _mm_add_epi32(MSG0, TMP); - MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3); - 
MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3); - - /* Rounds 32-35 */ - MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(UINT64_C(0x53380D134D2C6DFC), UINT64_C(0x2E1B213827B70A85))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - TMP = _mm_alignr_epi8(MSG0, MSG3, 4); - MSG1 = _mm_add_epi32(MSG1, TMP); - MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0); - - /* Rounds 36-39 */ - MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(UINT64_C(0x92722C8581C2C92E), UINT64_C(0x766A0ABB650A7354))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - TMP = _mm_alignr_epi8(MSG1, MSG0, 4); - MSG2 = _mm_add_epi32(MSG2, TMP); - MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1); - - /* Rounds 40-43 */ - MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(UINT64_C(0xC76C51A3C24B8B70), UINT64_C(0xA81A664BA2BFE8A1))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - TMP = _mm_alignr_epi8(MSG2, MSG1, 4); - MSG3 = _mm_add_epi32(MSG3, TMP); - MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2); - - /* Rounds 44-47 */ - MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(UINT64_C(0x106AA070F40E3585), UINT64_C(0xD6990624D192E819))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - TMP = _mm_alignr_epi8(MSG3, MSG2, 4); - MSG0 = _mm_add_epi32(MSG0, TMP); - MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3); - - /* Rounds 48-51 */ - MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(UINT64_C(0x34B0BCB52748774C), 
UINT64_C(0x1E376C0819A4C116))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - TMP = _mm_alignr_epi8(MSG0, MSG3, 4); - MSG1 = _mm_add_epi32(MSG1, TMP); - MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0); - - /* Rounds 52-55 */ - MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(UINT64_C(0x682E6FF35B9CCA4F), UINT64_C(0x4ED8AA4A391C0CB3))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - TMP = _mm_alignr_epi8(MSG1, MSG0, 4); - MSG2 = _mm_add_epi32(MSG2, TMP); - MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - - /* Rounds 56-59 */ - MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(UINT64_C(0x8CC7020884C87814), UINT64_C(0x78A5636F748F82EE))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - TMP = _mm_alignr_epi8(MSG2, MSG1, 4); - MSG3 = _mm_add_epi32(MSG3, TMP); - MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - - /* Rounds 60-63 */ - MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(UINT64_C(0xC67178F2BEF9A3F7), UINT64_C(0xA4506CEB90BEFFFA))); - STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); - MSG = _mm_shuffle_epi32(MSG, 0x0E); - STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); - - /* Combine state */ - STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE); - STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE); - - TMP = _mm_shuffle_epi32(STATE0, 0x1B); /* FEBA */ - STATE1 = _mm_shuffle_epi32(STATE1, 0xB1); /* DCHG */ - STATE0 = _mm_blend_epi16(TMP, STATE1, 0xF0); /* DCBA */ - STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); /* ABEF */ - - /* Save state */ - _mm_storeu_si128((__m128i*) &state[0], STATE0); - _mm_storeu_si128((__m128i*) &state[4], STATE1); + +template +static void SHA256_Transform_x64( uint32_t state[8], const uint8_t data[64] ) { + __m128i STATE0, STATE1; + __m128i MSG, 
TMP; + __m128i MSG0, MSG1, MSG2, MSG3; + __m128i ABEF_SAVE, CDGH_SAVE; + + /* Load initial values */ + TMP = _mm_loadu_si128((const __m128i *)&state[0]); + STATE1 = _mm_loadu_si128((const __m128i *)&state[4]); + + TMP = _mm_shuffle_epi32(TMP , 0xB1); /* CDAB */ + STATE1 = _mm_shuffle_epi32(STATE1, 0x1B); /* EFGH */ + STATE0 = _mm_alignr_epi8(TMP, STATE1, 8); /* ABEF */ + STATE1 = _mm_blend_epi16(STATE1, TMP, 0xF0); /* CDGH */ + + /* Save current state */ + ABEF_SAVE = STATE0; + CDGH_SAVE = STATE1; + + /* Rounds 0-3 */ + MSG0 = _mm_loadu_si128((const __m128i *)(data + 0)); + if (bswap) { MSG0 = mm_bswap32(MSG0); } + MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(UINT64_C(0xE9B5DBA5B5C0FBCF), UINT64_C(0x71374491428A2F98))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + + /* Rounds 4-7 */ + MSG1 = _mm_loadu_si128((const __m128i *)(data + 16)); + if (bswap) { MSG1 = mm_bswap32(MSG1); } + MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(UINT64_C(0xAB1C5ED5923F82A4), UINT64_C(0x59F111F13956C25B))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1); + + /* Rounds 8-11 */ + MSG2 = _mm_loadu_si128((const __m128i *)(data + 32)); + if (bswap) { MSG2 = mm_bswap32(MSG2); } + MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(UINT64_C(0x550C7DC3243185BE), UINT64_C(0x12835B01D807AA98))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2); + + /* Rounds 12-15 */ + MSG3 = _mm_loadu_si128((const __m128i *)(data + 48)); + if (bswap) { MSG3 = mm_bswap32(MSG3); } + MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(UINT64_C(0xC19BF1749BDC06A7), UINT64_C(0x80DEB1FE72BE5D74))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = 
_mm_alignr_epi8(MSG3, MSG2, 4); + MSG0 = _mm_add_epi32(MSG0, TMP); + MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3); + + /* Rounds 16-19 */ + MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(UINT64_C(0x240CA1CC0FC19DC6), UINT64_C(0xEFBE4786E49B69C1))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG0, MSG3, 4); + MSG1 = _mm_add_epi32(MSG1, TMP); + MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0); + + /* Rounds 20-23 */ + MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(UINT64_C(0x76F988DA5CB0A9DC), UINT64_C(0x4A7484AA2DE92C6F))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG1, MSG0, 4); + MSG2 = _mm_add_epi32(MSG2, TMP); + MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1); + + /* Rounds 24-27 */ + MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(UINT64_C(0xBF597FC7B00327C8), UINT64_C(0xA831C66D983E5152))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG2, MSG1, 4); + MSG3 = _mm_add_epi32(MSG3, TMP); + MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2); + + /* Rounds 28-31 */ + MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(UINT64_C(0x1429296706CA6351), UINT64_C(0xD5A79147C6E00BF3))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG3, MSG2, 4); + MSG0 = _mm_add_epi32(MSG0, TMP); + MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3); + + 
/* Rounds 32-35 */ + MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(UINT64_C(0x53380D134D2C6DFC), UINT64_C(0x2E1B213827B70A85))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG0, MSG3, 4); + MSG1 = _mm_add_epi32(MSG1, TMP); + MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0); + + /* Rounds 36-39 */ + MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(UINT64_C(0x92722C8581C2C92E), UINT64_C(0x766A0ABB650A7354))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG1, MSG0, 4); + MSG2 = _mm_add_epi32(MSG2, TMP); + MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG0 = _mm_sha256msg1_epu32(MSG0, MSG1); + + /* Rounds 40-43 */ + MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(UINT64_C(0xC76C51A3C24B8B70), UINT64_C(0xA81A664BA2BFE8A1))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG2, MSG1, 4); + MSG3 = _mm_add_epi32(MSG3, TMP); + MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG1 = _mm_sha256msg1_epu32(MSG1, MSG2); + + /* Rounds 44-47 */ + MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(UINT64_C(0x106AA070F40E3585), UINT64_C(0xD6990624D192E819))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG3, MSG2, 4); + MSG0 = _mm_add_epi32(MSG0, TMP); + MSG0 = _mm_sha256msg2_epu32(MSG0, MSG3); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG2 = _mm_sha256msg1_epu32(MSG2, MSG3); + + /* Rounds 48-51 */ + MSG = _mm_add_epi32(MSG0, _mm_set_epi64x(UINT64_C(0x34B0BCB52748774C), UINT64_C(0x1E376C0819A4C116))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG0, MSG3, 4); + MSG1 = _mm_add_epi32(MSG1, 
TMP); + MSG1 = _mm_sha256msg2_epu32(MSG1, MSG0); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + MSG3 = _mm_sha256msg1_epu32(MSG3, MSG0); + + /* Rounds 52-55 */ + MSG = _mm_add_epi32(MSG1, _mm_set_epi64x(UINT64_C(0x682E6FF35B9CCA4F), UINT64_C(0x4ED8AA4A391C0CB3))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG1, MSG0, 4); + MSG2 = _mm_add_epi32(MSG2, TMP); + MSG2 = _mm_sha256msg2_epu32(MSG2, MSG1); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + + /* Rounds 56-59 */ + MSG = _mm_add_epi32(MSG2, _mm_set_epi64x(UINT64_C(0x8CC7020884C87814), UINT64_C(0x78A5636F748F82EE))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + TMP = _mm_alignr_epi8(MSG2, MSG1, 4); + MSG3 = _mm_add_epi32(MSG3, TMP); + MSG3 = _mm_sha256msg2_epu32(MSG3, MSG2); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + + /* Rounds 60-63 */ + MSG = _mm_add_epi32(MSG3, _mm_set_epi64x(UINT64_C(0xC67178F2BEF9A3F7), UINT64_C(0xA4506CEB90BEFFFA))); + STATE1 = _mm_sha256rnds2_epu32(STATE1, STATE0, MSG); + MSG = _mm_shuffle_epi32(MSG, 0x0E); + STATE0 = _mm_sha256rnds2_epu32(STATE0, STATE1, MSG); + + /* Combine state */ + STATE0 = _mm_add_epi32(STATE0, ABEF_SAVE); + STATE1 = _mm_add_epi32(STATE1, CDGH_SAVE); + + TMP = _mm_shuffle_epi32(STATE0, 0x1B); /* FEBA */ + STATE1 = _mm_shuffle_epi32(STATE1, 0xB1); /* DCHG */ + STATE0 = _mm_blend_epi16(TMP, STATE1, 0xF0); /* DCBA */ + STATE1 = _mm_alignr_epi8(STATE1, TMP, 8); /* ABEF */ + + /* Save state */ + _mm_storeu_si128((__m128i *)&state[0], STATE0); + _mm_storeu_si128((__m128i *)&state[4], STATE1); } + #endif #if defined(HAVE_ARM_SHA2) @@ -375,167 +377,168 @@ static const uint32_t K[] = { 0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2, }; -template < bool bswap > -static void SHA256_Transform_neon(uint32_t state[8], const uint8_t data[64]) { - uint32x4_t STATE0, STATE1, ABEF_SAVE, 
CDGH_SAVE; - uint32x4_t MSG0, MSG1, MSG2, MSG3; - uint32x4_t TMP0, TMP1, TMP2; - - /* Load state */ - STATE0 = vld1q_u32(&state[0]); - STATE1 = vld1q_u32(&state[4]); - - /* Save state */ - ABEF_SAVE = STATE0; - CDGH_SAVE = STATE1; - - /* Load message */ - MSG0 = vld1q_u32((const uint32_t *)(data + 0)); - MSG1 = vld1q_u32((const uint32_t *)(data + 16)); - MSG2 = vld1q_u32((const uint32_t *)(data + 32)); - MSG3 = vld1q_u32((const uint32_t *)(data + 48)); - - /* Reverse for little endian */ - if (bswap) { - MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG0))); - MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG1))); - MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG2))); - MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG3))); - } - - TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[0x00])); - - /* Rounds 0-3 */ - MSG0 = vsha256su0q_u32(MSG0, MSG1); - TMP2 = STATE0; - TMP1 = vaddq_u32(MSG1, vld1q_u32(&K[0x04])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); - MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3); - - /* Rounds 4-7 */ - MSG1 = vsha256su0q_u32(MSG1, MSG2); - TMP2 = STATE0; - TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[0x08])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); - MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0); - - /* Rounds 8-11 */ - MSG2 = vsha256su0q_u32(MSG2, MSG3); - TMP2 = STATE0; - TMP1 = vaddq_u32(MSG3, vld1q_u32(&K[0x0c])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); - MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1); - - /* Rounds 12-15 */ - MSG3 = vsha256su0q_u32(MSG3, MSG0); - TMP2 = STATE0; - TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[0x10])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); - MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2); - - /* Rounds 16-19 */ - MSG0 = vsha256su0q_u32(MSG0, MSG1); - TMP2 = STATE0; - TMP1 = vaddq_u32(MSG1, 
vld1q_u32(&K[0x14])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); - MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3); - - /* Rounds 20-23 */ - MSG1 = vsha256su0q_u32(MSG1, MSG2); - TMP2 = STATE0; - TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[0x18])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); - MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0); - - /* Rounds 24-27 */ - MSG2 = vsha256su0q_u32(MSG2, MSG3); - TMP2 = STATE0; - TMP1 = vaddq_u32(MSG3, vld1q_u32(&K[0x1c])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); - MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1); - - /* Rounds 28-31 */ - MSG3 = vsha256su0q_u32(MSG3, MSG0); - TMP2 = STATE0; - TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[0x20])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); - MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2); - - /* Rounds 32-35 */ - MSG0 = vsha256su0q_u32(MSG0, MSG1); - TMP2 = STATE0; - TMP1 = vaddq_u32(MSG1, vld1q_u32(&K[0x24])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); - MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3); - - /* Rounds 36-39 */ - MSG1 = vsha256su0q_u32(MSG1, MSG2); - TMP2 = STATE0; - TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[0x28])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); - MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0); - - /* Rounds 40-43 */ - MSG2 = vsha256su0q_u32(MSG2, MSG3); - TMP2 = STATE0; - TMP1 = vaddq_u32(MSG3, vld1q_u32(&K[0x2c])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); - MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1); - - /* Rounds 44-47 */ - MSG3 = vsha256su0q_u32(MSG3, MSG0); - TMP2 = STATE0; - TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[0x30])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); - MSG3 = vsha256su1q_u32(MSG3, 
MSG1, MSG2); - - /* Rounds 48-51 */ - TMP2 = STATE0; - TMP1 = vaddq_u32(MSG1, vld1q_u32(&K[0x34])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); - - /* Rounds 52-55 */ - TMP2 = STATE0; - TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[0x38])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); - - /* Rounds 56-59 */ - TMP2 = STATE0; - TMP1 = vaddq_u32(MSG3, vld1q_u32(&K[0x3c])); - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); - - /* Rounds 60-63 */ - TMP2 = STATE0; - STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); - STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); - - /* Combine state */ - STATE0 = vaddq_u32(STATE0, ABEF_SAVE); - STATE1 = vaddq_u32(STATE1, CDGH_SAVE); - - /* Save state */ - vst1q_u32(&state[0], STATE0); - vst1q_u32(&state[4], STATE1); +template +static void SHA256_Transform_neon( uint32_t state[8], const uint8_t data[64] ) { + uint32x4_t STATE0, STATE1, ABEF_SAVE, CDGH_SAVE; + uint32x4_t MSG0, MSG1, MSG2, MSG3; + uint32x4_t TMP0, TMP1, TMP2; + + /* Load state */ + STATE0 = vld1q_u32(&state[0]); + STATE1 = vld1q_u32(&state[4]); + + /* Save state */ + ABEF_SAVE = STATE0; + CDGH_SAVE = STATE1; + + /* Load message */ + MSG0 = vld1q_u32((const uint32_t *)(data + 0)); + MSG1 = vld1q_u32((const uint32_t *)(data + 16)); + MSG2 = vld1q_u32((const uint32_t *)(data + 32)); + MSG3 = vld1q_u32((const uint32_t *)(data + 48)); + + /* Reverse for little endian */ + if (bswap) { + MSG0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG0))); + MSG1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG1))); + MSG2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG2))); + MSG3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(MSG3))); + } + + TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[0x00])); + + /* Rounds 0-3 */ + MSG0 = vsha256su0q_u32(MSG0, MSG1); + TMP2 = STATE0; + TMP1 = vaddq_u32(MSG1, vld1q_u32(&K[0x04])); + STATE0 = 
vsha256hq_u32(STATE0, STATE1, TMP0); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); + MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3); + + /* Rounds 4-7 */ + MSG1 = vsha256su0q_u32(MSG1, MSG2); + TMP2 = STATE0; + TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[0x08])); + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); + MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0); + + /* Rounds 8-11 */ + MSG2 = vsha256su0q_u32(MSG2, MSG3); + TMP2 = STATE0; + TMP1 = vaddq_u32(MSG3, vld1q_u32(&K[0x0c])); + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); + MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1); + + /* Rounds 12-15 */ + MSG3 = vsha256su0q_u32(MSG3, MSG0); + TMP2 = STATE0; + TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[0x10])); + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); + MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2); + + /* Rounds 16-19 */ + MSG0 = vsha256su0q_u32(MSG0, MSG1); + TMP2 = STATE0; + TMP1 = vaddq_u32(MSG1, vld1q_u32(&K[0x14])); + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); + MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3); + + /* Rounds 20-23 */ + MSG1 = vsha256su0q_u32(MSG1, MSG2); + TMP2 = STATE0; + TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[0x18])); + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); + MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0); + + /* Rounds 24-27 */ + MSG2 = vsha256su0q_u32(MSG2, MSG3); + TMP2 = STATE0; + TMP1 = vaddq_u32(MSG3, vld1q_u32(&K[0x1c])); + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); + MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1); + + /* Rounds 28-31 */ + MSG3 = vsha256su0q_u32(MSG3, MSG0); + TMP2 = STATE0; + TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[0x20])); + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); + MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2); + + /* Rounds 32-35 */ + 
MSG0 = vsha256su0q_u32(MSG0, MSG1); + TMP2 = STATE0; + TMP1 = vaddq_u32(MSG1, vld1q_u32(&K[0x24])); + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); + MSG0 = vsha256su1q_u32(MSG0, MSG2, MSG3); + + /* Rounds 36-39 */ + MSG1 = vsha256su0q_u32(MSG1, MSG2); + TMP2 = STATE0; + TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[0x28])); + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); + MSG1 = vsha256su1q_u32(MSG1, MSG3, MSG0); + + /* Rounds 40-43 */ + MSG2 = vsha256su0q_u32(MSG2, MSG3); + TMP2 = STATE0; + TMP1 = vaddq_u32(MSG3, vld1q_u32(&K[0x2c])); + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); + MSG2 = vsha256su1q_u32(MSG2, MSG0, MSG1); + + /* Rounds 44-47 */ + MSG3 = vsha256su0q_u32(MSG3, MSG0); + TMP2 = STATE0; + TMP0 = vaddq_u32(MSG0, vld1q_u32(&K[0x30])); + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); + MSG3 = vsha256su1q_u32(MSG3, MSG1, MSG2); + + /* Rounds 48-51 */ + TMP2 = STATE0; + TMP1 = vaddq_u32(MSG1, vld1q_u32(&K[0x34])); + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); + + /* Rounds 52-55 */ + TMP2 = STATE0; + TMP0 = vaddq_u32(MSG2, vld1q_u32(&K[0x38])); + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); + + /* Rounds 56-59 */ + TMP2 = STATE0; + TMP1 = vaddq_u32(MSG3, vld1q_u32(&K[0x3c])); + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP0); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP0); + + /* Rounds 60-63 */ + TMP2 = STATE0; + STATE0 = vsha256hq_u32(STATE0, STATE1, TMP1); + STATE1 = vsha256h2q_u32(STATE1, TMP2, TMP1); + + /* Combine state */ + STATE0 = vaddq_u32(STATE0, ABEF_SAVE); + STATE1 = vaddq_u32(STATE1, CDGH_SAVE); + + /* Save state */ + vst1q_u32(&state[0], STATE0); + vst1q_u32(&state[4], STATE1); } + #endif -template < bool bswap > -static void SHA256_Transform(uint32_t state[8], const 
uint8_t buffer[64]) { +template +static void SHA256_Transform( uint32_t state[8], const uint8_t buffer[64] ) { #if defined(HAVE_X86_64_SHA2) return SHA256_Transform_x64(state, buffer); #endif @@ -545,84 +548,85 @@ static void SHA256_Transform(uint32_t state[8], const uint8_t buffer[64]) { return SHA256_Transform_portable(state, buffer); } -template < bool bswap > -static void SHA256_Update(SHA2_CTX * context, const uint8_t * data, size_t len) { - while (len > 0) { - if ((context->curlen == 0) && (len >= sizeof(context->buf))) { - SHA256_Transform(context->state, data); - context->length += 64*8; - len -= 64; - data += 64; - } else { - size_t n = 64 - context->curlen; - if (n > len) { n = len; } - memcpy(&context->buf[context->curlen], data, n); - context->curlen += n; - len -= n; - data += n; - if (context->curlen == 64) { - SHA256_Transform(context->state, context->buf); - context->curlen = 0; - context->length += 64*8; - } +template +static void SHA256_Update( SHA2_CTX * context, const uint8_t * data, size_t len ) { + while (len > 0) { + if ((context->curlen == 0) && (len >= sizeof(context->buf))) { + SHA256_Transform(context->state, data); + context->length += 64 * 8; + len -= 64; + data += 64; + } else { + size_t n = 64 - context->curlen; + if (n > len) { n = len; } + memcpy(&context->buf[context->curlen], data, n); + context->curlen += n; + len -= n; + data += n; + if (context->curlen == 64) { + SHA256_Transform(context->state, context->buf); + context->curlen = 0; + context->length += 64 * 8; + } + } } - } } /* Add padding and return len bytes of the message digest. 
*/ -template < bool bswap > -static void SHA256_Final(SHA2_CTX * context, uint32_t digest_words, uint8_t * digest) { - uint32_t i; - uint8_t finalcount[8]; - uint8_t c; - - context->length += context->curlen * 8; - for (i = 0; i < 8; i++) { - finalcount[i] = (uint8_t)(context->length >> ((7 - i) * 8)); // Endian independent - } - c = 0200; - SHA256_Update(context, &c, 1); - while ((context->curlen) != 56) { - c = 0000; +template +static void SHA256_Final( SHA2_CTX * context, uint32_t digest_words, uint8_t * digest ) { + uint32_t i; + uint8_t finalcount[8]; + uint8_t c; + + context->length += context->curlen * 8; + for (i = 0; i < 8; i++) { + finalcount[i] = (uint8_t)(context->length >> ((7 - i) * 8)); // Endian independent + } + c = 0200; SHA256_Update(context, &c, 1); - } - SHA256_Update(context, finalcount, 8); /* Should cause a SHA256_Transform() */ + while ((context->curlen) != 56) { + c = 0000; + SHA256_Update(context, &c, 1); + } + SHA256_Update(context, finalcount, 8); /* Should cause a SHA256_Transform() */ - if (digest_words > 8) { digest_words = 8; } - for (i = 0; i < digest_words; i++) { - PUT_U32(context->state[i], digest, 4*i); - } + if (digest_words > 8) { digest_words = 8; } + for (i = 0; i < digest_words; i++) { + PUT_U32(context->state[i], digest, 4 * i); + } } //----------------------------------------------------------------------------- // Homegrown SHA-2 seeding function -static FORCE_INLINE void SHA256_Seed(SHA2_CTX * ctx, const seed_t seed) { +static FORCE_INLINE void SHA256_Seed( SHA2_CTX * ctx, const seed_t seed ) { const uint32_t seedlo = seed & 0xFFFFFFFF; const uint32_t seedhi = (seed >> 32) & 0xFFFFFFFF; + ctx->state[1] ^= seedlo; ctx->state[3] += seedlo + seedhi; ctx->state[5] ^= seedhi; } //----------------------------------------------------------------------------- -template < uint32_t hashbits, bool bswap > -static void SHA256(const void * in, const size_t len, const seed_t seed, void * out) { - SHA2_CTX context; - - SHA256_Init 
(&context); - SHA256_Seed (&context, seed); - SHA256_Update(&context, (const uint8_t*)in, len); - SHA256_Final (&context, (hashbits+31)/32, (uint8_t*)out); +template +static void SHA256( const void * in, const size_t len, const seed_t seed, void * out ) { + SHA2_CTX context; + + SHA256_Init(&context); + SHA256_Seed(&context, seed); + SHA256_Update(&context, (const uint8_t *)in, len); + SHA256_Final(&context, (hashbits + 31) / 32, (uint8_t *)out); } -template < uint32_t hashbits, bool bswap > -static void SHA224(const void * in, const size_t len, const seed_t seed, void * out) { - SHA2_CTX context; +template +static void SHA224( const void * in, const size_t len, const seed_t seed, void * out ) { + SHA2_CTX context; - SHA224_Init (&context); - SHA256_Seed (&context, seed); - SHA256_Update(&context, (const uint8_t*)in, len); - SHA256_Final (&context, (hashbits+31)/32, (uint8_t*)out); + SHA224_Init(&context); + SHA256_Seed(&context, seed); + SHA256_Update(&context, (const uint8_t *)in, len); + SHA256_Final(&context, (hashbits + 31) / 32, (uint8_t *)out); } //----------------------------------------------------------------------------- @@ -634,22 +638,23 @@ static void SHA224(const void * in, const size_t len, const seed_t seed, void * // e3b0c442 98fc1c14 9afbf4c8 996fb924 // 27ae41e4 649b934c a495991b 7852b855 // "abc" -// ba7816bf 8f01cfea 414140de 5dae2223 +// ba7816bf 8f01cfea 414140de 5dae2223 // b00361a3 96177a9c b410ff61 f20015ad // A million repetitions of "a" // cdc76e5c 9914fb92 81a1c7e2 84d73e67 // f1809a48 a497200e 046d39cc c7112cd0 -static const char *const test_data[] = { +static const char * const test_data[] = { "", "abc", - "A million repetitions of 'a'"}; -static const char *const test_results[] = { - "e3b0c442 98fc1c14 9afbf4c8 996fb924 27ae41e4 649b934c a495991b 7852b855", - "ba7816bf 8f01cfea 414140de 5dae2223 b00361a3 96177a9c b410ff61 f20015ad", - "cdc76e5c 9914fb92 81a1c7e2 84d73e67 f1809a48 a497200e 046d39cc c7112cd0", + "A million repetitions 
of 'a'" +}; +static const char * const test_results[] = { + "e3b0c442 98fc1c14 9afbf4c8 996fb924 27ae41e4 649b934c a495991b 7852b855", + "ba7816bf 8f01cfea 414140de 5dae2223 b00361a3 96177a9c b410ff61 f20015ad", + "cdc76e5c 9914fb92 81a1c7e2 84d73e67 f1809a48 a497200e 046d39cc c7112cd0", }; -static void digest_to_hex(const uint8_t digest[32], char * output) { - int i, j; +static void digest_to_hex( const uint8_t digest[32], char * output ) { + int i, j; char * c = output; for (i = 0; i < 32 / 4; i++) { @@ -662,136 +667,136 @@ static void digest_to_hex(const uint8_t digest[32], char * output) { *(c - 1) = '\0'; } -template < bool bswap > -static bool SHA256_Selftest(void) { - int k; - SHA2_CTX context; - uint8_t digest[32]; - char output[72]; - - for (k = 0; k < 2; k++) { - SHA256_Init (&context); - SHA256_Update(&context, (uint8_t *)test_data[k], strlen(test_data[k])); - SHA256_Final (&context, 8, digest); - digest_to_hex(digest, output); - - if (strcmp(output, test_results[k])) { - fprintf(stdout, "SHA-256 self test FAILED\n"); - fprintf(stderr, "* hash of \"%s\" incorrect:\n", test_data[k]); - fprintf(stderr, "\t%s returned\n", output); - fprintf(stderr, "\t%s is correct\n", test_results[k]); - return false; - } - } - - /* million 'a' vector we feed separately */ - SHA256_Init(&context); - for (k = 0; k < 1000000; k++) { - SHA256_Update(&context, (uint8_t *)"a", 1); - } - SHA256_Final(&context, 8, digest); - digest_to_hex(digest, output); - if (strcmp(output, test_results[2])) { - fprintf(stdout, "SHA-256 self test FAILED\n"); - fprintf(stderr, "* hash of \"%s\" incorrect:\n", test_data[2]); - fprintf(stderr, "\t%s returned\n", output); - fprintf(stderr, "\t%s is correct\n", test_results[2]); - return false; - } - - /* success */ - return true; +template +static bool SHA256_Selftest( void ) { + int k; + SHA2_CTX context; + uint8_t digest[32]; + char output[72]; + + for (k = 0; k < 2; k++) { + SHA256_Init(&context); + SHA256_Update(&context, (uint8_t 
*)test_data[k], strlen(test_data[k])); + SHA256_Final(&context, 8, digest); + digest_to_hex(digest, output); + + if (strcmp(output, test_results[k])) { + fprintf(stdout, "SHA-256 self test FAILED\n" ); + fprintf(stderr, "* hash of \"%s\" incorrect:\n", test_data[k]); + fprintf(stderr, "\t%s returned\n", output); + fprintf(stderr, "\t%s is correct\n", test_results[k]); + return false; + } + } + + /* million 'a' vector we feed separately */ + SHA256_Init(&context); + for (k = 0; k < 1000000; k++) { + SHA256_Update(&context, (uint8_t *)"a", 1); + } + SHA256_Final(&context, 8, digest); + digest_to_hex(digest, output); + if (strcmp(output, test_results[2])) { + fprintf(stdout, "SHA-256 self test FAILED\n" ); + fprintf(stderr, "* hash of \"%s\" incorrect:\n", test_data[2]); + fprintf(stderr, "\t%s returned\n", output); + fprintf(stderr, "\t%s is correct\n", test_results[2]); + return false; + } + + /* success */ + return true; } -static bool SHA256_test(void) { - if (isBE()) { - return SHA256_Selftest(); - } else { - return SHA256_Selftest(); - } +static bool SHA256_test( void ) { + if (isBE()) { + return SHA256_Selftest(); + } else { + return SHA256_Selftest(); + } } REGISTER_FAMILY(sha2, - $.src_url = "https://github.com/noloader/SHA-Intrinsics", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/noloader/SHA-Intrinsics", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(SHA_2_256__64, - $.desc = "SHA-256, bits 0-63", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 64, - $.verification_LE = 0x31C40E74, - $.verification_BE = 0x6E81AB0B, - $.initfn = SHA256_test, - $.hashfn_native = SHA256<64,false>, - $.hashfn_bswap = SHA256<64,true> -); + $.desc = "SHA-256, bits 0-63", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + 
FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 64, + $.verification_LE = 0x31C40E74, + $.verification_BE = 0x6E81AB0B, + $.initfn = SHA256_test, + $.hashfn_native = SHA256<64, false>, + $.hashfn_bswap = SHA256<64, true> + ); REGISTER_HASH(SHA_2_256, - $.desc = "SHA-256", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 256, - $.verification_LE = 0x33BD25DE, - $.verification_BE = 0x1643B047, - $.initfn = SHA256_test, - $.hashfn_native = SHA256<256,false>, - $.hashfn_bswap = SHA256<256,true> -); + $.desc = "SHA-256", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 256, + $.verification_LE = 0x33BD25DE, + $.verification_BE = 0x1643B047, + $.initfn = SHA256_test, + $.hashfn_native = SHA256<256, false>, + $.hashfn_bswap = SHA256<256, true> + ); REGISTER_HASH(SHA_2_224__64, - $.desc = "SHA-224, bits 0-63", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 64, - $.verification_LE = 0x36C55CA5, - $.verification_BE = 0x8C3C0B2A, - $.initfn = SHA256_test, - $.hashfn_native = SHA224<64,false>, - $.hashfn_bswap = SHA224<64,true> -); + $.desc = "SHA-224, bits 0-63", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_BE | + 
FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 64, + $.verification_LE = 0x36C55CA5, + $.verification_BE = 0x8C3C0B2A, + $.initfn = SHA256_test, + $.hashfn_native = SHA224<64, false>, + $.hashfn_bswap = SHA224<64, true> + ); REGISTER_HASH(SHA_2_224, - $.desc = "SHA-224", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_BE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 224, - $.verification_LE = 0x6BA219E5, - $.verification_BE = 0x56F30297, - $.initfn = SHA256_test, - $.hashfn_native = SHA224<224,false>, - $.hashfn_bswap = SHA224<224,true> -); + $.desc = "SHA-224", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_BE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 224, + $.verification_LE = 0x6BA219E5, + $.verification_BE = 0x56F30297, + $.initfn = SHA256_test, + $.hashfn_native = SHA224<224, false>, + $.hashfn_bswap = SHA224<224, true> + ); diff --git a/hashes/sha3.cpp b/hashes/sha3.cpp index a83a9860..583f4216 100644 --- a/hashes/sha3.cpp +++ b/hashes/sha3.cpp @@ -33,83 +33,94 @@ #include /* 'Words' here refers to uint64_t */ -#define SHA3_KECCAK_SPONGE_WORDS (((1600)/8/*bits to byte*/)/sizeof(uint64_t)) +#define SHA3_KECCAK_SPONGE_WORDS (((1600) / 8 /*bits to byte*/) / sizeof(uint64_t)) #define SHA3_KECCAK_ROUNDS 24 typedef struct sha3_context_ { - uint64_t s[SHA3_KECCAK_SPONGE_WORDS]; /* Keccak's state */ - uint64_t saved; /* the portion of the input message that we - * didn't consume yet */ - uint32_t byteIndex; /* 0..7--the next byte after the set one - * (starts from 0; 0--none are buffered) */ - uint32_t wordIndex; /* 0..24--the next word to integrate input - * (starts from 0) */ - uint32_t capacityWords; /* the double size of the 
hash output in - * words (e.g. 16 for Keccak 512) */ + uint64_t s[SHA3_KECCAK_SPONGE_WORDS]; /* Keccak's state */ + uint64_t saved; /* + * the portion of the input message that we + * didn't consume yet + */ + uint32_t byteIndex; /* + * 0..7--the next byte after the set one + * (starts from 0; 0--none are buffered) + */ + uint32_t wordIndex; /* + * 0..24--the next word to integrate input + * (starts from 0) + */ + uint32_t capacityWords; /* + * the double size of the hash output in + * words (e.g. 16 for Keccak 512) + */ } sha3_context; static const uint64_t keccakf_rndc[24] = { - UINT64_C(0x0000000000000001), UINT64_C(0x0000000000008082), - UINT64_C(0x800000000000808a), UINT64_C(0x8000000080008000), - UINT64_C(0x000000000000808b), UINT64_C(0x0000000080000001), - UINT64_C(0x8000000080008081), UINT64_C(0x8000000000008009), - UINT64_C(0x000000000000008a), UINT64_C(0x0000000000000088), - UINT64_C(0x0000000080008009), UINT64_C(0x000000008000000a), - UINT64_C(0x000000008000808b), UINT64_C(0x800000000000008b), - UINT64_C(0x8000000000008089), UINT64_C(0x8000000000008003), - UINT64_C(0x8000000000008002), UINT64_C(0x8000000000000080), - UINT64_C(0x000000000000800a), UINT64_C(0x800000008000000a), - UINT64_C(0x8000000080008081), UINT64_C(0x8000000000008080), - UINT64_C(0x0000000080000001), UINT64_C(0x8000000080008008) + UINT64_C(0x0000000000000001), UINT64_C(0x0000000000008082), + UINT64_C(0x800000000000808a), UINT64_C(0x8000000080008000), + UINT64_C(0x000000000000808b), UINT64_C(0x0000000080000001), + UINT64_C(0x8000000080008081), UINT64_C(0x8000000000008009), + UINT64_C(0x000000000000008a), UINT64_C(0x0000000000000088), + UINT64_C(0x0000000080008009), UINT64_C(0x000000008000000a), + UINT64_C(0x000000008000808b), UINT64_C(0x800000000000008b), + UINT64_C(0x8000000000008089), UINT64_C(0x8000000000008003), + UINT64_C(0x8000000000008002), UINT64_C(0x8000000000000080), + UINT64_C(0x000000000000800a), UINT64_C(0x800000008000000a), + UINT64_C(0x8000000080008081), 
UINT64_C(0x8000000000008080), + UINT64_C(0x0000000080000001), UINT64_C(0x8000000080008008) }; static const unsigned keccakf_rotc[24] = { - 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 }; static const unsigned keccakf_piln[24] = { - 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 + 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 }; -static void keccakf(uint64_t s[25]) -{ - int i, j, round; - uint64_t t, bc[5]; - - for(round = 0; round < SHA3_KECCAK_ROUNDS; round++) { - /* Theta */ - for(i = 0; i < 5; i++) - bc[i] = s[i] ^ s[i + 5] ^ s[i + 10] ^ s[i + 15] ^ s[i + 20]; - - for(i = 0; i < 5; i++) { - t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); - for(j = 0; j < 25; j += 5) - s[j + i] ^= t; - } - /* Rho Pi */ - t = s[1]; - for(i = 0; i < 24; i++) { - j = keccakf_piln[i]; - bc[0] = s[j]; - s[j] = ROTL64(t, keccakf_rotc[i]); - t = bc[0]; - } - /* Chi */ - for(j = 0; j < 25; j += 5) { - for(i = 0; i < 5; i++) - bc[i] = s[j + i]; - for(i = 0; i < 5; i++) - s[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5]; - } - /* Iota */ - s[0] ^= keccakf_rndc[round]; - } +static void keccakf( uint64_t s[25] ) { + int i, j, round; + uint64_t t, bc[5]; + + for (round = 0; round < SHA3_KECCAK_ROUNDS; round++) { + /* Theta */ + for (i = 0; i < 5; i++) { + bc[i] = s[i] ^ s[i + 5] ^ s[i + 10] ^ s[i + 15] ^ s[i + 20]; + } + + for (i = 0; i < 5; i++) { + t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); + for (j = 0; j < 25; j += 5) { + s[j + i] ^= t; + } + } + /* Rho Pi */ + t = s[1]; + for (i = 0; i < 24; i++) { + j = keccakf_piln[i]; + bc[0] = s [j]; + s[j] = ROTL64(t, keccakf_rotc[i]); + t = bc[0]; + } + /* Chi */ + for (j = 0; j < 25; j += 5) { + for (i = 0; i < 5; i++) { + bc[i] = s[j + i]; + } + for (i = 0; i < 5; i++) { + s[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5]; + } 
+ } + /* Iota */ + s[0] ^= keccakf_rndc[round]; + } } -static void sha3_Init(sha3_context * ctx, unsigned bitSize) { - assert(bitSize == 256 || bitSize == 384 || bitSize == 512); - memset(ctx, 0, sizeof(*ctx)); - ctx->capacityWords = 2 * bitSize / (8 * sizeof(uint64_t)); +static void sha3_Init( sha3_context * ctx, unsigned bitSize ) { + assert(bitSize == 256 || bitSize == 384 || bitSize == 512); + memset(ctx, 0, sizeof(*ctx)); + ctx->capacityWords = 2 * bitSize / (8 * sizeof(uint64_t)); } /* @@ -117,137 +128,139 @@ static void sha3_Init(sha3_context * ctx, unsigned bitSize) { * changing the hashed bytes cannot easily reveal the seed nor * trivially collide the hash state. */ -static void sha3_Seed(sha3_context * ctx, uint64_t seed) { - if (ctx->capacityWords >= 2) { - ctx->s[SHA3_KECCAK_SPONGE_WORDS - 2] ^= seed; - ctx->s[SHA3_KECCAK_SPONGE_WORDS - 1] ^= seed * UINT64_C(0x9E3779B97F4A7C15); - } else { - ctx->s[SHA3_KECCAK_SPONGE_WORDS - 1] ^= seed; - } +static void sha3_Seed( sha3_context * ctx, uint64_t seed ) { + if (ctx->capacityWords >= 2) { + ctx->s[SHA3_KECCAK_SPONGE_WORDS - 2] ^= seed; + ctx->s[SHA3_KECCAK_SPONGE_WORDS - 1] ^= seed * UINT64_C(0x9E3779B97F4A7C15); + } else { + ctx->s[SHA3_KECCAK_SPONGE_WORDS - 1] ^= seed; + } } -template < bool bswap > -static void sha3_Process(sha3_context * ctx, const uint8_t * in, size_t inlen) { - /* 0...7 -- how much is needed to have a word */ - uint32_t old_tail = (8 - ctx->byteIndex) & 7; - uint32_t tail; - size_t words, i; - - if (inlen == 0) return; /* nothing to do */ - - if (inlen < old_tail) { /* have no complete word or haven't started the word yet */ - while (inlen--) - ctx->saved |= (uint64_t) (*(in++)) << ((ctx->byteIndex++) * 8); - return; - } - - if (old_tail) { /* will have one word to process */ - inlen -= old_tail; - while (old_tail--) - ctx->saved |= (uint64_t) (*(in++)) << ((ctx->byteIndex++) * 8); - - /* now ready to add saved to the sponge */ - ctx->s[ctx->wordIndex] ^= ctx->saved; - ctx->byteIndex = 
0; - ctx->saved = 0; - if(++ctx->wordIndex == (SHA3_KECCAK_SPONGE_WORDS - ctx->capacityWords)) { - keccakf(ctx->s); - ctx->wordIndex = 0; - } - } - - /* now work in full words directly from input */ - words = inlen / sizeof(uint64_t); - tail = inlen - words * sizeof(uint64_t); - - for(i = 0; i < words; i++, in += sizeof(uint64_t)) { - uint64_t t = GET_U64(in, 0); - ctx->s[ctx->wordIndex] ^= t; - if(++ctx->wordIndex == (SHA3_KECCAK_SPONGE_WORDS - ctx->capacityWords)) { - keccakf(ctx->s); - ctx->wordIndex = 0; - } - } - - /* finally, save the partial word */ - while (tail--) { - ctx->saved |= (uint64_t) (*(in++)) << ((ctx->byteIndex++) * 8); - } - return; +template +static void sha3_Process( sha3_context * ctx, const uint8_t * in, size_t inlen ) { + /* 0...7 -- how much is needed to have a word */ + uint32_t old_tail = (8 - ctx->byteIndex) & 7; + uint32_t tail; + size_t words, i; + + if (inlen == 0) { return; } /* nothing to do */ + + if (inlen < old_tail) { /* have no complete word or haven't started the word yet */ + while (inlen--) { + ctx->saved |= (uint64_t)(*(in++)) << ((ctx->byteIndex++) * 8); + } + return; + } + + if (old_tail) { /* will have one word to process */ + inlen -= old_tail; + while (old_tail--) { + ctx->saved |= (uint64_t)(*(in++)) << ((ctx->byteIndex++) * 8); + } + + /* now ready to add saved to the sponge */ + ctx->s[ctx->wordIndex] ^= ctx->saved; + ctx->byteIndex = 0; + ctx->saved = 0; + if (++ctx->wordIndex == (SHA3_KECCAK_SPONGE_WORDS - ctx->capacityWords)) { + keccakf(ctx->s); + ctx->wordIndex = 0; + } + } + + /* now work in full words directly from input */ + words = inlen / sizeof (uint64_t); + tail = inlen - words * sizeof(uint64_t); + + for (i = 0; i < words; i++, in += sizeof(uint64_t)) { + uint64_t t = GET_U64(in, 0); + ctx->s[ctx->wordIndex] ^= t; + if (++ctx->wordIndex == (SHA3_KECCAK_SPONGE_WORDS - ctx->capacityWords)) { + keccakf(ctx->s); + ctx->wordIndex = 0; + } + } + + /* finally, save the partial word */ + while (tail--) { + 
ctx->saved |= (uint64_t)(*(in++)) << ((ctx->byteIndex++) * 8); + } + return; } -template < bool bswap > -static void sha3_Finalize(sha3_context * ctx, size_t digest_words, uint8_t * digest) { - /* - * Append 2-bit suffix 01, per SHA-3 spec. Instead of 1 for padding - * we use 1<<2 below. The 0x02 below corresponds to the suffix 01. - * Overall, we feed 0, then 1, and finally 1 to start - * padding. Without M || 01, we would simply use 1 to start padding. - */ - uint64_t t = (uint64_t)(((uint64_t)(0x02 | (1 << 2))) << ((ctx->byteIndex) * 8)); - - ctx->s[ctx->wordIndex] ^= ctx->saved ^ t; - ctx->s[SHA3_KECCAK_SPONGE_WORDS - ctx->capacityWords - 1] ^= UINT64_C(0x8000000000000000); - keccakf(ctx->s); - - uint32_t maxdigest_words = ctx->capacityWords / 2; - if (digest_words > maxdigest_words) { digest_words = maxdigest_words; } - for (int i = 0; i < digest_words; i++) { - PUT_U64(ctx->s[i], digest, 8*i); - } - - return; +template +static void sha3_Finalize( sha3_context * ctx, size_t digest_words, uint8_t * digest ) { + /* + * Append 2-bit suffix 01, per SHA-3 spec. Instead of 1 for padding + * we use 1<<2 below. The 0x02 below corresponds to the suffix 01. + * Overall, we feed 0, then 1, and finally 1 to start + * padding. Without M || 01, we would simply use 1 to start padding. 
+ */ + uint64_t t = (uint64_t)(((uint64_t)(0x02 | (1 << 2))) << ((ctx->byteIndex) * 8)); + + ctx->s[ctx->wordIndex] ^= ctx->saved ^ t; + ctx->s[SHA3_KECCAK_SPONGE_WORDS - ctx->capacityWords - 1] ^= UINT64_C(0x8000000000000000); + keccakf(ctx->s); + + uint32_t maxdigest_words = ctx->capacityWords / 2; + if (digest_words > maxdigest_words) { digest_words = maxdigest_words; } + for (int i = 0; i < digest_words; i++) { + PUT_U64(ctx->s[i], digest, 8 * i); + } + + return; } -template < uint32_t hashbits, bool bswap > -static void SHA3_256(const void * in, const size_t len, const seed_t seed, void * out) { - sha3_context context; +template +static void SHA3_256( const void * in, const size_t len, const seed_t seed, void * out ) { + sha3_context context; - sha3_Init (&context, 256); - sha3_Seed (&context, (uint64_t)seed); - sha3_Process (&context, (const uint8_t *)in, len); - sha3_Finalize(&context, (hashbits+63)/64, (uint8_t *)out); + sha3_Init(&context, 256); + sha3_Seed(&context, (uint64_t)seed); + sha3_Process(&context, (const uint8_t *)in, len); + sha3_Finalize(&context, (hashbits + 63) / 64, (uint8_t *)out); } REGISTER_FAMILY(sha3, - $.src_url = "https://github.com/brainhub/SHA3IUF", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/brainhub/SHA3IUF", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(SHA_3_256__64, - $.desc = "SHA-3, bits 0-63", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 64, - $.verification_LE = 0x76804BEC, - $.verification_BE = 0xC7D2D825, - $.hashfn_native = SHA3_256<64,false>, - $.hashfn_bswap = SHA3_256<64,true> -); + $.desc = "SHA-3, bits 0-63", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + 
FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 64, + $.verification_LE = 0x76804BEC, + $.verification_BE = 0xC7D2D825, + $.hashfn_native = SHA3_256<64, false>, + $.hashfn_bswap = SHA3_256<64, true> + ); REGISTER_HASH(SHA_3, - $.desc = "SHA-3", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC | - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_NO_SEED, - $.impl_flags = - FLAG_IMPL_LICENSE_MIT | - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_VERY_SLOW, - $.bits = 256, - $.verification_LE = 0x79AEFB60, - $.verification_BE = 0x074CB90C, - $.hashfn_native = SHA3_256<256,false>, - $.hashfn_bswap = SHA3_256<256,true> -); + $.desc = "SHA-3", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC | + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_NO_SEED, + $.impl_flags = + FLAG_IMPL_LICENSE_MIT | + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_VERY_SLOW, + $.bits = 256, + $.verification_LE = 0x79AEFB60, + $.verification_BE = 0x074CB90C, + $.hashfn_native = SHA3_256<256, false>, + $.hashfn_bswap = SHA3_256<256, true> + ); diff --git a/hashes/siphash.cpp b/hashes/siphash.cpp index a63f2080..2a2da0c8 100644 --- a/hashes/siphash.cpp +++ b/hashes/siphash.cpp @@ -29,245 +29,247 @@ #include "Hashlib.h" #if defined(HAVE_SSSE_3) || defined(HAVE_SSE_2) -#include "Intrinsics.h" + #include "Intrinsics.h" #endif //------------------------------------------------------------ -#define SIPCOMPRESS_64 \ - v0 += v1; v2 += v3; \ - v1 = ROTL64(v1,13); v3 = ROTL64(v3,16); \ - v1 ^= v0; v3 ^= v2; \ - v0 = ROTL64(v0,32); \ - v2 += v1; v0 += v3; \ - v1 = ROTL64(v1,17); v3 = ROTL64(v3,21); \ - v1 ^= v2; v3 ^= v0; \ +#define SIPCOMPRESS_64 \ + v0 += v1; v2 += v3; \ + v1 = ROTL64(v1,13); v3 = ROTL64(v3,16); \ + v1 ^= v0; v3 ^= v2; \ + v0 = ROTL64(v0,32); \ + v2 += v1; v0 += v3; \ + v1 = ROTL64(v1,17); v3 = ROTL64(v3,21); \ + v1 ^= v2; v3 ^= v0; \ v2 = ROTL64(v2,32) /* The 64bit 1-3 and 2-4 
variants */ -template < bool variant_2_4, bool bswap > -static uint64_t siphash_portable(const uint64_t key[2], const uint8_t * m, size_t len) { - uint64_t v0, v1, v2, v3; - uint64_t mi, k0, k1; - uint64_t last7; - size_t i, blocks; +template +static uint64_t siphash_portable( const uint64_t key[2], const uint8_t * m, size_t len ) { + uint64_t v0, v1, v2, v3; + uint64_t mi, k0, k1; + uint64_t last7; + size_t i, blocks; - k0 = key[0]; - k1 = key[1]; + k0 = key[0]; + k1 = key[1]; - v0 = k0 ^ UINT64_C(0x736f6d6570736575); + v0 = k0 ^ UINT64_C(0x736f6d6570736575); v1 = k1 ^ UINT64_C(0x646f72616e646f6d); - v2 = k0 ^ UINT64_C(0x6c7967656e657261); - v3 = k1 ^ UINT64_C(0x7465646279746573); + v2 = k0 ^ UINT64_C(0x6c7967656e657261); + v3 = k1 ^ UINT64_C(0x7465646279746573); - for (i = 0, blocks = (len & ~7); i < blocks; i += 8) { - mi = GET_U64(m, i); - v3 ^= mi; - SIPCOMPRESS_64; + for (i = 0, blocks = (len & ~7); i < blocks; i += 8) { + mi = GET_U64(m, i); + v3 ^= mi; + SIPCOMPRESS_64; if (variant_2_4) { SIPCOMPRESS_64; } - v0 ^= mi; - } - - last7 = (uint64_t)(len & 0xff) << 56; - switch (len - blocks) { - case 7: last7 |= (uint64_t)m[i + 6] << 48; - case 6: last7 |= (uint64_t)m[i + 5] << 40; - case 5: last7 |= (uint64_t)m[i + 4] << 32; - case 4: last7 |= (uint64_t)m[i + 3] << 24; - case 3: last7 |= (uint64_t)m[i + 2] << 16; - case 2: last7 |= (uint64_t)m[i + 1] << 8; - case 1: last7 |= (uint64_t)m[i + 0] ; - case 0: - default:; - }; - - v3 ^= last7; - SIPCOMPRESS_64; + v0 ^= mi; + } + + last7 = (uint64_t)(len & 0xff) << 56; + switch (len - blocks) { + case 7: last7 |= (uint64_t)m[i + 6] << 48; + case 6: last7 |= (uint64_t)m[i + 5] << 40; + case 5: last7 |= (uint64_t)m[i + 4] << 32; + case 4: last7 |= (uint64_t)m[i + 3] << 24; + case 3: last7 |= (uint64_t)m[i + 2] << 16; + case 2: last7 |= (uint64_t)m[i + 1] << 8; + case 1: last7 |= (uint64_t)m[i + 0]; + case 0: + default:; + } + + v3 ^= last7; + SIPCOMPRESS_64; if (variant_2_4) { SIPCOMPRESS_64; } - v0 ^= last7; - v2 ^= 
0xff; - SIPCOMPRESS_64; - SIPCOMPRESS_64; - SIPCOMPRESS_64; + v0 ^= last7; + v2 ^= 0xff; + SIPCOMPRESS_64; + SIPCOMPRESS_64; + SIPCOMPRESS_64; if (variant_2_4) { SIPCOMPRESS_64; } - return v0 ^ v1 ^ v2 ^ v3; + return v0 ^ v1 ^ v2 ^ v3; } //------------------------------------------------------------ #if defined(HAVE_SSSE_3) || defined(HAVE_SSE_2) -typedef __m128i xmmi; -typedef __m64 qmm; +typedef __m128i xmmi; +typedef __m64 qmm; typedef union packedelem64_t { - uint64_t u[2]; - xmmi v; + uint64_t u[2]; + xmmi v; } packedelem64; typedef union packedelem8_t { - uint8_t u[16]; - xmmi v; + uint8_t u[16]; + xmmi v; } packedelem8; /* 0,2,1,3 */ static const packedelem64 siphash_init[2] = { - {{ UINT64_C(0x736f6d6570736575), UINT64_C(0x6c7967656e657261) }}, - {{ UINT64_C(0x646f72616e646f6d), UINT64_C(0x7465646279746573) }} + { { UINT64_C(0x736f6d6570736575), UINT64_C(0x6c7967656e657261) } }, + { { UINT64_C(0x646f72616e646f6d), UINT64_C(0x7465646279746573) } } }; static const packedelem64 siphash_final = { - { UINT64_C(0x0000000000000000), UINT64_C(0x00000000000000ff) } + { UINT64_C(0x0000000000000000), UINT64_C(0x00000000000000ff) } }; static const packedelem8 siphash_rot16v3 = { - {14,15,8,9,10,11,12,13,8,9,10,11,12,13,14,15} + { 14, 15, 8, 9, 10, 11, 12, 13, 8, 9, 10, 11, 12, 13, 14, 15 } }; -template < bool variant_2_4, bool bswap > -static uint64_t siphash_sse(const uint64_t key[2], const uint8_t * m, size_t len) { - xmmi k,v02,v20,v13,v11,v33,mi; - uint64_t last7; - uint32_t lo, hi; - size_t i, blocks; - - k = _mm_loadu_si128((xmmi *)key); - v02 = siphash_init[0].v; - v13 = siphash_init[1].v; - v02 = _mm_xor_si128(v02, _mm_unpacklo_epi64(k, k)); - v13 = _mm_xor_si128(v13, _mm_unpackhi_epi64(k, k)); - -#if defined(HAVE_SSSE_3) -#define sipcompress() \ - v11 = v13; \ - v33 = v13; \ - v11 = _mm_or_si128(_mm_slli_epi64(v11, 13), _mm_srli_epi64(v11, 64-13)); \ - v02 = _mm_add_epi64(v02, v13); \ - v33 = _mm_shuffle_epi8(v33, siphash_rot16v3.v); \ - v13 = 
_mm_unpacklo_epi64(v11, v33); \ - v13 = _mm_xor_si128(v13, v02); \ - v20 = _mm_shuffle_epi32(v02, _MM_SHUFFLE(0,1,3,2)); \ - v11 = v13; \ - v33 = _mm_shuffle_epi32(v13, _MM_SHUFFLE(1,0,3,2)); \ - v11 = _mm_or_si128(_mm_slli_epi64(v11, 17), _mm_srli_epi64(v11, 64-17)); \ - v20 = _mm_add_epi64(v20, v13); \ - v33 = _mm_or_si128(_mm_slli_epi64(v33, 21), _mm_srli_epi64(v33, 64-21)); \ - v13 = _mm_unpacklo_epi64(v11, v33); \ - v13 = _mm_unpacklo_epi64(v11, v33); \ - v02 = _mm_shuffle_epi32(v20, _MM_SHUFFLE(0,1,3,2)); \ +template +static uint64_t siphash_sse( const uint64_t key[2], const uint8_t * m, size_t len ) { + xmmi k, v02, v20, v13, v11, v33, mi; + uint64_t last7; + uint32_t lo, hi; + size_t i, blocks; + + k = _mm_loadu_si128((xmmi *)key); + v02 = siphash_init[0].v; + v13 = siphash_init[1].v; + v02 = _mm_xor_si128(v02, _mm_unpacklo_epi64(k, k)); + v13 = _mm_xor_si128(v13, _mm_unpackhi_epi64(k, k)); + + #if defined(HAVE_SSSE_3) +#define sipcompress() \ + v11 = v13; \ + v33 = v13; \ + v11 = _mm_or_si128(_mm_slli_epi64(v11, 13), _mm_srli_epi64(v11, 64-13));\ + v02 = _mm_add_epi64(v02, v13); \ + v33 = _mm_shuffle_epi8(v33, siphash_rot16v3.v); \ + v13 = _mm_unpacklo_epi64(v11, v33); \ + v13 = _mm_xor_si128(v13, v02); \ + v20 = _mm_shuffle_epi32(v02, _MM_SHUFFLE(0,1,3,2)); \ + v11 = v13; \ + v33 = _mm_shuffle_epi32(v13, _MM_SHUFFLE(1,0,3,2)); \ + v11 = _mm_or_si128(_mm_slli_epi64(v11, 17), _mm_srli_epi64(v11, 64-17));\ + v20 = _mm_add_epi64(v20, v13); \ + v33 = _mm_or_si128(_mm_slli_epi64(v33, 21), _mm_srli_epi64(v33, 64-21));\ + v13 = _mm_unpacklo_epi64(v11, v33); \ + v13 = _mm_unpacklo_epi64(v11, v33); \ + v02 = _mm_shuffle_epi32(v20, _MM_SHUFFLE(0,1,3,2)); \ v13 = _mm_xor_si128(v13, v20); -#else -#define sipcompress() \ - v11 = v13; \ - v33 = _mm_shuffle_epi32(v13, _MM_SHUFFLE(1,0,3,2)); \ - v11 = _mm_or_si128(_mm_slli_epi64(v11, 13), _mm_srli_epi64(v11, 64-13)); \ - v02 = _mm_add_epi64(v02, v13); \ - v33 = _mm_or_si128(_mm_slli_epi64(v33, 16), _mm_srli_epi64(v33, 
64-16)); \ - v13 = _mm_unpacklo_epi64(v11, v33); \ - v13 = _mm_xor_si128(v13, v02); \ - v20 = _mm_shuffle_epi32(v02, _MM_SHUFFLE(0,1,3,2)); \ - v11 = v13; \ - v33 = _mm_shuffle_epi32(v13, _MM_SHUFFLE(1,0,3,2)); \ - v11 = _mm_or_si128(_mm_slli_epi64(v11, 17), _mm_srli_epi64(v11, 64-17)); \ - v20 = _mm_add_epi64(v20, v13); \ - v33 = _mm_or_si128(_mm_slli_epi64(v33, 21), _mm_srli_epi64(v33, 64-21)); \ - v13 = _mm_unpacklo_epi64(v11, v33); \ - v13 = _mm_unpacklo_epi64(v11, v33); \ - v02 = _mm_shuffle_epi32(v20, _MM_SHUFFLE(0,1,3,2)); \ + #else +#define sipcompress() \ + v11 = v13; \ + v33 = _mm_shuffle_epi32(v13, _MM_SHUFFLE(1,0,3,2)); \ + v11 = _mm_or_si128(_mm_slli_epi64(v11, 13), _mm_srli_epi64(v11, 64-13));\ + v02 = _mm_add_epi64(v02, v13); \ + v33 = _mm_or_si128(_mm_slli_epi64(v33, 16), _mm_srli_epi64(v33, 64-16));\ + v13 = _mm_unpacklo_epi64(v11, v33); \ + v13 = _mm_xor_si128(v13, v02); \ + v20 = _mm_shuffle_epi32(v02, _MM_SHUFFLE(0,1,3,2)); \ + v11 = v13; \ + v33 = _mm_shuffle_epi32(v13, _MM_SHUFFLE(1,0,3,2)); \ + v11 = _mm_or_si128(_mm_slli_epi64(v11, 17), _mm_srli_epi64(v11, 64-17));\ + v20 = _mm_add_epi64(v20, v13); \ + v33 = _mm_or_si128(_mm_slli_epi64(v33, 21), _mm_srli_epi64(v33, 64-21));\ + v13 = _mm_unpacklo_epi64(v11, v33); \ + v13 = _mm_unpacklo_epi64(v11, v33); \ + v02 = _mm_shuffle_epi32(v20, _MM_SHUFFLE(0,1,3,2)); \ v13 = _mm_xor_si128(v13, v20); -#endif + #endif - for (i = 0, blocks = (len & ~7); i < blocks; i += 8) { - mi = _mm_loadl_epi64((xmmi *)(m + i)); + for (i = 0, blocks = (len & ~7); i < blocks; i += 8) { + mi = _mm_loadl_epi64((xmmi *)(m + i)); if (bswap) { mi = mm_bswap64(mi); } - v13 = _mm_xor_si128(v13, _mm_slli_si128(mi, 8)); - sipcompress(); + v13 = _mm_xor_si128(v13, _mm_slli_si128(mi, 8)); + sipcompress(); if (variant_2_4) { sipcompress(); } - v02 = _mm_xor_si128(v02, mi); - } - - last7 = (uint64_t)(len & 0xff) << 56; - switch (len - blocks) { - case 7: last7 |= (uint64_t)m[i + 6] << 48; - case 6: last7 |= (uint64_t)m[i + 5] << 40; 
- case 5: last7 |= (uint64_t)m[i + 4] << 32; - case 4: last7 |= (uint64_t)m[i + 3] << 24; - case 3: last7 |= (uint64_t)m[i + 2] << 16; - case 2: last7 |= (uint64_t)m[i + 1] << 8; - case 1: last7 |= (uint64_t)m[i + 0] ; - case 0: - default:; - }; - - mi = _mm_unpacklo_epi32(_mm_cvtsi32_si128((uint32_t)last7),_mm_cvtsi32_si128((uint32_t)(last7 >> 32))); - v13 = _mm_xor_si128(v13, _mm_slli_si128(mi, 8)); - sipcompress(); + v02 = _mm_xor_si128(v02, mi); + } + + last7 = (uint64_t)(len & 0xff) << 56; + switch (len - blocks) { + case 7: last7 |= (uint64_t)m[i + 6] << 48; + case 6: last7 |= (uint64_t)m[i + 5] << 40; + case 5: last7 |= (uint64_t)m[i + 4] << 32; + case 4: last7 |= (uint64_t)m[i + 3] << 24; + case 3: last7 |= (uint64_t)m[i + 2] << 16; + case 2: last7 |= (uint64_t)m[i + 1] << 8; + case 1: last7 |= (uint64_t)m[i + 0]; + case 0: + default:; + } + + mi = _mm_unpacklo_epi32(_mm_cvtsi32_si128((uint32_t)last7), _mm_cvtsi32_si128((uint32_t)(last7 >> 32))); + v13 = _mm_xor_si128(v13, _mm_slli_si128(mi, 8)); + sipcompress(); if (variant_2_4) { sipcompress(); } - v02 = _mm_xor_si128(v02, mi); - v02 = _mm_xor_si128(v02, siphash_final.v); - sipcompress(); - sipcompress(); - sipcompress(); + v02 = _mm_xor_si128(v02, mi); + v02 = _mm_xor_si128(v02, siphash_final.v); + sipcompress(); + sipcompress(); + sipcompress(); if (variant_2_4) { sipcompress(); } - v02 = _mm_xor_si128(v02, v13); - v02 = _mm_xor_si128(v02, _mm_shuffle_epi32(v02, _MM_SHUFFLE(1,0,3,2))); - lo = _mm_cvtsi128_si32(v02); - hi = _mm_cvtsi128_si32(_mm_srli_si128(v02, 4)); - return ((uint64_t)hi << 32) | lo; + v02 = _mm_xor_si128(v02, v13); + v02 = _mm_xor_si128(v02, _mm_shuffle_epi32(v02, _MM_SHUFFLE(1, 0, 3, 2))); + lo = _mm_cvtsi128_si32(v02); + hi = _mm_cvtsi128_si32(_mm_srli_si128(v02, 4)); + return ((uint64_t)hi << 32) | lo; } + #endif //------------------------------------------------------------ // the faster half 32bit variant for the linux kernel -#define SIPCOMPRESS_32 \ - do { \ - v0 += v1; \ - v1 = 
ROTL32(v1, 5); \ - v1 ^= v0; \ - v0 = ROTL32(v0, 16); \ - v2 += v3; \ - v3 = ROTL32(v3, 8); \ - v3 ^= v2; \ - v0 += v3; \ - v3 = ROTL32(v3, 7); \ - v3 ^= v0; \ - v2 += v1; \ - v1 = ROTL32(v1, 13); \ - v1 ^= v2; \ - v2 = ROTL32(v2, 16); \ +#define SIPCOMPRESS_32 \ + do { \ + v0 += v1; \ + v1 = ROTL32(v1, 5); \ + v1 ^= v0; \ + v0 = ROTL32(v0, 16); \ + v2 += v3; \ + v3 = ROTL32(v3, 8); \ + v3 ^= v2; \ + v0 += v3; \ + v3 = ROTL32(v3, 7); \ + v3 ^= v0; \ + v2 += v1; \ + v1 = ROTL32(v1, 13); \ + v1 ^= v2; \ + v2 = ROTL32(v2, 16); \ } while (0) -template < bool bswap > -static uint32_t halfsiphash(const uint32_t key[2], const uint8_t *m, size_t len) { - uint32_t v0 = 0; - uint32_t v1 = 0; - uint32_t v2 = 0x6c796765; - uint32_t v3 = 0x74656462; - uint32_t k0 = key[0]; - uint32_t k1 = key[1]; - uint32_t mi; - const uint8_t *end = m + len - (len % sizeof(uint32_t)); - const int left = len & 3; - uint32_t b = ((uint32_t)len) << 24; +template +static uint32_t halfsiphash( const uint32_t key[2], const uint8_t * m, size_t len ) { + uint32_t v0 = 0; + uint32_t v1 = 0; + uint32_t v2 = 0x6c796765; + uint32_t v3 = 0x74656462; + uint32_t k0 = key[0]; + uint32_t k1 = key[1]; + uint32_t mi; + const uint8_t * end = m + len - (len % sizeof(uint32_t)); + const int left = len & 3; + uint32_t b = ((uint32_t)len) << 24; + v3 ^= k1; v2 ^= k0; v1 ^= k1; v0 ^= k0; for (; m != end; m += 4) { - mi = GET_U32(m, 0); + mi = GET_U32(m, 0); v3 ^= mi; SIPCOMPRESS_32; SIPCOMPRESS_32; @@ -276,14 +278,14 @@ static uint32_t halfsiphash(const uint32_t key[2], const uint8_t *m, size_t len) switch (left) { case 3: - b |= ((uint32_t)m[2]) << 16; + b |= ((uint32_t)m[2]) << 16; case 2: - b |= ((uint32_t)m[1]) << 8; + b |= ((uint32_t)m[1]) << 8; case 1: - b |= ((uint32_t)m[0]); - break; + b |= ((uint32_t)m[0]); + break; case 0: - break; + break; } v3 ^= b; @@ -304,173 +306,177 @@ static uint32_t halfsiphash(const uint32_t key[2], const uint8_t *m, size_t len) // I could find no source for this other than rurban's 
SMHasher // fork. The slightly-bizarre seeding routine is a hardcoded 64-bit // version of the awkward global-variable+Rand() one in that code. -template < bool bswap > -static uint64_t tsip(const uint64_t seed, const uint8_t * m, uint64_t len) { - uint64_t v0, v1; - uint64_t mi, k0, k1; - uint64_t last7; - - k0 = seed ^ UINT64_C(0x4915a64c00000000); - k1 = seed ^ UINT64_C(0x1c29205700000000); - - v0 = k0 ^ UINT64_C(0x736f6d6570736575); - v1 = k1 ^ UINT64_C(0x646f72616e646f6d); - -#define tsipcompress() \ - do { \ - v0 += v1; \ - v1 = ROTL64(v1, 13) ^ v0; \ - v0 = ROTL64(v0, 35) + v1; \ - v1 = ROTL64(v1, 17) ^ v0; \ - v0 = ROTL64(v0, 21); \ +template +static uint64_t tsip( const uint64_t seed, const uint8_t * m, uint64_t len ) { + uint64_t v0, v1; + uint64_t mi, k0, k1; + uint64_t last7; + + k0 = seed ^ UINT64_C(0x4915a64c00000000); + k1 = seed ^ UINT64_C(0x1c29205700000000); + + v0 = k0 ^ UINT64_C(0x736f6d6570736575); + v1 = k1 ^ UINT64_C(0x646f72616e646f6d); + +#define tsipcompress() \ + do { \ + v0 += v1; \ + v1 = ROTL64(v1, 13) ^ v0; \ + v0 = ROTL64(v0, 35) + v1; \ + v1 = ROTL64(v1, 17) ^ v0; \ + v0 = ROTL64(v0, 21); \ } while (0) - const uint8_t *end = m + (len & ~7); - - while (m < end) { - mi = GET_U64(m, 0); - v1 ^= mi; - tsipcompress(); - v0 ^= mi; - m += 8; - } - - last7 = (uint64_t)(len & 0xff) << 56; - switch (len & 7) { - case 7: - last7 |= (uint64_t)m[6] << 48; - case 6: - last7 |= (uint64_t)m[5] << 40; - case 5: - last7 |= (uint64_t)m[4] << 32; - case 4: - last7 |= (uint64_t)m[3] << 24; - case 3: - last7 |= (uint64_t)m[2] << 16; - case 2: - last7 |= (uint64_t)m[1] << 8; - case 1: - last7 |= (uint64_t)m[0]; - case 0: - default:; - }; - - v1 ^= last7; - tsipcompress(); - v0 ^= last7; - - // finalization - v1 ^= 0xff; - tsipcompress(); - v1 = ROTL64(v1, 32); - tsipcompress(); - v1 = ROTL64(v1, 32); - - return v0 ^ v1; + const uint8_t * end = m + (len & ~7); + + while (m < end) { + mi = GET_U64(m, 0); + v1 ^= mi; + tsipcompress(); + v0 ^= mi; + m += 8; + 
} + + last7 = (uint64_t)(len & 0xff) << 56; + switch (len & 7) { + case 7: + last7 |= (uint64_t)m[6] << 48; + case 6: + last7 |= (uint64_t)m[5] << 40; + case 5: + last7 |= (uint64_t)m[4] << 32; + case 4: + last7 |= (uint64_t)m[3] << 24; + case 3: + last7 |= (uint64_t)m[2] << 16; + case 2: + last7 |= (uint64_t)m[1] << 8; + case 1: + last7 |= (uint64_t)m[0]; + case 0: + default:; + } + + v1 ^= last7; + tsipcompress(); + v0 ^= last7; + + // finalization + v1 ^= 0xff; + tsipcompress(); + v1 = ROTL64(v1, 32); + tsipcompress(); + v1 = ROTL64(v1, 32); + + return v0 ^ v1; } //------------------------------------------------------------ -template < bool bswap > -static void SipHash_2_4(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void SipHash_2_4( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t key[2] = { seed, 0 }; uint64_t h; + #if defined(HAVE_SSSE_3) || defined(HAVE_SSE_2) - h = siphash_sse(key, (const uint8_t *)in, len); + h = siphash_sse (key, (const uint8_t *)in, len); #else h = siphash_portable(key, (const uint8_t *)in, len); #endif PUT_U64(h, (uint8_t *)out, 0); } -template < bool bswap > -static void SipHash_1_3(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void SipHash_1_3( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t key[2] = { seed, 0 }; uint64_t h; + #if defined(HAVE_SSSE_3) || defined(HAVE_SSE_2) - h = siphash_sse(key, (const uint8_t *)in, len); + h = siphash_sse (key, (const uint8_t *)in, len); #else h = siphash_portable(key, (const uint8_t *)in, len); #endif PUT_U64(h, (uint8_t *)out, 0); } -template < bool bswap > -static void HalfSipHash(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void HalfSipHash( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t key[2] = { (uint32_t)seed, (uint32_t)(((uint64_t)seed) >> 32) }; uint32_t h; + h = halfsiphash(key, 
(const uint8_t *)in, len); PUT_U32(h, (uint8_t *)out, 0); } -template < bool bswap > -static void TinySipHash(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void TinySipHash( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h; + h = tsip((uint64_t)seed, (const uint8_t *)in, len); PUT_U64(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(siphash, - $.src_url = "https://github.com/floodyberry/siphash", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/floodyberry/siphash", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(SipHash_2_4, - $.desc = "SipHash 2-4", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC, - $.impl_flags = - FLAG_IMPL_SLOW | - FLAG_IMPL_TYPE_PUNNING | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x57B661ED, - $.verification_BE = 0x01B634D0, - $.hashfn_native = SipHash_2_4, - $.hashfn_bswap = SipHash_2_4 -); + $.desc = "SipHash 2-4", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_TYPE_PUNNING | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x57B661ED, + $.verification_BE = 0x01B634D0, + $.hashfn_native = SipHash_2_4, + $.hashfn_bswap = SipHash_2_4 + ); REGISTER_HASH(SipHash_1_3, - $.desc = "SipHash 1-3", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC, - $.impl_flags = - FLAG_IMPL_SLOW | - FLAG_IMPL_TYPE_PUNNING | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x8936B193, - $.verification_BE = 0xBEB90EAC, - $.hashfn_native = SipHash_1_3, - $.hashfn_bswap = SipHash_1_3 -); + $.desc = "SipHash 1-3", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_TYPE_PUNNING | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x8936B193, + $.verification_BE = 0xBEB90EAC, + 
$.hashfn_native = SipHash_1_3, + $.hashfn_bswap = SipHash_1_3 + ); REGISTER_HASH(HalfSipHash, - $.desc = "SipHash half-width version", - $.hash_flags = - FLAG_HASH_CRYPTOGRAPHIC, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0xD2BE7FD8, - $.verification_BE = 0xEC8BC9AF, - $.hashfn_native = HalfSipHash, - $.hashfn_bswap = HalfSipHash -); + $.desc = "SipHash half-width version", + $.hash_flags = + FLAG_HASH_CRYPTOGRAPHIC, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0xD2BE7FD8, + $.verification_BE = 0xEC8BC9AF, + $.hashfn_native = HalfSipHash, + $.hashfn_bswap = HalfSipHash + ); REGISTER_HASH(TinySipHash, - $.desc = "Damian Gryski's Tiny SipHash variant", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x75C732C0, - $.verification_BE = 0xEFE9C35D, - $.hashfn_native = TinySipHash, - $.hashfn_bswap = TinySipHash -); + $.desc = "Damian Gryski's Tiny SipHash variant", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x75C732C0, + $.verification_BE = 0xEFE9C35D, + $.hashfn_native = TinySipHash, + $.hashfn_bswap = TinySipHash + ); diff --git a/hashes/spookyhash.cpp b/hashes/spookyhash.cpp index a5d5906a..14d64be4 100644 --- a/hashes/spookyhash.cpp +++ b/hashes/spookyhash.cpp @@ -61,16 +61,15 @@ // By Bob Jenkins, public domain class SpookyHash { -public: + public: // // SpookyHash: hash a single message in one call, produce 128-bit output // - template < uint32_t version, bool bswap > - static void Hash128( - const void *message, // message to hash - size_t length, // length of message in bytes - uint64_t *hash1, // in/out: in seed 1, out hash value 1 - uint64_t *hash2); // in/out: in seed 2, out hash value 2 + template + static void Hash128( const void * message, // message to hash + size_t length, // length of message 
in bytes + uint64_t * hash1, // in/out: in seed 1, out hash value 1 + uint64_t * hash2 ); // in/out: in seed 2, out hash value 2 // // This is used if the input is 96 bytes long or longer. @@ -85,25 +84,22 @@ class SpookyHash { // When run forward or backwards one Mix // I tried 3 pairs of each; they all differed by at least 212 bits. // - template < bool bswap > - static FORCE_INLINE void Mix( - const uint8_t * data, - uint64_t &s0, uint64_t &s1, uint64_t &s2, uint64_t &s3, - uint64_t &s4, uint64_t &s5, uint64_t &s6, uint64_t &s7, - uint64_t &s8, uint64_t &s9, uint64_t &s10,uint64_t &s11) - { - s0 += GET_U64(data, 8*0); s2 ^= s10; s11 ^= s0; s0 = ROTL64(s0,11); s11 += s1; - s1 += GET_U64(data, 8*1); s3 ^= s11; s0 ^= s1; s1 = ROTL64(s1,32); s0 += s2; - s2 += GET_U64(data, 8*2); s4 ^= s0; s1 ^= s2; s2 = ROTL64(s2,43); s1 += s3; - s3 += GET_U64(data, 8*3); s5 ^= s1; s2 ^= s3; s3 = ROTL64(s3,31); s2 += s4; - s4 += GET_U64(data, 8*4); s6 ^= s2; s3 ^= s4; s4 = ROTL64(s4,17); s3 += s5; - s5 += GET_U64(data, 8*5); s7 ^= s3; s4 ^= s5; s5 = ROTL64(s5,28); s4 += s6; - s6 += GET_U64(data, 8*6); s8 ^= s4; s5 ^= s6; s6 = ROTL64(s6,39); s5 += s7; - s7 += GET_U64(data, 8*7); s9 ^= s5; s6 ^= s7; s7 = ROTL64(s7,57); s6 += s8; - s8 += GET_U64(data, 8*8); s10 ^= s6; s7 ^= s8; s8 = ROTL64(s8,55); s7 += s9; - s9 += GET_U64(data, 8*9); s11 ^= s7; s8 ^= s9; s9 = ROTL64(s9,54); s8 += s10; - s10 += GET_U64(data, 8*10); s0 ^= s8; s9 ^= s10; s10 = ROTL64(s10,22); s9 += s11; - s11 += GET_U64(data, 8*11); s1 ^= s9; s10 ^= s11; s11 = ROTL64(s11,46); s10 += s0; + template + static FORCE_INLINE void Mix( const uint8_t * data, uint64_t & s0, uint64_t & s1, uint64_t & s2, + uint64_t & s3, uint64_t & s4, uint64_t & s5, uint64_t & s6, uint64_t & s7, uint64_t & s8, + uint64_t & s9, uint64_t & s10, uint64_t & s11 ) { + s0 += GET_U64(data, 8 * 0); s2 ^= s10; s11 ^= s0; s0 = ROTL64(s0, 11); s11 += s1; + s1 += GET_U64(data, 8 * 1); s3 ^= s11; s0 ^= s1; s1 = ROTL64(s1, 32); s0 += s2; + s2 += GET_U64(data, 
8 * 2); s4 ^= s0; s1 ^= s2; s2 = ROTL64(s2, 43); s1 += s3; + s3 += GET_U64(data, 8 * 3); s5 ^= s1; s2 ^= s3; s3 = ROTL64(s3, 31); s2 += s4; + s4 += GET_U64(data, 8 * 4); s6 ^= s2; s3 ^= s4; s4 = ROTL64(s4, 17); s3 += s5; + s5 += GET_U64(data, 8 * 5); s7 ^= s3; s4 ^= s5; s5 = ROTL64(s5, 28); s4 += s6; + s6 += GET_U64(data, 8 * 6); s8 ^= s4; s5 ^= s6; s6 = ROTL64(s6, 39); s5 += s7; + s7 += GET_U64(data, 8 * 7); s9 ^= s5; s6 ^= s7; s7 = ROTL64(s7, 57); s6 += s8; + s8 += GET_U64(data, 8 * 8); s10 ^= s6; s7 ^= s8; s8 = ROTL64(s8, 55); s7 += s9; + s9 += GET_U64(data, 8 * 9); s11 ^= s7; s8 ^= s9; s9 = ROTL64(s9, 54); s8 += s10; + s10 += GET_U64(data, 8 * 10); s0 ^= s8; s9 ^= s10; s10 = ROTL64(s10, 22); s9 += s11; + s11 += GET_U64(data, 8 * 11); s1 ^= s9; s10 ^= s11; s11 = ROTL64(s11, 46); s10 += s0; } // @@ -122,45 +118,40 @@ class SpookyHash { // Two iterations was almost good enough for a 64-bit result, but a // 128-bit result is reported, so End() does three iterations. // - static FORCE_INLINE void EndPartial( - uint64_t &h0, uint64_t &h1, uint64_t &h2, uint64_t &h3, - uint64_t &h4, uint64_t &h5, uint64_t &h6, uint64_t &h7, - uint64_t &h8, uint64_t &h9, uint64_t &h10,uint64_t &h11) - { - h11+= h1; h2 ^= h11; h1 = ROTL64(h1,44); - h0 += h2; h3 ^= h0; h2 = ROTL64(h2,15); - h1 += h3; h4 ^= h1; h3 = ROTL64(h3,34); - h2 += h4; h5 ^= h2; h4 = ROTL64(h4,21); - h3 += h5; h6 ^= h3; h5 = ROTL64(h5,38); - h4 += h6; h7 ^= h4; h6 = ROTL64(h6,33); - h5 += h7; h8 ^= h5; h7 = ROTL64(h7,10); - h6 += h8; h9 ^= h6; h8 = ROTL64(h8,13); - h7 += h9; h10^= h7; h9 = ROTL64(h9,38); - h8 += h10; h11^= h8; h10= ROTL64(h10,53); - h9 += h11; h0 ^= h9; h11= ROTL64(h11,42); - h10+= h0; h1 ^= h10; h0 = ROTL64(h0,54); + static FORCE_INLINE void EndPartial( uint64_t & h0, uint64_t & h1, uint64_t & h2, uint64_t & h3, uint64_t & h4, + uint64_t & h5, uint64_t & h6, uint64_t & h7, uint64_t & h8, + uint64_t & h9, uint64_t & h10, uint64_t & h11 ) { + h11 += h1; h2 ^= h11; h1 = ROTL64(h1 , 44); + h0 += h2; 
h3 ^= h0; h2 = ROTL64(h2 , 15); + h1 += h3; h4 ^= h1; h3 = ROTL64(h3 , 34); + h2 += h4; h5 ^= h2; h4 = ROTL64(h4 , 21); + h3 += h5; h6 ^= h3; h5 = ROTL64(h5 , 38); + h4 += h6; h7 ^= h4; h6 = ROTL64(h6 , 33); + h5 += h7; h8 ^= h5; h7 = ROTL64(h7 , 10); + h6 += h8; h9 ^= h6; h8 = ROTL64(h8 , 13); + h7 += h9; h10 ^= h7; h9 = ROTL64(h9 , 38); + h8 += h10; h11 ^= h8; h10 = ROTL64(h10, 53); + h9 += h11; h0 ^= h9; h11 = ROTL64(h11, 42); + h10 += h0; h1 ^= h10; h0 = ROTL64(h0 , 54); } - template < uint32_t version, bool bswap > - static FORCE_INLINE void End( - uint64_t &h0, uint64_t &h1, uint64_t &h2, uint64_t &h3, - uint64_t &h4, uint64_t &h5, uint64_t &h6, uint64_t &h7, - uint64_t &h8, uint64_t &h9, uint64_t &h10,uint64_t &h11, - const uint8_t * data) - { + template + static FORCE_INLINE void End( uint64_t & h0, uint64_t & h1, uint64_t & h2, uint64_t & h3, + uint64_t & h4, uint64_t & h5, uint64_t & h6, uint64_t & h7, uint64_t & h8, uint64_t & h9, + uint64_t & h10, uint64_t & h11, const uint8_t * data ) { if (version == 2) { - h0 += GET_U64(data, 8*0); h1 += GET_U64(data, 8*1); - h2 += GET_U64(data, 8*2); h3 += GET_U64(data, 8*3); - h4 += GET_U64(data, 8*4); h5 += GET_U64(data, 8*5); - h6 += GET_U64(data, 8*6); h7 += GET_U64(data, 8*7); - h8 += GET_U64(data, 8*8); h9 += GET_U64(data, 8*9); - h10 += GET_U64(data, 8*10); h11 += GET_U64(data, 8*11); + h0 += GET_U64(data, 8 * 0); h1 += GET_U64(data, 8 * 1); + h2 += GET_U64(data, 8 * 2); h3 += GET_U64(data, 8 * 3); + h4 += GET_U64(data, 8 * 4); h5 += GET_U64(data, 8 * 5); + h6 += GET_U64(data, 8 * 6); h7 += GET_U64(data, 8 * 7); + h8 += GET_U64(data, 8 * 8); h9 += GET_U64(data, 8 * 9); + h10 += GET_U64(data, 8 * 10); h11 += GET_U64(data, 8 * 11); } else { - Mix(data,h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); + Mix(data, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11); } - EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); - EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); - EndPartial(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); 
+ EndPartial(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11); + EndPartial(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11); + EndPartial(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11); } // @@ -178,20 +169,19 @@ class SpookyHash { // with diffs defined by either xor or subtraction // with a base of all zeros plus a counter, or plus another bit, or random // - static FORCE_INLINE void ShortMix(uint64_t &h0, uint64_t &h1, uint64_t &h2, uint64_t &h3) - { - h2 = ROTL64(h2,50); h2 += h3; h0 ^= h2; - h3 = ROTL64(h3,52); h3 += h0; h1 ^= h3; - h0 = ROTL64(h0,30); h0 += h1; h2 ^= h0; - h1 = ROTL64(h1,41); h1 += h2; h3 ^= h1; - h2 = ROTL64(h2,54); h2 += h3; h0 ^= h2; - h3 = ROTL64(h3,48); h3 += h0; h1 ^= h3; - h0 = ROTL64(h0,38); h0 += h1; h2 ^= h0; - h1 = ROTL64(h1,37); h1 += h2; h3 ^= h1; - h2 = ROTL64(h2,62); h2 += h3; h0 ^= h2; - h3 = ROTL64(h3,34); h3 += h0; h1 ^= h3; - h0 = ROTL64(h0,5); h0 += h1; h2 ^= h0; - h1 = ROTL64(h1,36); h1 += h2; h3 ^= h1; + static FORCE_INLINE void ShortMix( uint64_t & h0, uint64_t & h1, uint64_t & h2, uint64_t & h3 ) { + h2 = ROTL64(h2, 50); h2 += h3; h0 ^= h2; + h3 = ROTL64(h3, 52); h3 += h0; h1 ^= h3; + h0 = ROTL64(h0, 30); h0 += h1; h2 ^= h0; + h1 = ROTL64(h1, 41); h1 += h2; h3 ^= h1; + h2 = ROTL64(h2, 54); h2 += h3; h0 ^= h2; + h3 = ROTL64(h3, 48); h3 += h0; h1 ^= h3; + h0 = ROTL64(h0, 38); h0 += h1; h2 ^= h0; + h1 = ROTL64(h1, 37); h1 += h2; h3 ^= h1; + h2 = ROTL64(h2, 62); h2 += h3; h0 ^= h2; + h3 = ROTL64(h3, 34); h3 += h0; h1 ^= h3; + h0 = ROTL64(h0, 5); h0 += h1; h2 ^= h0; + h1 = ROTL64(h1, 36); h1 += h2; h3 ^= h1; } // @@ -206,44 +196,41 @@ class SpookyHash { // For every pair of input bits, // with probability 50 +- .75% (the worst case is approximately that) // - static FORCE_INLINE void ShortEnd(uint64_t &h0, uint64_t &h1, uint64_t &h2, uint64_t &h3) - { - h3 ^= h2; h2 = ROTL64(h2,15); h3 += h2; - h0 ^= h3; h3 = ROTL64(h3,52); h0 += h3; - h1 ^= h0; h0 = ROTL64(h0,26); h1 += h0; - h2 ^= h1; h1 = ROTL64(h1,51); h2 += h1; - 
h3 ^= h2; h2 = ROTL64(h2,28); h3 += h2; - h0 ^= h3; h3 = ROTL64(h3,9); h0 += h3; - h1 ^= h0; h0 = ROTL64(h0,47); h1 += h0; - h2 ^= h1; h1 = ROTL64(h1,54); h2 += h1; - h3 ^= h2; h2 = ROTL64(h2,32); h3 += h2; - h0 ^= h3; h3 = ROTL64(h3,25); h0 += h3; - h1 ^= h0; h0 = ROTL64(h0,63); h1 += h0; + static FORCE_INLINE void ShortEnd( uint64_t & h0, uint64_t & h1, uint64_t & h2, uint64_t & h3 ) { + h3 ^= h2; h2 = ROTL64(h2, 15); h3 += h2; + h0 ^= h3; h3 = ROTL64(h3, 52); h0 += h3; + h1 ^= h0; h0 = ROTL64(h0, 26); h1 += h0; + h2 ^= h1; h1 = ROTL64(h1, 51); h2 += h1; + h3 ^= h2; h2 = ROTL64(h2, 28); h3 += h2; + h0 ^= h3; h3 = ROTL64(h3, 9); h0 += h3; + h1 ^= h0; h0 = ROTL64(h0, 47); h1 += h0; + h2 ^= h1; h1 = ROTL64(h1, 54); h2 += h1; + h3 ^= h2; h2 = ROTL64(h2, 32); h3 += h2; + h0 ^= h3; h3 = ROTL64(h3, 25); h0 += h3; + h1 ^= h0; h0 = ROTL64(h0, 63); h1 += h0; } -private: - + private: // // Short is used for messages under 192 bytes in length // Short has a low startup cost, the normal mode is good for long // keys, the cost crossover is at about 192 bytes. The two modes were // held to the same quality bar. 
// - template < uint32_t version, bool bswap > - static void Short( - const void *message, // message (array of bytes, not necessarily aligned) - size_t length, // length of message (in bytes) - uint64_t *hash1, // in/out: in the seed, out the hash value - uint64_t *hash2); // in/out: in the seed, out the hash value + template + static void Short( const void * message, // message (array of bytes, not necessarily aligned) + size_t length, // length of message (in bytes) + uint64_t * hash1, // in/out: in the seed, out the hash value + uint64_t * hash2 ); // in/out: in the seed, out the hash value // number of uint64_t's in internal state - static const size_t sc_numVars = 12; + static const size_t sc_numVars = 12; // size of the internal state - static const size_t sc_blockSize = sc_numVars*8; + static const size_t sc_blockSize = sc_numVars * 8; // size of buffer of unhashed data, in bytes - static const size_t sc_bufSize = 2*sc_blockSize; + static const size_t sc_bufSize = 2 * sc_blockSize; // // sc_const: a constant which: @@ -252,45 +239,45 @@ class SpookyHash { // * is a not-very-regular mix of 1's and 0's // * does not need any other special mathematical properties // - static const uint64_t sc_const = UINT64_C(0xdeadbeefdeadbeef); -}; - -template < uint32_t version, bool bswap > -void SpookyHash::Short(const void *message, size_t length, uint64_t *hash1, uint64_t *hash2) { - size_t remainder = length % 32; - uint64_t a = *hash1; - uint64_t b = *hash2; - uint64_t c = sc_const; - uint64_t d = sc_const; - const uint8_t * ptr = (const uint8_t *)message; + static const uint64_t sc_const = UINT64_C(0xdeadbeefdeadbeef); +}; // class SpookyHash + +template +void SpookyHash::Short( const void * message, size_t length, uint64_t * hash1, uint64_t * hash2 ) { + size_t remainder = length % 32; + uint64_t a = *hash1; + uint64_t b = *hash2; + uint64_t c = sc_const; + uint64_t d = sc_const; + const uint8_t * ptr = (const uint8_t *)message; if (length > 15) { - const uint8_t * 
end = ptr + (length/32)*32; + const uint8_t * end = ptr + (length / 32) * 32; // handle all complete sets of 32 bytes for (; ptr < end; ptr += 32) { c += GET_U64(ptr, 0); d += GET_U64(ptr, 8); - ShortMix(a,b,c,d); + ShortMix(a, b, c, d); a += GET_U64(ptr, 16); b += GET_U64(ptr, 24); } - //Handle the case of 16+ remaining bytes. + // Handle the case of 16+ remaining bytes. if (remainder >= 16) { - c += GET_U64(ptr, 0); - d += GET_U64(ptr, 8); - ShortMix(a,b,c,d); - ptr += 16; + c += GET_U64(ptr, 0); + d += GET_U64(ptr, 8); + ShortMix(a, b, c, d); + ptr += 16; remainder -= 16; } } // Handle the last 0..15 bytes, and its length if (version == 1) { - d = ((uint64_t)length) << 56; + d = ((uint64_t)length) << 56; } else { - d += ((uint64_t)length) << 56; + d += ((uint64_t)length) << 56; } switch (remainder) { case 15: d += ((uint64_t)ptr[14]) << 48; @@ -298,44 +285,44 @@ void SpookyHash::Short(const void *message, size_t length, uint64_t *hash1, uint case 13: d += ((uint64_t)ptr[12]) << 32; case 12: d += GET_U32(ptr, 8); c += GET_U64(ptr, 0); break; case 11: d += ((uint64_t)ptr[10]) << 16; - case 10: d += ((uint64_t)ptr[9]) << 8; - case 9: d += (uint64_t)ptr[8]; - case 8: c += GET_U64(ptr, 0); break; - case 7: c += ((uint64_t)ptr[6]) << 48; - case 6: c += ((uint64_t)ptr[5]) << 40; - case 5: c += ((uint64_t)ptr[4]) << 32; - case 4: c += GET_U32(ptr, 0); break; - case 3: c += ((uint64_t)ptr[2]) << 16; - case 2: c += ((uint64_t)ptr[1]) << 8; - case 1: c += (uint64_t)ptr[0]; break; - case 0: c += sc_const; d += sc_const; break; + case 10: d += ((uint64_t)ptr[ 9]) << 8; + case 9: d += (uint64_t)ptr[ 8]; + case 8: c += GET_U64(ptr, 0); break; + case 7: c += ((uint64_t)ptr[ 6]) << 48; + case 6: c += ((uint64_t)ptr[ 5]) << 40; + case 5: c += ((uint64_t)ptr[ 4]) << 32; + case 4: c += GET_U32(ptr, 0); break; + case 3: c += ((uint64_t)ptr[ 2]) << 16; + case 2: c += ((uint64_t)ptr[ 1]) << 8; + case 1: c += (uint64_t)ptr[0]; break; + case 0: c += sc_const; d += sc_const; break; } - 
ShortEnd(a,b,c,d); + ShortEnd(a, b, c, d); *hash1 = a; *hash2 = b; } // do the whole hash in one call -template < uint32_t version, bool bswap > -void SpookyHash::Hash128(const void * message, size_t length, uint64_t * hash1, uint64_t * hash2) { +template +void SpookyHash::Hash128( const void * message, size_t length, uint64_t * hash1, uint64_t * hash2 ) { if (length < sc_bufSize) { Short(message, length, hash1, hash2); return; } - uint64_t h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11; + uint64_t h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11; const uint8_t * ptr = (const uint8_t *)message; - const uint8_t * end = ptr + (length/sc_blockSize)*sc_blockSize; - size_t remainder; + const uint8_t * end = ptr + (length / sc_blockSize) * sc_blockSize; + size_t remainder; - h0=h3=h6=h9 = *hash1; - h1=h4=h7=h10 = *hash2; - h2=h5=h8=h11 = sc_const; + h0 = h3 = h6 = h9 = *hash1; + h1 = h4 = h7 = h10 = *hash2; + h2 = h5 = h8 = h11 = sc_const; // handle all whole sc_blockSize blocks of bytes while (ptr < end) { - Mix(ptr,h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11); - ptr += sc_blockSize; + Mix(ptr, h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11); + ptr += sc_blockSize; } // handle the last partial block of sc_blockSize bytes @@ -346,118 +333,119 @@ void SpookyHash::Hash128(const void * message, size_t length, uint64_t * hash1, buf[sc_blockSize - 1] = remainder; // do some final mixing - End(h0,h1,h2,h3,h4,h5,h6,h7,h8,h9,h10,h11,buf); + End(h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, buf); *hash1 = h0; *hash2 = h1; } -template < uint32_t version, uint32_t hashlen, bool bswap > -static void spookyhash(const void * in, const size_t len, const seed_t seed, void * out) { - uint64_t h1, h2; - h1 = h2 = (uint64_t)seed; +template +static void spookyhash( const void * in, const size_t len, const seed_t seed, void * out ) { + uint64_t h1, h2; - SpookyHash::Hash128(in, len, &h1, &h2); + h1 = h2 = (uint64_t)seed; - h1 = COND_BSWAP(h1, bswap); - h2 = COND_BSWAP(h2, bswap); + 
SpookyHash::Hash128(in, len, &h1, &h2); - if (hashlen > 64) { - memcpy(out, &h1, 8); - memcpy(((uint8_t *)out) + 8, &h2, hashlen/8 - 8); - } else { - memcpy(out, &h1, hashlen/8); - } + h1 = COND_BSWAP(h1, bswap); + h2 = COND_BSWAP(h2, bswap); + + if (hashlen > 64) { + memcpy(out, &h1, 8); + memcpy(((uint8_t *)out) + 8, &h2, hashlen / 8 - 8); + } else { + memcpy(out, &h1, hashlen / 8); + } } REGISTER_FAMILY(spookyhash, - $.src_url = "https://www.burtleburtle.net/bob/hash/spooky.html", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://www.burtleburtle.net/bob/hash/spooky.html", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); -//{ 0x111af082, 0x26bb3cda, 0x94c4f96c, 0xec24c166 } +// { 0x111af082, 0x26bb3cda, 0x94c4f96c, 0xec24c166 } REGISTER_HASH(SpookyHash1_32, - $.desc = "SpookyHash v1, 32-bit result", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 32, - $.verification_LE = 0x3F798BBB, - $.verification_BE = 0x32C8248C, - $.hashfn_native = spookyhash<1,32,false>, - $.hashfn_bswap = spookyhash<1,32,true> -); + $.desc = "SpookyHash v1, 32-bit result", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 32, + $.verification_LE = 0x3F798BBB, + $.verification_BE = 0x32C8248C, + $.hashfn_native = spookyhash<1, 32, false>, + $.hashfn_bswap = spookyhash<1, 32, true> + ); REGISTER_HASH(SpookyHash1_64, - $.desc = "SpookyHash v1, 64-bit result", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 64, - $.verification_LE = 0xA7F955F1, - $.verification_BE = 0xD6BD6D2B, - $.hashfn_native = spookyhash<1,64,false>, - $.hashfn_bswap = spookyhash<1,64,true> -); + $.desc = "SpookyHash v1, 64-bit result", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 64, + $.verification_LE = 0xA7F955F1, + $.verification_BE = 0xD6BD6D2B, + $.hashfn_native = 
spookyhash<1, 64, false>, + $.hashfn_bswap = spookyhash<1, 64, true> + ); REGISTER_HASH(SpookyHash1_128, - $.desc = "SpookyHash v1, 128-bit result", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 128, - $.verification_LE = 0x8D263080, - $.verification_BE = 0xE9E5572C, - $.hashfn_native = spookyhash<1,128,false>, - $.hashfn_bswap = spookyhash<1,128,true> -); + $.desc = "SpookyHash v1, 128-bit result", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 128, + $.verification_LE = 0x8D263080, + $.verification_BE = 0xE9E5572C, + $.hashfn_native = spookyhash<1, 128, false>, + $.hashfn_bswap = spookyhash<1, 128, true> + ); REGISTER_HASH(SpookyHash2_32, - $.desc = "SpookyHash v2, 32-bit result", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 32, - $.verification_LE = 0xA48BE265, - $.verification_BE = 0x9742FF7D, - $.hashfn_native = spookyhash<2,32,false>, - $.hashfn_bswap = spookyhash<2,32,true>, - $.sort_order = 10 -); + $.desc = "SpookyHash v2, 32-bit result", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 32, + $.verification_LE = 0xA48BE265, + $.verification_BE = 0x9742FF7D, + $.hashfn_native = spookyhash<2, 32, false>, + $.hashfn_bswap = spookyhash<2, 32, true>, + $.sort_order = 10 + ); REGISTER_HASH(SpookyHash2_64, - $.desc = "SpookyHash v2, 64-bit result", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 64, - $.verification_LE = 0x972C4BDC, - $.verification_BE = 0x6B914F15, - $.hashfn_native = spookyhash<2,64,false>, - $.hashfn_bswap = spookyhash<2,64,true>, - $.sort_order = 10 -); + $.desc = "SpookyHash v2, 64-bit result", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 64, + $.verification_LE = 0x972C4BDC, + 
$.verification_BE = 0x6B914F15, + $.hashfn_native = spookyhash<2, 64, false>, + $.hashfn_bswap = spookyhash<2, 64, true>, + $.sort_order = 10 + ); REGISTER_HASH(SpookyHash2_128, - $.desc = "SpookyHash v2, 128-bit result", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 128, - $.verification_LE = 0x893CFCBE, - $.verification_BE = 0x7C1EA273, - $.hashfn_native = spookyhash<2,128,false>, - $.hashfn_bswap = spookyhash<2,128,true>, - $.sort_order = 10 -); + $.desc = "SpookyHash v2, 128-bit result", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 128, + $.verification_LE = 0x893CFCBE, + $.verification_BE = 0x7C1EA273, + $.hashfn_native = spookyhash<2, 128, false>, + $.hashfn_bswap = spookyhash<2, 128, true>, + $.sort_order = 10 + ); diff --git a/hashes/superfasthash.cpp b/hashes/superfasthash.cpp index 633e66c0..158c98f4 100644 --- a/hashes/superfasthash.cpp +++ b/hashes/superfasthash.cpp @@ -28,44 +28,44 @@ #include "Hashlib.h" //------------------------------------------------------------ -template < bool bswap > -static uint32_t SuperFastHash(const uint8_t * data, size_t len, const uint32_t seed) { +template +static uint32_t SuperFastHash( const uint8_t * data, size_t len, const uint32_t seed ) { uint32_t hash = seed; uint32_t tmp; - size_t rem; + size_t rem; - if (len <= 0 || data == NULL) return 0; + if ((len <= 0) || (data == NULL)) { return 0; } hash += len; - rem = len & 3; + rem = len & 3; len >>= 2; /* Main loop */ - for (;len > 0; len--) { - hash += GET_U16(data, 0); - tmp = (GET_U16(data, 2) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - hash += hash >> 11; - data += 2 * sizeof (uint16_t); + for (; len > 0; len--) { + hash += GET_U16(data, 0); + tmp = (GET_U16(data, 2) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + hash += hash >> 11; + data += 2 * sizeof(uint16_t); } /* Handle end cases */ switch (rem) { case 3: - hash += GET_U16(data, 0); - hash 
^= hash << 16; - hash ^= ((uint32_t)(int8_t)data[sizeof (uint16_t)]) << 18; - hash += hash >> 11; - break; + hash += GET_U16(data, 0); + hash ^= hash << 16; + hash ^= ((uint32_t)(int8_t)data[sizeof(uint16_t)]) << 18; + hash += hash >> 11; + break; case 2: - hash += GET_U16(data, 0); - hash ^= hash << 11; - hash += hash >> 17; - break; + hash += GET_U16(data, 0); + hash ^= hash << 11; + hash += hash >> 17; + break; case 1: - hash += (int8_t)(*data); - hash ^= hash << 10; - hash += hash >> 1; + hash += (int8_t)(*data); + hash ^= hash << 10; + hash += hash >> 1; } /* Force "avalanching" of final 127 bits */ @@ -80,29 +80,30 @@ static uint32_t SuperFastHash(const uint8_t * data, size_t len, const uint32_t s } //------------------------------------------------------------ -template < bool bswap > -static void SFH(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void SFH( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = SuperFastHash((const uint8_t *)in, len, (uint32_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(superfasthash, - $.src_url = "http://www.azillionmonkeys.com/qed/hash.html", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "http://www.azillionmonkeys.com/qed/hash.html", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(SuperFastHash, - $.desc = "Paul Hsieh's SuperFastHash", - $.hash_flags = - FLAG_HASH_ENDIAN_INDEPENDENT | - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_LICENSE_GPL3, - $.bits = 32, - $.verification_LE = 0xCFA52B38, - $.verification_BE = 0xDF0823CA, - $.hashfn_native = SFH, - $.hashfn_bswap = SFH -); + $.desc = "Paul Hsieh's SuperFastHash", + $.hash_flags = + FLAG_HASH_ENDIAN_INDEPENDENT | + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_LICENSE_GPL3, + $.bits = 32, + $.verification_LE = 0xCFA52B38, + 
$.verification_BE = 0xDF0823CA, + $.hashfn_native = SFH, + $.hashfn_bswap = SFH + ); diff --git a/hashes/t1ha.cpp b/hashes/t1ha.cpp index 2750fb30..7c44a5f2 100644 --- a/hashes/t1ha.cpp +++ b/hashes/t1ha.cpp @@ -59,7 +59,7 @@ #include "Mathmult.h" #if defined(HAVE_X86_64_AES) -#include "Intrinsics.h" + #include "Intrinsics.h" #endif #include @@ -80,103 +80,102 @@ defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__) || \ defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \ defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__) -#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT + #define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT #else -#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__UNABLE + #define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__UNABLE #endif //------------------------------------------------------------ #if defined(__SANITIZE_ADDRESS__) -#undef T1HA_USE_ALIGNED_ONESHOT_READ -#define T1HA_USE_ALIGNED_ONESHOT_READ 0 -#undef T1HA_SYS_UNALIGNED_ACCESS -#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__UNABLE + #undef T1HA_USE_ALIGNED_ONESHOT_READ + #define T1HA_USE_ALIGNED_ONESHOT_READ 0 + #undef T1HA_SYS_UNALIGNED_ACCESS + #define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__UNABLE #endif #if !defined(PAGESIZE) -#define PAGESIZE 4096 + #define PAGESIZE 4096 #endif -#if T1HA_USE_ALIGNED_ONESHOT_READ && \ - T1HA_SYS_UNALIGNED_ACCESS != T1HA_UNALIGNED_ACCESS__UNABLE && \ +#if T1HA_USE_ALIGNED_ONESHOT_READ && \ + T1HA_SYS_UNALIGNED_ACCESS != T1HA_UNALIGNED_ACCESS__UNABLE && \ defined(PAGESIZE) && PAGESIZE > 42 -#define T1HA_USE_UNALIGNED_ONESHOT_READ 1 -#define can_read_underside(ptr, size) \ - ((size) <= sizeof(uintptr_t) && ((PAGESIZE - (size)) & (uintptr_t)(ptr)) != 0) + #define T1HA_USE_UNALIGNED_ONESHOT_READ 1 + #define can_read_underside(ptr, size) \ + ((size) <= sizeof(uintptr_t) && ((PAGESIZE - (size)) & (uintptr_t)(ptr)) != 0) #else -#define 
T1HA_USE_UNALIGNED_ONESHOT_READ 0 -#define can_read_underside(ptr, size) false + #define T1HA_USE_UNALIGNED_ONESHOT_READ 0 + #define can_read_underside(ptr, size) false #endif #define ALIGNMENT_16 2 #define ALIGNMENT_32 4 #if defined(HAVE_32BIT_PLATFORM) -#define ALIGNMENT_64 4 + #define ALIGNMENT_64 4 #else -#define ALIGNMENT_64 8 + #define ALIGNMENT_64 8 #endif #if defined(__GNUC__) && defined(__GNUC_MINOR__) -#define __GNUC_PREREQ(maj, min) \ - ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) + #define __GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) #else -#define __GNUC_PREREQ(maj, min) 0 + #define __GNUC_PREREQ(maj, min) 0 #endif #if !defined(__has_builtin) -#define __has_builtin(x) (0) + #define __has_builtin(x) (0) #endif #if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_assume_aligned) -#define read_aligned(ptr, bits) \ - (*(const uint##bits##_t *)__builtin_assume_aligned(ptr, ALIGNMENT_##bits)) + #define read_aligned(ptr, bits) \ + (*(const uint ## bits ## _t *)__builtin_assume_aligned(ptr, ALIGNMENT_ ## bits)) #elif (__GNUC_PREREQ(3, 3) || __has_attribute(aligned)) && !defined(__clang__) -#define read_aligned(ptr, bits) \ - (*(const uint##bits##_t __attribute__((aligned(ALIGNMENT_##bits))) *)(ptr)) + #define read_aligned(ptr, bits) \ + (*(const uint ## bits ## _t __attribute__((aligned(ALIGNMENT_ ## bits))) *)(ptr)) #elif __has_attribute(assume_aligned) static __always_inline const - uint16_t *__attribute__((assume_aligned(ALIGNMENT_16))) - cast_aligned_16(const void *ptr) { - return (const uint16_t *)ptr; +uint16_t * __attribute__((assume_aligned(ALIGNMENT_16))) cast_aligned_16( const void * ptr ) { + return (const uint16_t *)ptr; } + static __always_inline const - uint32_t *__attribute__((assume_aligned(ALIGNMENT_32))) - cast_aligned_32(const void *ptr) { - return (const uint32_t *)ptr; +uint32_t * __attribute__((assume_aligned(ALIGNMENT_32))) cast_aligned_32( const void * ptr ) { + return (const 
uint32_t *)ptr; } + static __always_inline const - uint64_t *__attribute__((assume_aligned(ALIGNMENT_64))) - cast_aligned_64(const void *ptr) { - return (const uint64_t *)ptr; +uint64_t * __attribute__((assume_aligned(ALIGNMENT_64))) cast_aligned_64( const void * ptr ) { + return (const uint64_t *)ptr; } -#define read_aligned(ptr, bits) (*cast_aligned_##bits(ptr)) + #define read_aligned(ptr, bits) (*cast_aligned_ ## bits(ptr)) #elif defined(_MSC_VER) -#define read_aligned(ptr, bits) \ - (*(const __declspec(align(ALIGNMENT_##bits)) uint##bits##_t *)(ptr)) + #define read_aligned(ptr, bits) \ + (*(const __declspec(align(ALIGNMENT_ ## bits)) uint ## bits ## _t *)(ptr)) #else -#define read_aligned(ptr, bits) (*(const uint##bits##_t *)(ptr)) + #define read_aligned(ptr, bits) (*(const uint ## bits ## _t *)(ptr)) #endif /* read_aligned */ //------------------------------------------------------------ // 'magic' primes -static const uint64_t prime_0 = UINT64_C(0xEC99BF0D8372CAAB); -static const uint64_t prime_1 = UINT64_C(0x82434FE90EDCEF39); -static const uint64_t prime_2 = UINT64_C(0xD4F06DB99D67BE4B); -static const uint64_t prime_3 = UINT64_C(0xBD9CACC22C6E9571); -static const uint64_t prime_4 = UINT64_C(0x9C06FAF4D023E3AB); -static const uint64_t prime_5 = UINT64_C(0xC060724A8424F345); -static const uint64_t prime_6 = UINT64_C(0xCB5AF53AE3AAAC31); +static const uint64_t prime_0 = UINT64_C(0xEC99BF0D8372CAAB); +static const uint64_t prime_1 = UINT64_C(0x82434FE90EDCEF39); +static const uint64_t prime_2 = UINT64_C(0xD4F06DB99D67BE4B); +static const uint64_t prime_3 = UINT64_C(0xBD9CACC22C6E9571); +static const uint64_t prime_4 = UINT64_C(0x9C06FAF4D023E3AB); +static const uint64_t prime_5 = UINT64_C(0xC060724A8424F345); +static const uint64_t prime_6 = UINT64_C(0xCB5AF53AE3AAAC31); static const uint32_t prime32_0 = UINT32_C(0x92D78269); static const uint32_t prime32_1 = UINT32_C(0xCA9B4735); @@ -195,17 +194,17 @@ enum t1ha_modes { }; #define MODE_NATIVE(m) (((m) == 
MODE_LE_NATIVE) || ((m) == MODE_BE_NATIVE)) -#define MODE_BSWAP(m) (((m) == MODE_LE_BSWAP ) || ((m) == MODE_BE_BSWAP )) -#define MODE_BE_SYS(m) (((m) == MODE_BE_BSWAP ) || ((m) == MODE_BE_NATIVE)) -#define MODE_LE_SYS(m) (((m) == MODE_LE_NATIVE) || ((m) == MODE_LE_BSWAP )) -#define MODE_BE_OUT(m) (((m) == MODE_LE_BSWAP ) || ((m) == MODE_BE_NATIVE)) -#define MODE_LE_OUT(m) (((m) == MODE_LE_NATIVE) || ((m) == MODE_BE_BSWAP )) +#define MODE_BSWAP(m) (((m) == MODE_LE_BSWAP) || ((m) == MODE_BE_BSWAP)) +#define MODE_BE_SYS(m) (((m) == MODE_BE_BSWAP) || ((m) == MODE_BE_NATIVE)) +#define MODE_LE_SYS(m) (((m) == MODE_LE_NATIVE) || ((m) == MODE_LE_BSWAP)) +#define MODE_BE_OUT(m) (((m) == MODE_LE_BSWAP) || ((m) == MODE_BE_NATIVE)) +#define MODE_LE_OUT(m) (((m) == MODE_LE_NATIVE) || ((m) == MODE_BE_BSWAP)) //------------------------------------------------------------ -template < enum t1ha_modes mode, bool aligned > -static FORCE_INLINE uint32_t fetch16(const void * v) { +template +static FORCE_INLINE uint32_t fetch16( const void * v ) { constexpr bool force_aligned = (T1HA_SYS_UNALIGNED_ACCESS != T1HA_UNALIGNED_ACCESS__UNABLE); - uint16_t result; + uint16_t result; if (aligned) { assert(((uintptr_t)v) % ALIGNMENT_16 == 0); } @@ -213,7 +212,7 @@ static FORCE_INLINE uint32_t fetch16(const void * v) { return COND_BSWAP(read_aligned(v, 16), MODE_BSWAP(mode)); } - const uint8_t *p = (const uint8_t *)v; + const uint8_t * p = (const uint8_t *)v; if (MODE_BE_OUT(mode)) { return (uint16_t)p[0] << 8 | p[1]; } else { @@ -221,10 +220,10 @@ static FORCE_INLINE uint32_t fetch16(const void * v) { } } -template < enum t1ha_modes mode, bool aligned > -static FORCE_INLINE uint32_t fetch32(const void * v) { +template +static FORCE_INLINE uint32_t fetch32( const void * v ) { constexpr bool force_aligned = (T1HA_SYS_UNALIGNED_ACCESS != T1HA_UNALIGNED_ACCESS__UNABLE); - uint32_t result; + uint32_t result; if (aligned) { assert(((uintptr_t)v) % ALIGNMENT_32 == 0); } @@ -233,18 +232,18 @@ static 
FORCE_INLINE uint32_t fetch32(const void * v) { } if (MODE_BE_OUT(mode)) { - return (uint32_t)fetch16(v) << 16 | - fetch16((const uint8_t *)v + 2); + return (uint32_t)fetch16(v) << 16 | + fetch16((const uint8_t *)v + 2); } else { - return fetch16(v) | - (uint32_t)fetch16((const uint8_t *)v + 2) << 16; + return fetch16(v) | + (uint32_t)fetch16((const uint8_t *)v + 2) << 16; } } -template < enum t1ha_modes mode, bool aligned > -static FORCE_INLINE uint64_t fetch64(const void * v) { +template +static FORCE_INLINE uint64_t fetch64( const void * v ) { constexpr bool force_aligned = (T1HA_SYS_UNALIGNED_ACCESS != T1HA_UNALIGNED_ACCESS__UNABLE); - uint64_t result; + uint64_t result; if (aligned) { assert(((uintptr_t)v) % ALIGNMENT_64 == 0); } @@ -253,19 +252,19 @@ static FORCE_INLINE uint64_t fetch64(const void * v) { } if (MODE_BE_OUT(mode)) { - return (uint64_t)fetch32(v) << 32 | - fetch32((const uint8_t *)v + 4); + return (uint64_t)fetch32(v) << 32 | + fetch32((const uint8_t *)v + 4); } else { - return fetch32(v) | - (uint64_t)fetch32((const uint8_t *)v + 4) << 32; + return fetch32(v) | + (uint64_t)fetch32((const uint8_t *)v + 4) << 32; } } //------------------------------------------------------------ -template < enum t1ha_modes mode, bool aligned > -static FORCE_INLINE uint32_t tail32(const void *v, size_t tail) { - constexpr bool unaligned_wordwise = (T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT); - const uint8_t *const p = (const uint8_t *)v; +template +static FORCE_INLINE uint32_t tail32( const void * v, size_t tail ) { + constexpr bool unaligned_wordwise = (T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT); + const uint8_t * const p = (const uint8_t *)v; uint32_t r = 0; if (aligned && T1HA_USE_ALIGNED_ONESHOT_READ) { @@ -283,46 +282,48 @@ static FORCE_INLINE uint32_t tail32(const void *v, size_t tail) { * for the reminder. 
*/ const unsigned offset = (4 - tail) & 3; - const unsigned shift = offset << 3; + const unsigned shift = offset << 3; if (MODE_LE_OUT(mode)) { if (likely(can_read_underside(p, 4))) { - return fetch32(p - offset) >> shift; + return fetch32(p - offset) >> shift; } - return fetch32(p) & ((~UINT32_C(0)) >> shift); + return fetch32(p) & ((~UINT32_C(0)) >> shift); } else { if (likely(can_read_underside(p, 4))) { - return fetch32(p - offset) & ((~UINT32_C(0)) >> shift); + return fetch32(p - offset) & ((~UINT32_C(0)) >> shift); } - return fetch32(p) >> shift; + return fetch32(p) >> shift; } } if ((mode == MODE_LE_NATIVE) && (aligned || unaligned_wordwise)) { switch (tail & 3) { case 3: - r = (uint32_t)p[2] << 16; - /* fall through */ + r = (uint32_t)p[2] << 16; + /* fall through */ case 2: - return r + fetch16(p); + return r + fetch16(p); case 1: - return p[0]; + return p[0]; case 0: - return fetch32(v); + return fetch32(v); } } if ((mode == MODE_BE_NATIVE) && (aligned || unaligned_wordwise)) { - /* For most CPUs this code is better when not needed - * copying for alignment or byte reordering. */ + /* + * For most CPUs this code is better when not needed + * copying for alignment or byte reordering. 
+ */ switch (tail & 3) { case 3: - return fetch16(p) << 8 | p[2]; + return fetch16(p) << 8 | p[2]; case 2: - return fetch16(p); + return fetch16(p); case 1: - return p[0]; + return p[0]; case 0: - return fetch32(p); + return fetch32(p); } } @@ -330,19 +331,19 @@ static FORCE_INLINE uint32_t tail32(const void *v, size_t tail) { ((mode == MODE_LE_NATIVE) && !aligned && !unaligned_wordwise)) { switch (tail & 3) { case 0: - r += p[3]; - r <<= 8; - /* fall through */ + r += p[3]; + r <<= 8; + /* fall through */ case 3: - r += p[2]; - r <<= 8; - /* fall through */ + r += p[2]; + r <<= 8; + /* fall through */ case 2: - r += p[1]; - r <<= 8; - /* fall through */ + r += p[1]; + r <<= 8; + /* fall through */ case 1: - return r + p[0]; + return r + p[0]; } } @@ -350,14 +351,14 @@ static FORCE_INLINE uint32_t tail32(const void *v, size_t tail) { ((mode == MODE_BE_NATIVE) && !aligned && !unaligned_wordwise)) { switch (tail & 3) { case 0: - return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | - (uint32_t)p[0] << 24; + return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | + (uint32_t)p[0] << 24; case 3: - return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; + return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; case 2: - return p[1] | (uint32_t)p[0] << 8; + return p[1] | (uint32_t)p[0] << 8; case 1: - return p[0]; + return p[0]; } } @@ -366,19 +367,19 @@ static FORCE_INLINE uint32_t tail32(const void *v, size_t tail) { } //------------------------------------------------------------ -template < enum t1ha_modes mode, bool aligned > -static FORCE_INLINE uint64_t tail64(const void *v, size_t tail) { - constexpr bool unaligned_wordwise = (T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT); - const uint8_t *const p = (const uint8_t *)v; +template +static FORCE_INLINE uint64_t tail64( const void * v, size_t tail ) { + constexpr bool unaligned_wordwise = (T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT); + const uint8_t * const p = (const 
uint8_t *)v; uint64_t r = 0; if (aligned && T1HA_USE_ALIGNED_ONESHOT_READ) { /* We can perform a 'oneshot' read, which is little bit faster. */ const unsigned shift = ((8 - tail) & 7) << 3; if (MODE_LE_OUT(mode)) { - return fetch64(p) & ((~UINT64_C(0)) >> shift); + return fetch64(p) & ((~UINT64_C(0)) >> shift); } else { - return fetch64(p) >> shift; + return fetch64(p) >> shift; } } else if (!aligned && T1HA_USE_UNALIGNED_ONESHOT_READ) { /* @@ -387,17 +388,17 @@ static FORCE_INLINE uint64_t tail64(const void *v, size_t tail) { * for the reminder. */ const unsigned offset = (8 - tail) & 7; - const unsigned shift = offset << 3; + const unsigned shift = offset << 3; if (MODE_LE_OUT(mode)) { if (likely(can_read_underside(p, 8))) { - return fetch64(p - offset) >> shift; + return fetch64(p - offset) >> shift; } - return fetch64(p) & ((~UINT64_C(0)) >> shift); + return fetch64(p) & ((~UINT64_C(0)) >> shift); } else { if (likely(can_read_underside(p, 8))) { - return fetch64(p - offset) & ((~UINT64_C(0)) >> shift); + return fetch64(p - offset) & ((~UINT64_C(0)) >> shift); } - return fetch64(p) >> shift; + return fetch64(p) >> shift; } } @@ -405,27 +406,27 @@ static FORCE_INLINE uint64_t tail64(const void *v, size_t tail) { /* For most CPUs this code is better when not needed byte reordering. 
*/ switch (tail & 7) { case 0: - return fetch64(p); + return fetch64(p); case 7: - r = (uint64_t)p[6] << 8; - /* fall through */ + r = (uint64_t)p[6] << 8; + /* fall through */ case 6: - r += p[5]; - r <<= 8; - /* fall through */ + r += p [5]; + r <<= 8; + /* fall through */ case 5: - r += p[4]; - r <<= 32; - /* fall through */ + r += p [4]; + r <<= 32; + /* fall through */ case 4: - return r + fetch32(p); + return r + fetch32(p); case 3: - r = (uint64_t)p[2] << 16; - /* fall through */ + r = (uint64_t)p[2] << 16; + /* fall through */ case 2: - return r + fetch16(p); + return r + fetch16(p); case 1: - return p[0]; + return p[0]; } } @@ -433,22 +434,22 @@ static FORCE_INLINE uint64_t tail64(const void *v, size_t tail) { /* For most CPUs this code is better when not byte reordering. */ switch (tail & 7) { case 1: - return p[0]; + return p[0]; case 2: - return fetch16(p); + return fetch16(p); case 3: - return (uint32_t)fetch16(p) << 8 | p[2]; + return (uint32_t)fetch16(p) << 8 | p[2]; case 4: - return fetch32(p); + return fetch32(p); case 5: - return (uint64_t)fetch32(p) << 8 | p[4]; + return (uint64_t)fetch32(p) << 8 | p[4]; case 6: - return (uint64_t)fetch32(p) << 16 | fetch16(p + 4); + return (uint64_t)fetch32(p) << 16 | fetch16(p + 4); case 7: - return (uint64_t)fetch32(p) << 24 | - (uint32_t)fetch16(p + 4) << 8 | p[6]; + return (uint64_t)fetch32(p) << 24 | + (uint32_t)fetch16(p + 4) << 8 | p[6]; case 0: - return fetch64(p); + return fetch64(p); } } @@ -456,34 +457,34 @@ static FORCE_INLINE uint64_t tail64(const void *v, size_t tail) { ((mode == MODE_LE_NATIVE) && !aligned && !unaligned_wordwise)) { switch (tail & 7) { case 0: - r = p[7] << 8; - /* fall through */ + r = p [7] << 8; + /* fall through */ case 7: - r += p[6]; - r <<= 8; - /* fall through */ + r += p[6]; + r <<= 8; + /* fall through */ case 6: - r += p[5]; - r <<= 8; - /* fall through */ + r += p[5]; + r <<= 8; + /* fall through */ case 5: - r += p[4]; - r <<= 8; - /* fall through */ + r += p[4]; + r 
<<= 8; + /* fall through */ case 4: - r += p[3]; - r <<= 8; - /* fall through */ + r += p[3]; + r <<= 8; + /* fall through */ case 3: - r += p[2]; - r <<= 8; - /* fall through */ + r += p[2]; + r <<= 8; + /* fall through */ case 2: - r += p[1]; - r <<= 8; - /* fall through */ + r += p[1]; + r <<= 8; + /* fall through */ case 1: - return r + p[0]; + return r + p[0]; } } @@ -491,28 +492,28 @@ static FORCE_INLINE uint64_t tail64(const void *v, size_t tail) { ((mode == MODE_BE_NATIVE) && !aligned && !unaligned_wordwise)) { switch (tail & 7) { case 1: - return p[0]; + return p[0]; case 2: - return p[1] | (uint32_t)p[0] << 8; + return p[1] | (uint32_t)p[0] << 8; case 3: - return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; + return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; case 4: - return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | - (uint32_t)p[0] << 24; + return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | + (uint32_t)p[0] << 24; case 5: - return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 | - (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32; + return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 | + (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32; case 6: - return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 | - (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40; + return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 | + (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40; case 7: - return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 | - (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 | - (uint64_t)p[0] << 48; + return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 | + (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 | + (uint64_t)p[0] << 48; case 0: - return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 | - (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 | - (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56; + return p[7] | 
(uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 | + (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 | + (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56; } } @@ -522,127 +523,131 @@ static FORCE_INLINE uint64_t tail64(const void *v, size_t tail) { //------------------------------------------------------------ // T1HA0 (non-AES version) -static FORCE_INLINE void mixup32(uint32_t *a, uint32_t *b, uint32_t v, - uint32_t prime) { +static FORCE_INLINE void mixup32( uint32_t * a, uint32_t * b, uint32_t v, uint32_t prime ) { uint32_t rlo, rhi; + mult32_64(rlo, rhi, *b + v, prime); *a ^= rlo; *b += rhi; } -static FORCE_INLINE uint64_t final32(uint32_t a, uint32_t b) { - uint64_t l = (b ^ ROTR32(a, 13)) | (uint64_t)a << 32; - l *= prime_0; - l ^= l >> 41; - l *= prime_4; - l ^= l >> 47; - l *= prime_6; - return l; +static FORCE_INLINE uint64_t final32( uint32_t a, uint32_t b ) { + uint64_t l = (b ^ ROTR32(a, 13)) | (uint64_t)a << 32; + + l *= prime_0; + l ^= l >> 41; + l *= prime_4; + l ^= l >> 47; + l *= prime_6; + return l; } -template < enum t1ha_modes mode, bool aligned32 > -static uint64_t t1ha0_32_impl(const void *data, size_t len, uint64_t seed) { - uint32_t a = ROTR32((uint32_t)len, 17) + (uint32_t)seed; - uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32); +template +static uint64_t t1ha0_32_impl( const void * data, size_t len, uint64_t seed ) { + uint32_t a = ROTR32((uint32_t)len, 17) + (uint32_t)seed; + uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32); - const uint32_t *v = (const uint32_t *)data; + const uint32_t * v = (const uint32_t *)data; - if (unlikely(len > 16)) { - uint32_t c = ~a; - uint32_t d = ROTR32(b, 5); - const uint32_t *detent = - (const uint32_t *)((const uint8_t *)data + len - 15); - do { - const uint32_t w0 = fetch32(v + 0); - const uint32_t w1 = fetch32(v + 1); - const uint32_t w2 = fetch32(v + 2); - const uint32_t w3 = fetch32(v + 3); - v += 4; - prefetch(v); - - const uint32_t d13 = w1 + ROTR32(w3 + d, 17); - const uint32_t c02 
= w0 ^ ROTR32(w2 + c, 11); - d ^= ROTR32(a + w0, 3); - c ^= ROTR32(b + w1, 7); - b = prime32_1 * (c02 + w3); - a = prime32_0 * (d13 ^ w2); - } while (likely(v < detent)); - - c += a; - d += b; - a ^= prime32_6 * (ROTR32(c, 16) + d); - b ^= prime32_5 * (c + ROTR32(d, 16)); - - len &= 15; - } - - switch (len) { - default: - mixup32(&a, &b, fetch32(v++), prime32_4); - /* fall through */ - case 12: - case 11: - case 10: - case 9: - mixup32(&b, &a, fetch32(v++), prime32_3); - /* fall through */ - case 8: - case 7: - case 6: - case 5: - mixup32(&a, &b, fetch32(v++), prime32_2); - /* fall through */ - case 4: - case 3: - case 2: - case 1: - mixup32(&b, &a, tail32(v, len), prime32_1); - /* fall through */ - case 0: - return final32(a, b); - } + if (unlikely(len > 16)) { + uint32_t c = ~a; + uint32_t d = ROTR32(b, 5); + const uint32_t * detent = + (const uint32_t *)((const uint8_t *)data + len - 15); + do { + const uint32_t w0 = fetch32(v + 0); + const uint32_t w1 = fetch32(v + 1); + const uint32_t w2 = fetch32(v + 2); + const uint32_t w3 = fetch32(v + 3); + v += 4; + prefetch(v); + + const uint32_t d13 = w1 + ROTR32(w3 + d, 17); + const uint32_t c02 = w0 ^ ROTR32(w2 + c, 11); + d ^= ROTR32(a + w0, 3); + c ^= ROTR32(b + w1, 7); + b = prime32_1 * (c02 + w3); + a = prime32_0 * (d13 ^ w2); + } while (likely(v < detent)); + + c += a; + d += b; + a ^= prime32_6 * (ROTR32(c , 16) + d); + b ^= prime32_5 * (c + ROTR32(d, 16) ); + + len &= 15; + } + + switch (len) { + default: + mixup32(&a, &b, fetch32(v++) , prime32_4); + /* fall through */ + case 12: + case 11: + case 10: + case 9: + mixup32(&b, &a, fetch32(v++) , prime32_3); + /* fall through */ + case 8: + case 7: + case 6: + case 5: + mixup32(&a, &b, fetch32(v++) , prime32_2); + /* fall through */ + case 4: + case 3: + case 2: + case 1: + mixup32(&b, &a, tail32(v, len), prime32_1); + /* fall through */ + case 0: + return final32(a, b); + } } //------------------------------------------------------------ // T1HA1 /* xor high and 
low parts of full 128-bit product */ -static FORCE_INLINE uint64_t mux64(uint64_t v, uint64_t prime) { - uint64_t l, h; - mult64_128(l, h, v, prime); - return l ^ h; +static FORCE_INLINE uint64_t mux64( uint64_t v, uint64_t prime ) { + uint64_t l, h; + + mult64_128(l, h, v, prime); + return l ^ h; } /* xor-mul-xor mixer */ -static FORCE_INLINE uint64_t mix64(uint64_t v, uint64_t p) { - v *= p; - return v ^ ROTR64(v, 41); +static FORCE_INLINE uint64_t mix64( uint64_t v, uint64_t p ) { + v *= p; + return v ^ ROTR64(v, 41); } -static FORCE_INLINE uint64_t final_weak_avalanche(uint64_t a, uint64_t b) { - /* LY: for performance reason on a some not high-end CPUs - * I replaced the second mux64() operation by mix64(). - * Unfortunately this approach fails the "strict avalanche criteria", - * see test results at https://github.com/demerphq/smhasher. */ - return mux64(ROTR64(a + b, 17), prime_4) + mix64(a ^ b, prime_0); +static FORCE_INLINE uint64_t final_weak_avalanche( uint64_t a, uint64_t b ) { + /* + * LY: for performance reason on a some not high-end CPUs + * I replaced the second mux64() operation by mix64(). + * Unfortunately this approach fails the "strict avalanche criteria", + * see test results at https://github.com/demerphq/smhasher. 
+ */ + return mux64(ROTR64(a + b, 17), prime_4) + mix64(a ^ b, prime_0); } -template < enum t1ha_modes mode, bool aligned64 > -static uint64_t t1ha1_impl(const void *data, size_t len, uint64_t seed) { - const uint64_t *v = (const uint64_t *)data; - uint64_t a = seed; - uint64_t b = len; +template +static uint64_t t1ha1_impl( const void * data, size_t len, uint64_t seed ) { + const uint64_t * v = (const uint64_t *)data; + uint64_t a = seed; + uint64_t b = len; if (unlikely(len > 32)) { - uint64_t c = ROTR64(len, 17) + seed; - uint64_t d = len ^ ROTR64(seed, 17); - const uint64_t *detent = - (const uint64_t *)((const uint8_t *)data + len - 31); + uint64_t c = ROTR64(len, 17) + seed; + uint64_t d = len ^ ROTR64(seed, 17); + const uint64_t * detent = + (const uint64_t *)((const uint8_t *)data + len - 31); do { - const uint64_t w0 = fetch64(v + 0); - const uint64_t w1 = fetch64(v + 1); - const uint64_t w2 = fetch64(v + 2); - const uint64_t w3 = fetch64(v + 3); + const uint64_t w0 = fetch64(v + 0); + const uint64_t w1 = fetch64(v + 1); + const uint64_t w2 = fetch64(v + 2); + const uint64_t w3 = fetch64(v + 3); v += 4; prefetch(v); @@ -654,15 +659,15 @@ static uint64_t t1ha1_impl(const void *data, size_t len, uint64_t seed) { a ^= prime_1 * (d02 + w3); } while (likely(v < detent)); - a ^= prime_6 * (ROTR64(c, 17) + d); - b ^= prime_5 * (c + ROTR64(d, 17)); + a ^= prime_6 * (ROTR64(c , 17) + d); + b ^= prime_5 * (c + ROTR64(d, 17) ); len &= 31; } switch (len) { default: - b += mux64(fetch64(v++), prime_4); - /* fall through */ + b += mux64(fetch64(v++) , prime_4); + /* fall through */ case 24: case 23: case 22: @@ -671,8 +676,8 @@ static uint64_t t1ha1_impl(const void *data, size_t len, uint64_t seed) { case 19: case 18: case 17: - a += mux64(fetch64(v++), prime_3); - /* fall through */ + a += mux64(fetch64(v++) , prime_3); + /* fall through */ case 16: case 15: case 14: @@ -680,21 +685,21 @@ static uint64_t t1ha1_impl(const void *data, size_t len, uint64_t seed) { case 
12: case 11: case 10: - case 9: - b += mux64(fetch64(v++), prime_2); - /* fall through */ - case 8: - case 7: - case 6: - case 5: - case 4: - case 3: - case 2: - case 1: - a += mux64(tail64(v, len), prime_1); - /* fall through */ - case 0: - return final_weak_avalanche(a, b); + case 9: + b += mux64(fetch64(v++) , prime_2); + /* fall through */ + case 8: + case 7: + case 6: + case 5: + case 4: + case 3: + case 2: + case 1: + a += mux64(tail64(v, len), prime_1); + /* fall through */ + case 0: + return final_weak_avalanche(a, b); } } @@ -703,99 +708,103 @@ static uint64_t t1ha1_impl(const void *data, size_t len, uint64_t seed) { // XXX T1HA_ALIGN_PREFIX and T1HA_ALIGN_SUFFIX were not ported typedef union t1ha_state256 { - uint8_t bytes[32]; - uint32_t u32[8]; - uint64_t u64[4]; - struct { - uint64_t a, b, c, d; - } n; + uint8_t bytes[32]; + uint32_t u32[8]; + uint64_t u64[4]; + struct { + uint64_t a, b, c, d; + } n; } t1ha_state256_t; typedef struct t1ha_context { - t1ha_state256_t state; - t1ha_state256_t buffer; - size_t partial; - uint64_t total; + t1ha_state256_t state; + t1ha_state256_t buffer; + size_t partial; + uint64_t total; } t1ha_context_t; -static FORCE_INLINE void init_ab(t1ha_state256_t * s, uint64_t x, uint64_t y) { - s->n.a = x; - s->n.b = y; +static FORCE_INLINE void init_ab( t1ha_state256_t * s, uint64_t x, uint64_t y ) { + s->n.a = x; + s->n.b = y; } -static FORCE_INLINE void init_cd(t1ha_state256_t * s, uint64_t x, uint64_t y) { - s->n.c = ROTR64(y, 23) + ~x; - s->n.d = ~y + ROTR64(x, 19); +static FORCE_INLINE void init_cd( t1ha_state256_t * s, uint64_t x, uint64_t y ) { + s->n.c = ROTR64(y, 23) + ~x; + s->n.d = ~y + ROTR64(x, 19); } -static FORCE_INLINE void squash(t1ha_state256_t * s) { - s->n.a ^= prime_6 * (s->n.c + ROTR64(s->n.d, 23)); - s->n.b ^= prime_5 * (ROTR64(s->n.c, 19) + s->n.d); +static FORCE_INLINE void squash( t1ha_state256_t * s ) { + s->n.a ^= prime_6 * (s->n.c + ROTR64(s->n.d, 23) ); + s->n.b ^= prime_5 * (ROTR64(s->n.c , 19) + 
s->n.d); } -static FORCE_INLINE void mixup64(uint64_t * RESTRICT a, - uint64_t * RESTRICT b, uint64_t v, uint64_t prime) { +static FORCE_INLINE void mixup64( uint64_t * RESTRICT a, uint64_t * RESTRICT b, uint64_t v, uint64_t prime ) { uint64_t l, h; + mult64_128(l, h, *b + v, prime); *a ^= l; *b += h; } -static FORCE_INLINE uint64_t final64(uint64_t a, uint64_t b) { - uint64_t x = (a + ROTR64(b, 41)) * prime_0; - uint64_t y = (ROTR64(a, 23) + b) * prime_6; - return mux64(x ^ y, prime_5); +static FORCE_INLINE uint64_t final64( uint64_t a, uint64_t b ) { + uint64_t x = (a + ROTR64(b, 41) ) * prime_0; + uint64_t y = (ROTR64(a , 23) + b) * prime_6; + + return mux64(x ^ y, prime_5); } -static FORCE_INLINE uint64_t final128(uint64_t a, uint64_t b, uint64_t c, - uint64_t d, uint64_t * h) { - mixup64(&a, &b, ROTR64(c, 41) ^ d, prime_0); - mixup64(&b, &c, ROTR64(d, 23) ^ a, prime_6); - mixup64(&c, &d, ROTR64(a, 19) ^ b, prime_5); - mixup64(&d, &a, ROTR64(b, 31) ^ c, prime_4); - *h = c + d; - return a ^ b; +static FORCE_INLINE uint64_t final128( uint64_t a, uint64_t b, uint64_t c, uint64_t d, uint64_t * h ) { + mixup64(&a, &b, ROTR64(c, 41) ^ d, prime_0); + mixup64(&b, &c, ROTR64(d, 23) ^ a, prime_6); + mixup64(&c, &d, ROTR64(a, 19) ^ b, prime_5); + mixup64(&d, &a, ROTR64(b, 31) ^ c, prime_4); + *h = c + d; + return a ^ b; } -template < enum t1ha_modes mode, bool aligned64 > -static void T1HA2_UPDATE(t1ha_state256_t * const s, const uint64_t *v) { - const uint64_t w0 = fetch64(v + 0); - const uint64_t w1 = fetch64(v + 1); - const uint64_t w2 = fetch64(v + 2); - const uint64_t w3 = fetch64(v + 3); +template +static void T1HA2_UPDATE( t1ha_state256_t * const s, const uint64_t * v ) { + const uint64_t w0 = fetch64(v + 0); + const uint64_t w1 = fetch64(v + 1); + const uint64_t w2 = fetch64(v + 2); + const uint64_t w3 = fetch64(v + 3); const uint64_t d02 = w0 + ROTR64(w2 + s->n.d, 56); const uint64_t c13 = w1 + ROTR64(w3 + s->n.c, 19); + s->n.d ^= s->n.b + ROTR64(w1, 38); s->n.c 
^= s->n.a + ROTR64(w0, 57); s->n.b ^= prime_6 * (c13 + w2); s->n.a ^= prime_5 * (d02 + w3); } -template < enum t1ha_modes mode, bool aligned64 > -static const void * T1HA2_LOOP(t1ha_state256_t * const state, const void *data, size_t len) { - const void *detent = (const uint8_t *)data + len - 31; +template +static const void * T1HA2_LOOP( t1ha_state256_t * const state, const void * data, size_t len ) { + const void * detent = (const uint8_t *)data + len - 31; + do { - const uint64_t *v = (const uint64_t *)data; + const uint64_t * v = (const uint64_t *)data; data = v + 4; prefetch(data); - T1HA2_UPDATE(state, v); + T1HA2_UPDATE(state, v); } while (likely(data < detent)); return data; } -template < enum t1ha_modes mode, bool aligned64, bool use_ABCD > -static uint64_t T1HA2_TAIL(t1ha_state256_t * const s, const void *data, size_t len, uint64_t * RESTRICT extra_result = NULL) { - const uint64_t *v = (const uint64_t *)data; - uint64_t val; +template +static uint64_t T1HA2_TAIL( t1ha_state256_t * const s, const void * data, + size_t len, uint64_t * RESTRICT extra_result = NULL ) { + const uint64_t * v = (const uint64_t *)data; + uint64_t val; + switch (len) { default: - if (use_ABCD) { - mixup64(&s->n.a, &s->n.d, fetch64(v++), prime_4); - } else { - mixup64(&s->n.a, &s->n.b, fetch64(v++), prime_4); - } - /* fall through */ + if (use_ABCD) { + mixup64(&s->n.a, &s->n.d, fetch64(v++), prime_4); + } else { + mixup64(&s->n.a, &s->n.b, fetch64(v++), prime_4); + } + /* fall through */ case 24: case 23: case 22: @@ -804,9 +813,9 @@ static uint64_t T1HA2_TAIL(t1ha_state256_t * const s, const void *data, size_t l case 19: case 18: case 17: - // ".b, .a" for either value of use_ABCD - mixup64(&s->n.b, &s->n.a, fetch64(v++), prime_3); - /* fall through */ + // ".b, .a" for either value of use_ABCD + mixup64(&s->n.b, &s->n.a, fetch64(v++), prime_3); + /* fall through */ case 16: case 15: case 14: @@ -814,92 +823,91 @@ static uint64_t T1HA2_TAIL(t1ha_state256_t * const s, const void 
*data, size_t l case 12: case 11: case 10: - case 9: - if (use_ABCD) { - mixup64(&s->n.c, &s->n.b, fetch64(v++), prime_2); - } else { - mixup64(&s->n.a, &s->n.b, fetch64(v++), prime_2); - } - /* fall through */ - case 8: - case 7: - case 6: - case 5: - case 4: - case 3: - case 2: - case 1: - val = tail64(v, len); - if (use_ABCD) { - mixup64(&s->n.d, &s->n.c, val, prime_1); - } else { - mixup64(&s->n.b, &s->n.a, val, prime_1); - } - /* fall through */ - case 0: - if (use_ABCD) { - return final128(s->n.a, s->n.b, s->n.c, s->n.d, extra_result); - } else { - return final64(s->n.a, s->n.b); - } + case 9: + if (use_ABCD) { + mixup64(&s->n.c, &s->n.b, fetch64(v++), prime_2); + } else { + mixup64(&s->n.a, &s->n.b, fetch64(v++), prime_2); + } + /* fall through */ + case 8: + case 7: + case 6: + case 5: + case 4: + case 3: + case 2: + case 1: + val = tail64(v, len); + if (use_ABCD) { + mixup64(&s->n.d, &s->n.c, val, prime_1); + } else { + mixup64(&s->n.b, &s->n.a, val, prime_1); + } + /* fall through */ + case 0: + if (use_ABCD) { + return final128(s->n.a, s->n.b, s->n.c, s->n.d, extra_result); + } else { + return final64(s->n.a, s->n.b); + } } } -static void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y) { - init_ab(&ctx->state, seed_x, seed_y); - init_cd(&ctx->state, seed_x, seed_y); - ctx->partial = 0; - ctx->total = 0; +static void t1ha2_init( t1ha_context_t * ctx, uint64_t seed_x, uint64_t seed_y ) { + init_ab(&ctx->state, seed_x, seed_y); + init_cd(&ctx->state, seed_x, seed_y); + ctx->partial = 0; + ctx->total = 0; } -template < enum t1ha_modes mode > -static void t1ha2_update(t1ha_context_t * RESTRICT ctx, const void * RESTRICT data, - size_t length) { - ctx->total += length; - - if (ctx->partial) { - const size_t left = 32 - ctx->partial; - const size_t chunk = (length >= left) ? 
left : length; - memcpy(ctx->buffer.bytes + ctx->partial, data, chunk); - ctx->partial += chunk; - if (ctx->partial < 32) { - assert(left >= length); - return; +template +static void t1ha2_update( t1ha_context_t * RESTRICT ctx, const void * RESTRICT data, size_t length ) { + ctx->total += length; + + if (ctx->partial) { + const size_t left = 32 - ctx->partial; + const size_t chunk = (length >= left) ? left : length; + memcpy(ctx->buffer.bytes + ctx->partial, data, chunk); + ctx->partial += chunk; + if (ctx->partial < 32) { + assert(left >= length); + return; + } + ctx->partial = 0; + data = (const uint8_t *)data + chunk; + length -= chunk; + T1HA2_UPDATE(&ctx->state, ctx->buffer.u64); + } + + if (length >= 32) { + if ((T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT) || + ((((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0)) { + data = T1HA2_LOOP(&ctx->state, data, length); + } else { + data = T1HA2_LOOP(&ctx->state, data, length); + } + length &= 31; + } + + if (length) { + memcpy(ctx->buffer.bytes, data, ctx->partial = length); } - ctx->partial = 0; - data = (const uint8_t *)data + chunk; - length -= chunk; - T1HA2_UPDATE(&ctx->state, ctx->buffer.u64); - } - - if (length >= 32) { - if ((T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT) || - ((((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0)) { - data = T1HA2_LOOP(&ctx->state, data, length); - } else { - data = T1HA2_LOOP(&ctx->state, data, length); - } - length &= 31; - } - - if (length) { - memcpy(ctx->buffer.bytes, data, ctx->partial = length); - } } -template < enum t1ha_modes mode > -static uint64_t t1ha2_final(t1ha_context_t * RESTRICT ctx, - uint64_t * RESTRICT extra_result) { - uint64_t bits = (ctx->total << 3) ^ (UINT64_C(1) << 63); - bits = COND_BSWAP(bits, MODE_BE_SYS(mode)); - t1ha2_update(ctx, &bits, 8); +template +static uint64_t t1ha2_final( t1ha_context_t * RESTRICT ctx, uint64_t * RESTRICT extra_result ) { + uint64_t bits = (ctx->total << 3) ^ (UINT64_C(1) << 63); - if 
(likely(!extra_result)) { - squash(&ctx->state); - return T1HA2_TAIL(&ctx->state, ctx->buffer.u64, ctx->partial); - } + bits = COND_BSWAP(bits, MODE_BE_SYS(mode)); + t1ha2_update(ctx, &bits, 8); - return T1HA2_TAIL(&ctx->state, ctx->buffer.u64, ctx->partial, extra_result); + if (likely(!extra_result)) { + squash(&ctx->state); + return T1HA2_TAIL(&ctx->state, ctx->buffer.u64, ctx->partial); + } + + return T1HA2_TAIL(&ctx->state, ctx->buffer.u64, ctx->partial, extra_result); } //------------------------------------------------------------ @@ -909,184 +917,186 @@ static uint64_t t1ha2_final(t1ha_context_t * RESTRICT ctx, // versionA is t1ha0_ia32aes_avx1/t1ha0_ia32aes_noavx, which appear to // be identical. versionB is t1ha0_ia32aes_avx2, which does not appear // to need AVX2. ¯\_(ツ)_/¯ -template < enum t1ha_modes mode, bool versionB > -static uint64_t t1ha0_aes_impl(const void *data, size_t len, uint64_t seed) { - uint64_t a = seed; - uint64_t b = len; - - if (unlikely(len > 32)) { - __m128i x = _mm_set_epi64x(a, b); - __m128i y; - - if (versionB) { - const __m128i *v = (const __m128i *)data; - const __m128i *const detent = - (const __m128i *)((const uint8_t *)data + (len & ~15ul)); - y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_0, prime_1)); - data = detent; - - if (len & 16) { - x = _mm_add_epi64(x, _mm_loadu_si128(v++)); - y = _mm_aesenc_si128(x, y); - } - len &= 15; +template +static uint64_t t1ha0_aes_impl( const void * data, size_t len, uint64_t seed ) { + uint64_t a = seed; + uint64_t b = len; - if (v + 7 < detent) { - __m128i salt = y; - do { - __m128i t = _mm_aesenc_si128(_mm_loadu_si128(v++), salt); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - - salt = 
_mm_add_epi64(salt, _mm_set_epi64x(prime_5, prime_6)); - t = _mm_aesenc_si128(x, t); - x = _mm_add_epi64(y, x); - y = t; - } while (v + 7 < detent); - } + if (unlikely(len > 32)) { + __m128i x = _mm_set_epi64x(a, b); + __m128i y; + + if (versionB) { + const __m128i * v = (const __m128i *)data; + const __m128i * const detent = + (const __m128i *)((const uint8_t *)data + (len & ~15ul)); + y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_0, prime_1)); + data = detent; + + if (len & 16) { + x = _mm_add_epi64(x, _mm_loadu_si128(v++)); + y = _mm_aesenc_si128(x, y); + } + len &= 15; + + if (v + 7 < detent) { + __m128i salt = y; + do { + __m128i t = _mm_aesenc_si128(_mm_loadu_si128(v++), salt); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + + salt = _mm_add_epi64(salt, _mm_set_epi64x(prime_5, prime_6)); + t = _mm_aesenc_si128(x, t); + x = _mm_add_epi64(y, x); + y = t; + } while (v + 7 < detent); + } - while (v < detent) { - __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); - __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); - x = _mm_aesdec_si128(x, v0y); - y = _mm_aesdec_si128(y, v1x); - } - } else { - const __m128i * RESTRICT v = (const __m128i *)data; - const __m128i * RESTRICT const detent = - (const __m128i *)((const uint8_t *)data + len - 127); - y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_5, prime_6)); - - while (v < detent) { - __m128i v0 = _mm_loadu_si128(v + 0); - __m128i v1 = _mm_loadu_si128(v + 1); - __m128i v2 = _mm_loadu_si128(v + 2); - __m128i v3 = _mm_loadu_si128(v + 3); - __m128i v4 = _mm_loadu_si128(v + 4); - __m128i v5 = _mm_loadu_si128(v + 5); - __m128i v6 = _mm_loadu_si128(v + 6); - __m128i v7 = _mm_loadu_si128(v + 7); - - __m128i v0y = 
_mm_aesenc_si128(v0, y); - __m128i v2x6 = _mm_aesenc_si128(v2, _mm_xor_si128(x, v6)); - __m128i v45_67 = - _mm_xor_si128(_mm_aesenc_si128(v4, v5), _mm_add_epi64(v6, v7)); - - __m128i v0y7_1 = _mm_aesdec_si128(_mm_sub_epi64(v7, v0y), v1); - __m128i v2x6_3 = _mm_aesenc_si128(v2x6, v3); - - x = _mm_aesenc_si128(v45_67, _mm_add_epi64(x, y)); - y = _mm_aesenc_si128(v2x6_3, _mm_xor_si128(v0y7_1, v5)); - v += 8; - } + while (v < detent) { + __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); + __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); + x = _mm_aesdec_si128(x, v0y); + y = _mm_aesdec_si128(y, v1x); + } + } else { + const __m128i * RESTRICT v = (const __m128i *)data; + const __m128i * RESTRICT const detent = + (const __m128i *)((const uint8_t *)data + len - 127); + y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_5, prime_6)); + + while (v < detent) { + __m128i v0 = _mm_loadu_si128(v + 0); + __m128i v1 = _mm_loadu_si128(v + 1); + __m128i v2 = _mm_loadu_si128(v + 2); + __m128i v3 = _mm_loadu_si128(v + 3); + __m128i v4 = _mm_loadu_si128(v + 4); + __m128i v5 = _mm_loadu_si128(v + 5); + __m128i v6 = _mm_loadu_si128(v + 6); + __m128i v7 = _mm_loadu_si128(v + 7); + + __m128i v0y = _mm_aesenc_si128(v0, y); + __m128i v2x6 = _mm_aesenc_si128(v2, _mm_xor_si128(x, v6)); + __m128i v45_67 = + _mm_xor_si128(_mm_aesenc_si128(v4, v5), _mm_add_epi64(v6, v7)); + + __m128i v0y7_1 = _mm_aesdec_si128(_mm_sub_epi64(v7, v0y), v1); + __m128i v2x6_3 = _mm_aesenc_si128(v2x6, v3); + + x = _mm_aesenc_si128(v45_67, _mm_add_epi64(x, y) ); + y = _mm_aesenc_si128(v2x6_3, _mm_xor_si128(v0y7_1, v5)); + v += 8; + } - if (len & 64) { - __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); - __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); - x = _mm_aesdec_si128(x, v0y); - y = _mm_aesdec_si128(y, v1x); + if (len & 64) { + __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); + __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); + x = _mm_aesdec_si128(x, v0y); + y = _mm_aesdec_si128(y, v1x); - 
__m128i v2y = _mm_add_epi64(y, _mm_loadu_si128(v++)); - __m128i v3x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); - x = _mm_aesdec_si128(x, v2y); - y = _mm_aesdec_si128(y, v3x); - } + __m128i v2y = _mm_add_epi64(y, _mm_loadu_si128(v++)); + __m128i v3x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); + x = _mm_aesdec_si128(x, v2y); + y = _mm_aesdec_si128(y, v3x); + } - if (len & 32) { - __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); - __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); - x = _mm_aesdec_si128(x, v0y); - y = _mm_aesdec_si128(y, v1x); - } + if (len & 32) { + __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); + __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); + x = _mm_aesdec_si128(x, v0y); + y = _mm_aesdec_si128(y, v1x); + } - if (len & 16) { - y = _mm_add_epi64(x, y); - x = _mm_aesdec_si128(x, _mm_loadu_si128(v++)); + if (len & 16) { + y = _mm_add_epi64(x, y); + x = _mm_aesdec_si128(x, _mm_loadu_si128(v++)); + } + + data = v; + len &= 15; } - data = v; - len &= 15; + x = _mm_add_epi64(_mm_aesdec_si128(x, _mm_aesenc_si128(y, x)), y); + #if defined(HAVE_32BIT_PLATFORM) + #if defined(HAVE_SSE_4_1) + a = (uint32_t)_mm_extract_epi32(x, 0) | + (uint64_t)_mm_extract_epi32(x, 1) << 32; + b = (uint32_t)_mm_extract_epi32(x, 2) | + (uint64_t)_mm_extract_epi32(x, 3) << 32; + #else + a = (uint32_t)_mm_cvtsi128_si32(x); + a |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; + x = _mm_unpackhi_epi64(x, x); + b = (uint32_t)_mm_cvtsi128_si32(x); + b |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; + #endif + _mm_empty(); + #else /* HAVE_32BIT_PLATFORM */ + #if defined(HAVE_SSE_4_1) + a = _mm_extract_epi64(x, 0); + b = _mm_extract_epi64(x, 1); + #else + a = _mm_cvtsi128_si64(x); + b = _mm_cvtsi128_si64(_mm_unpackhi_epi64(x, x)); + #endif + #if defined(HAVE_AVX) + _mm256_zeroall(); + #endif + #endif } - x = _mm_add_epi64(_mm_aesdec_si128(x, _mm_aesenc_si128(y, x)), y); -#if defined(HAVE_32BIT_PLATFORM) -#if defined(HAVE_SSE_4_1) - a = 
(uint32_t)_mm_extract_epi32(x, 0) | - (uint64_t)_mm_extract_epi32(x, 1) << 32; - b = (uint32_t)_mm_extract_epi32(x, 2) | - (uint64_t)_mm_extract_epi32(x, 3) << 32; -#else - a = (uint32_t)_mm_cvtsi128_si32(x); - a |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; - x = _mm_unpackhi_epi64(x, x); - b = (uint32_t)_mm_cvtsi128_si32(x); - b |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; -#endif - _mm_empty(); -#else /* HAVE_32BIT_PLATFORM */ -#if defined(HAVE_SSE_4_1) - a = _mm_extract_epi64(x, 0); - b = _mm_extract_epi64(x, 1); -#else - a = _mm_cvtsi128_si64(x); - b = _mm_cvtsi128_si64(_mm_unpackhi_epi64(x, x)); -#endif -#if defined(HAVE_AVX) - _mm256_zeroall(); -#endif -#endif - } - - const uint64_t *v = (const uint64_t *)data; - switch (len) { - default: - mixup64(&a, &b, fetch64(v++), prime_4); - /* fall through */ - case 24: - case 23: - case 22: - case 21: - case 20: - case 19: - case 18: - case 17: - mixup64(&b, &a, fetch64(v++), prime_3); - /* fall through */ - case 16: - case 15: - case 14: - case 13: - case 12: - case 11: - case 10: - case 9: - mixup64(&a, &b, fetch64(v++), prime_2); - /* fall through */ - case 8: - case 7: - case 6: - case 5: - case 4: - case 3: - case 2: - case 1: - mixup64(&b, &a, tail64(v, len), prime_1); - /* fall through */ - case 0: - return final64(a, b); - } + const uint64_t * v = (const uint64_t *)data; + switch (len) { + default: + mixup64(&a, &b, fetch64(v++) , prime_4); + /* fall through */ + case 24: + case 23: + case 22: + case 21: + case 20: + case 19: + case 18: + case 17: + mixup64(&b, &a, fetch64(v++) , prime_3); + /* fall through */ + case 16: + case 15: + case 14: + case 13: + case 12: + case 11: + case 10: + case 9: + mixup64(&a, &b, fetch64(v++) , prime_2); + /* fall through */ + case 8: + case 7: + case 6: + case 5: + case 4: + case 3: + case 2: + case 1: + mixup64(&b, &a, tail64(v, len), prime_1); + /* fall through */ + case 0: + return final64(a, b); + } } + #endif -template < enum 
t1ha_modes mode > -static void t1ha0(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void t1ha0( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t hash; + // If unaligned access is fast, don't worry about // checking/handling pointer alignments. Otherwise, use // aligned-specific code if possible. @@ -1103,9 +1113,10 @@ static void t1ha0(const void * in, const size_t len, const seed_t seed, void * o PUT_U64(hash, (uint8_t *)out, 0); } -template < enum t1ha_modes mode > -static void t1ha1(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void t1ha1( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t hash; + // If unaligned access is fast, don't worry about // checking/handling pointer alignments. Otherwise, use // aligned-specific code if possible. @@ -1122,22 +1133,22 @@ static void t1ha1(const void * in, const size_t len, const seed_t seed, void * o PUT_U64(hash, (uint8_t *)out, 0); } -template < enum t1ha_modes mode, bool xwidth > -static void t1ha2(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void t1ha2( const void * in, const size_t len, const seed_t seed, void * out ) { alignas(16) t1ha_state256_t state; - uint64_t hash, xhash = 0; - uint64_t length = (uint64_t)len; + uint64_t hash, xhash = 0; + uint64_t length = (uint64_t)len; const bool use_unaligned = - (T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT) || - ((((uintptr_t)in) & (ALIGNMENT_64 - 1)) != 0); + (T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT) || + ((((uintptr_t)in) & (ALIGNMENT_64 - 1)) != 0); init_ab(&state, (uint64_t)seed, length); if (unlikely(length > 32)) { init_cd(&state, (uint64_t)seed, length); if (use_unaligned) { - in = T1HA2_LOOP(&state, in, length); + in = T1HA2_LOOP(&state, in, length); } else { - in = T1HA2_LOOP(&state, in, length); + in = T1HA2_LOOP(&state, in, length); } if (!xwidth) { 
squash(&state); @@ -1148,12 +1159,12 @@ static void t1ha2(const void * in, const size_t len, const seed_t seed, void * o } if (use_unaligned) { hash = xwidth ? - T1HA2_TAIL (&state, in, length, &xhash) : - T1HA2_TAIL(&state, in, length) ; + T1HA2_TAIL(&state, in, length, &xhash) : + T1HA2_TAIL(&state, in, length); } else { hash = xwidth ? - T1HA2_TAIL (&state, in, length, &xhash) : - T1HA2_TAIL(&state, in, length) ; + T1HA2_TAIL(&state, in, length, &xhash) : + T1HA2_TAIL(&state, in, length); } PUT_U64(hash, (uint8_t *)out, 0); if (xwidth) { @@ -1165,8 +1176,8 @@ static void t1ha2(const void * in, const size_t len, const seed_t seed, void * o // initialization, while published SMHasher validation codes use it // once. Default to once so SMHasher3 tests are consistent, but allow // selftests to use published KAT tables. -template < enum t1ha_modes mode, bool xwidth, bool selftest_seeding = false > -static void t1ha2_incr(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void t1ha2_incr( const void * in, const size_t len, const seed_t seed, void * out ) { alignas(16) t1ha_context_t ctx; uint64_t hash, xhash = 0; uint64_t length = (uint64_t)len; @@ -1182,270 +1193,284 @@ static void t1ha2_incr(const void * in, const size_t len, const seed_t seed, voi } #if defined(HAVE_X86_64_AES) -template < bool bswap > -static void t1ha0_aesA(const void * in, const size_t len, const seed_t seed, void * out) { + +template +static void t1ha0_aesA( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t hash; - hash = t1ha0_aes_impl(in, len, (uint64_t)seed); + + hash = t1ha0_aes_impl(in, len, (uint64_t)seed); PUT_U64(hash, (uint8_t *)out, 0); } -template < bool bswap > -static void t1ha0_aesB(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void t1ha0_aesB( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t hash; - hash = t1ha0_aes_impl(in, len, (uint64_t)seed); + + 
hash = t1ha0_aes_impl(in, len, (uint64_t)seed); PUT_U64(hash, (uint8_t *)out, 0); } + #endif //------------------------------------------------------------ -static const uint8_t t1ha_test_pattern[64] = { - 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x7F, 0x3F, - 0x1F, 0xF, 8, 16, 32, 64, 0x80, 0xFE, 0xFC, 0xF8, 0xF0, - 0xE0, 0xC0, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x55, 0xAA, 11, - 17, 19, 23, 29, 37, 42, 43, 'a', 'b', 'c', 'd', - 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x' +static const uint8_t t1ha_test_pattern [64] = { + 0, 1, 2, 3, 4, 5, 6, 7 , 0xFF, 0x7F, 0x3F, + 0x1F, 0xF, 8, 16, 32, 64, 0x80, 0xFE , 0xFC, 0xF8, 0xF0, + 0xE0, 0xC0, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF , 0x55, 0xAA, 11, + 17, 19, 23, 29, 37, 42, 43, 'a', 'b', 'c', 'd', + 'e' , 'f', 'g', 'h', 'i', 'j', 'k', 'l' , 'm', 'n', 'o', + 'p' , 'q' , 'r' , 's' , 't' , 'u' , 'v' , 'w' , 'x' }; -static const uint64_t t1ha_refval_32le[81] = { 0, - UINT64_C(0xC92229C10FAEA50E), UINT64_C(0x3DF1354B0DFDC443), UINT64_C(0x968F016D60417BB3), UINT64_C(0x85AAFB50C6DA770F), - UINT64_C(0x66CCE3BB6842C7D6), UINT64_C(0xDDAA39C11537C226), UINT64_C(0x35958D281F0C9C8C), UINT64_C(0x8C5D64B091DE608E), - UINT64_C(0x4094DF680D39786B), UINT64_C(0x1014F4AA2A2EDF4D), UINT64_C(0x39D21891615AA310), UINT64_C(0x7EF51F67C398C7C4), - UINT64_C(0x06163990DDBF319D), UINT64_C(0xE229CAA00C8D6F3F), UINT64_C(0xD2240B4B0D54E0F5), UINT64_C(0xEA2E7E905DDEAF94), - UINT64_C(0x8D4F8A887183A5CE), UINT64_C(0x44337F9A63C5820C), UINT64_C(0x94938D1E86A9B797), UINT64_C(0x96E9CABA5CA210CC), - UINT64_C(0x6EFBB9CC9E8F7708), UINT64_C(0x3D12EA0282FB8BBC), UINT64_C(0x5DA781EE205A2C48), UINT64_C(0xFA4A51A12677FE12), - UINT64_C(0x81D5F04E20660B28), UINT64_C(0x57258D043BCD3841), UINT64_C(0x5C9BEB62059C1ED2), UINT64_C(0x57A02162F9034B33), - UINT64_C(0xBA2A13E457CE19B8), UINT64_C(0xE593263BF9451F3A), UINT64_C(0x0BC1175539606BC5), UINT64_C(0xA3E2929E9C5F289F), - UINT64_C(0x86BDBD06835E35F7), UINT64_C(0xA180950AB48BAADC), 
UINT64_C(0x7812C994D9924028), UINT64_C(0x308366011415F46B), - UINT64_C(0x77FE9A9991C5F959), UINT64_C(0x925C340B70B0B1E3), UINT64_C(0xCD9C5BA4C41E2E10), UINT64_C(0x7CC4E7758B94CD93), - UINT64_C(0x898B235962EA4625), UINT64_C(0xD7E3E5BF22893286), UINT64_C(0x396F4CDD33056C64), UINT64_C(0x740AB2E32F17CD9F), - UINT64_C(0x60D12FF9CD15B321), UINT64_C(0xBEE3A6C9903A81D8), UINT64_C(0xB47040913B33C35E), UINT64_C(0x19EE8C2ACC013CFF), - UINT64_C(0x5DEC94C5783B55C4), UINT64_C(0x78DC122D562C5F1D), UINT64_C(0x6520F008DA1C181E), UINT64_C(0x77CAF155A36EBF7C), - UINT64_C(0x0A09E02BDB883CA6), UINT64_C(0xFD5D9ADA7E3FB895), UINT64_C(0xC6F5FDD9EEAB83B5), UINT64_C(0x84589BB29F52A92A), - UINT64_C(0x9B2517F13F8E9814), UINT64_C(0x6F752AF6A52E31EC), UINT64_C(0x8E717799E324CE8A), UINT64_C(0x84D90AEF39262D58), - UINT64_C(0x79C27B13FC28944D), UINT64_C(0xE6D6DF6438E0044A), UINT64_C(0x51B603E400D79CA4), UINT64_C(0x6A902B28C588B390), - UINT64_C(0x8D7F8DE9E6CB1D83), UINT64_C(0xCF1A4DC11CA7F044), UINT64_C(0xEF02E43C366786F1), UINT64_C(0x89915BCDBCFBE30F), - UINT64_C(0x5928B306F1A9CC7F), UINT64_C(0xA8B59092996851C5), UINT64_C(0x22050A20427E8B25), UINT64_C(0x6E6D64018941E7EE), - UINT64_C(0x9798C898B81AE846), UINT64_C(0x80EF218CDC30124A), UINT64_C(0xFCE45E60D55B0284), UINT64_C(0x4010E735D3147C35), - UINT64_C(0xEB647D999FD8DC7E), UINT64_C(0xD3544DCAB14FE907), UINT64_C(0xB588B27D8438700C), UINT64_C(0xA49EBFC43E057A4C) +static const uint64_t t1ha_refval_32le [81] = { + 0, + UINT64_C(0xC92229C10FAEA50E), UINT64_C(0x3DF1354B0DFDC443), UINT64_C(0x968F016D60417BB3), UINT64_C(0x85AAFB50C6DA770F), + UINT64_C(0x66CCE3BB6842C7D6), UINT64_C(0xDDAA39C11537C226), UINT64_C(0x35958D281F0C9C8C), UINT64_C(0x8C5D64B091DE608E), + UINT64_C(0x4094DF680D39786B), UINT64_C(0x1014F4AA2A2EDF4D), UINT64_C(0x39D21891615AA310), UINT64_C(0x7EF51F67C398C7C4), + UINT64_C(0x06163990DDBF319D), UINT64_C(0xE229CAA00C8D6F3F), UINT64_C(0xD2240B4B0D54E0F5), UINT64_C(0xEA2E7E905DDEAF94), + UINT64_C(0x8D4F8A887183A5CE), 
UINT64_C(0x44337F9A63C5820C), UINT64_C(0x94938D1E86A9B797), UINT64_C(0x96E9CABA5CA210CC), + UINT64_C(0x6EFBB9CC9E8F7708), UINT64_C(0x3D12EA0282FB8BBC), UINT64_C(0x5DA781EE205A2C48), UINT64_C(0xFA4A51A12677FE12), + UINT64_C(0x81D5F04E20660B28), UINT64_C(0x57258D043BCD3841), UINT64_C(0x5C9BEB62059C1ED2), UINT64_C(0x57A02162F9034B33), + UINT64_C(0xBA2A13E457CE19B8), UINT64_C(0xE593263BF9451F3A), UINT64_C(0x0BC1175539606BC5), UINT64_C(0xA3E2929E9C5F289F), + UINT64_C(0x86BDBD06835E35F7), UINT64_C(0xA180950AB48BAADC), UINT64_C(0x7812C994D9924028), UINT64_C(0x308366011415F46B), + UINT64_C(0x77FE9A9991C5F959), UINT64_C(0x925C340B70B0B1E3), UINT64_C(0xCD9C5BA4C41E2E10), UINT64_C(0x7CC4E7758B94CD93), + UINT64_C(0x898B235962EA4625), UINT64_C(0xD7E3E5BF22893286), UINT64_C(0x396F4CDD33056C64), UINT64_C(0x740AB2E32F17CD9F), + UINT64_C(0x60D12FF9CD15B321), UINT64_C(0xBEE3A6C9903A81D8), UINT64_C(0xB47040913B33C35E), UINT64_C(0x19EE8C2ACC013CFF), + UINT64_C(0x5DEC94C5783B55C4), UINT64_C(0x78DC122D562C5F1D), UINT64_C(0x6520F008DA1C181E), UINT64_C(0x77CAF155A36EBF7C), + UINT64_C(0x0A09E02BDB883CA6), UINT64_C(0xFD5D9ADA7E3FB895), UINT64_C(0xC6F5FDD9EEAB83B5), UINT64_C(0x84589BB29F52A92A), + UINT64_C(0x9B2517F13F8E9814), UINT64_C(0x6F752AF6A52E31EC), UINT64_C(0x8E717799E324CE8A), UINT64_C(0x84D90AEF39262D58), + UINT64_C(0x79C27B13FC28944D), UINT64_C(0xE6D6DF6438E0044A), UINT64_C(0x51B603E400D79CA4), UINT64_C(0x6A902B28C588B390), + UINT64_C(0x8D7F8DE9E6CB1D83), UINT64_C(0xCF1A4DC11CA7F044), UINT64_C(0xEF02E43C366786F1), UINT64_C(0x89915BCDBCFBE30F), + UINT64_C(0x5928B306F1A9CC7F), UINT64_C(0xA8B59092996851C5), UINT64_C(0x22050A20427E8B25), UINT64_C(0x6E6D64018941E7EE), + UINT64_C(0x9798C898B81AE846), UINT64_C(0x80EF218CDC30124A), UINT64_C(0xFCE45E60D55B0284), UINT64_C(0x4010E735D3147C35), + UINT64_C(0xEB647D999FD8DC7E), UINT64_C(0xD3544DCAB14FE907), UINT64_C(0xB588B27D8438700C), UINT64_C(0xA49EBFC43E057A4C) }; -static const uint64_t t1ha_refval_32be[81] = { 0, - 
UINT64_C(0xC92229C10FAEA50E), UINT64_C(0x0FE212630DD87E0F), UINT64_C(0x968F016D60417BB3), UINT64_C(0xE6B12B2C889913AB), - UINT64_C(0xAA3787887A9DA368), UINT64_C(0x06EE7202D53CEF39), UINT64_C(0x6149AFB2C296664B), UINT64_C(0x86C893210F9A5805), - UINT64_C(0x8379E5DA988AA04C), UINT64_C(0x24763AA7CE411A60), UINT64_C(0x9CF9C64B395A4CF8), UINT64_C(0xFFC192C338DDE904), - UINT64_C(0x094575BAB319E5F5), UINT64_C(0xBBBACFE7728C6511), UINT64_C(0x36B8C3CEBE4EF409), UINT64_C(0xAA0BA8A3397BA4D0), - UINT64_C(0xF9F85CF7124EE653), UINT64_C(0x3ADF4F7DF2A887AE), UINT64_C(0xAA2A0F5964AA9A7A), UINT64_C(0xF18B563F42D36EB8), - UINT64_C(0x034366CEF8334F5C), UINT64_C(0xAE2E85180E330E5F), UINT64_C(0xA5CE9FBFDF5C65B8), UINT64_C(0x5E509F25A9CA9B0B), - UINT64_C(0xE30D1358C2013BD2), UINT64_C(0xBB3A04D5EB8111FE), UINT64_C(0xB04234E82A15A28D), UINT64_C(0x87426A56D0EA0E2F), - UINT64_C(0x095086668E07F9F8), UINT64_C(0xF4CD3A43B6A6AEA5), UINT64_C(0x73F9B9B674D472A6), UINT64_C(0x558344229A1E4DCF), - UINT64_C(0x0AD4C95B2279181A), UINT64_C(0x5E3D19D80821CA6B), UINT64_C(0x652492D25BEBA258), UINT64_C(0xEFA84B02EAB849B1), - UINT64_C(0x81AD2D253059AC2C), UINT64_C(0x1400CCB0DFB2F457), UINT64_C(0x5688DC72A839860E), UINT64_C(0x67CC130E0FD1B0A7), - UINT64_C(0x0A851E3A94E21E69), UINT64_C(0x2EA0000B6A073907), UINT64_C(0xAE9776FF9BF1D02E), UINT64_C(0xC0A96B66B160631C), - UINT64_C(0xA93341DE4ED7C8F0), UINT64_C(0x6FBADD8F5B85E141), UINT64_C(0xB7D295F1C21E0CBA), UINT64_C(0x6D6114591B8E434F), - UINT64_C(0xF5B6939B63D97BE7), UINT64_C(0x3C80D5053F0E5DB4), UINT64_C(0xAC520ACC6B73F62D), UINT64_C(0xD1051F5841CF3966), - UINT64_C(0x62245AEA644AE760), UINT64_C(0x0CD56BE15497C62D), UINT64_C(0x5BB93435C4988FB6), UINT64_C(0x5FADB88EB18DB512), - UINT64_C(0xC897CAE2242475CC), UINT64_C(0xF1A094EF846DC9BB), UINT64_C(0x2B1D8B24924F79B6), UINT64_C(0xC6DF0C0E8456EB53), - UINT64_C(0xE6A40128303A9B9C), UINT64_C(0x64D37AF5EFFA7BD9), UINT64_C(0x90FEB70A5AE2A598), UINT64_C(0xEC3BA5F126D9FF4B), - UINT64_C(0x3121C8EC3AC51B29), 
UINT64_C(0x3B41C4D422166EC1), UINT64_C(0xB4878DDCBF48ED76), UINT64_C(0x5CB850D77CB762E4), - UINT64_C(0x9A27A43CC1DD171F), UINT64_C(0x2FDFFC6F99CB424A), UINT64_C(0xF54A57E09FDEA7BB), UINT64_C(0x5F78E5EE2CAB7039), - UINT64_C(0xB8BA95883DB31CBA), UINT64_C(0x131C61EB84AF86C3), UINT64_C(0x84B1F64E9C613DA7), UINT64_C(0xE94C1888C0C37C02), - UINT64_C(0xEA08F8BFB2039CDE), UINT64_C(0xCCC6D04D243EC753), UINT64_C(0x8977D105298B0629), UINT64_C(0x7AAA976494A5905E) +static const uint64_t t1ha_refval_32be [81] = { + 0, + UINT64_C(0xC92229C10FAEA50E), UINT64_C(0x0FE212630DD87E0F), UINT64_C(0x968F016D60417BB3), UINT64_C(0xE6B12B2C889913AB), + UINT64_C(0xAA3787887A9DA368), UINT64_C(0x06EE7202D53CEF39), UINT64_C(0x6149AFB2C296664B), UINT64_C(0x86C893210F9A5805), + UINT64_C(0x8379E5DA988AA04C), UINT64_C(0x24763AA7CE411A60), UINT64_C(0x9CF9C64B395A4CF8), UINT64_C(0xFFC192C338DDE904), + UINT64_C(0x094575BAB319E5F5), UINT64_C(0xBBBACFE7728C6511), UINT64_C(0x36B8C3CEBE4EF409), UINT64_C(0xAA0BA8A3397BA4D0), + UINT64_C(0xF9F85CF7124EE653), UINT64_C(0x3ADF4F7DF2A887AE), UINT64_C(0xAA2A0F5964AA9A7A), UINT64_C(0xF18B563F42D36EB8), + UINT64_C(0x034366CEF8334F5C), UINT64_C(0xAE2E85180E330E5F), UINT64_C(0xA5CE9FBFDF5C65B8), UINT64_C(0x5E509F25A9CA9B0B), + UINT64_C(0xE30D1358C2013BD2), UINT64_C(0xBB3A04D5EB8111FE), UINT64_C(0xB04234E82A15A28D), UINT64_C(0x87426A56D0EA0E2F), + UINT64_C(0x095086668E07F9F8), UINT64_C(0xF4CD3A43B6A6AEA5), UINT64_C(0x73F9B9B674D472A6), UINT64_C(0x558344229A1E4DCF), + UINT64_C(0x0AD4C95B2279181A), UINT64_C(0x5E3D19D80821CA6B), UINT64_C(0x652492D25BEBA258), UINT64_C(0xEFA84B02EAB849B1), + UINT64_C(0x81AD2D253059AC2C), UINT64_C(0x1400CCB0DFB2F457), UINT64_C(0x5688DC72A839860E), UINT64_C(0x67CC130E0FD1B0A7), + UINT64_C(0x0A851E3A94E21E69), UINT64_C(0x2EA0000B6A073907), UINT64_C(0xAE9776FF9BF1D02E), UINT64_C(0xC0A96B66B160631C), + UINT64_C(0xA93341DE4ED7C8F0), UINT64_C(0x6FBADD8F5B85E141), UINT64_C(0xB7D295F1C21E0CBA), UINT64_C(0x6D6114591B8E434F), + 
UINT64_C(0xF5B6939B63D97BE7), UINT64_C(0x3C80D5053F0E5DB4), UINT64_C(0xAC520ACC6B73F62D), UINT64_C(0xD1051F5841CF3966), + UINT64_C(0x62245AEA644AE760), UINT64_C(0x0CD56BE15497C62D), UINT64_C(0x5BB93435C4988FB6), UINT64_C(0x5FADB88EB18DB512), + UINT64_C(0xC897CAE2242475CC), UINT64_C(0xF1A094EF846DC9BB), UINT64_C(0x2B1D8B24924F79B6), UINT64_C(0xC6DF0C0E8456EB53), + UINT64_C(0xE6A40128303A9B9C), UINT64_C(0x64D37AF5EFFA7BD9), UINT64_C(0x90FEB70A5AE2A598), UINT64_C(0xEC3BA5F126D9FF4B), + UINT64_C(0x3121C8EC3AC51B29), UINT64_C(0x3B41C4D422166EC1), UINT64_C(0xB4878DDCBF48ED76), UINT64_C(0x5CB850D77CB762E4), + UINT64_C(0x9A27A43CC1DD171F), UINT64_C(0x2FDFFC6F99CB424A), UINT64_C(0xF54A57E09FDEA7BB), UINT64_C(0x5F78E5EE2CAB7039), + UINT64_C(0xB8BA95883DB31CBA), UINT64_C(0x131C61EB84AF86C3), UINT64_C(0x84B1F64E9C613DA7), UINT64_C(0xE94C1888C0C37C02), + UINT64_C(0xEA08F8BFB2039CDE), UINT64_C(0xCCC6D04D243EC753), UINT64_C(0x8977D105298B0629), UINT64_C(0x7AAA976494A5905E) }; -static const uint64_t t1ha_refval_64le[81] = { 0, - UINT64_C(0x6A580668D6048674), UINT64_C(0xA2FE904AFF0D0879), UINT64_C(0xE3AB9C06FAF4D023), UINT64_C(0x6AF1C60874C95442), - UINT64_C(0xB3557E561A6C5D82), UINT64_C(0x0AE73C696F3D37C0), UINT64_C(0x5EF25F7062324941), UINT64_C(0x9B784F3B4CE6AF33), - UINT64_C(0x6993BB206A74F070), UINT64_C(0xF1E95DF109076C4C), UINT64_C(0x4E1EB70C58E48540), UINT64_C(0x5FDD7649D8EC44E4), - UINT64_C(0x559122C706343421), UINT64_C(0x380133D58665E93D), UINT64_C(0x9CE74296C8C55AE4), UINT64_C(0x3556F9A5757AB6D0), - UINT64_C(0xF62751F7F25C469E), UINT64_C(0x851EEC67F6516D94), UINT64_C(0xED463EE3848A8695), UINT64_C(0xDC8791FEFF8ED3AC), - UINT64_C(0x2569C744E1A282CF), UINT64_C(0xF90EB7C1D70A80B9), UINT64_C(0x68DFA6A1B8050A4C), UINT64_C(0x94CCA5E8210D2134), - UINT64_C(0xF5CC0BEABC259F52), UINT64_C(0x40DBC1F51618FDA7), UINT64_C(0x0807945BF0FB52C6), UINT64_C(0xE5EF7E09DE70848D), - UINT64_C(0x63E1DF35FEBE994A), UINT64_C(0x2025E73769720D5A), UINT64_C(0xAD6120B2B8A152E1), 
UINT64_C(0x2A71D9F13959F2B7), - UINT64_C(0x8A20849A27C32548), UINT64_C(0x0BCBC9FE3B57884E), UINT64_C(0x0E028D255667AEAD), UINT64_C(0xBE66DAD3043AB694), - UINT64_C(0xB00E4C1238F9E2D4), UINT64_C(0x5C54BDE5AE280E82), UINT64_C(0x0E22B86754BC3BC4), UINT64_C(0x016707EBF858B84D), - UINT64_C(0x990015FBC9E095EE), UINT64_C(0x8B9AF0A3E71F042F), UINT64_C(0x6AA56E88BD380564), UINT64_C(0xAACE57113E681A0F), - UINT64_C(0x19F81514AFA9A22D), UINT64_C(0x80DABA3D62BEAC79), UINT64_C(0x715210412CABBF46), UINT64_C(0xD8FA0B9E9D6AA93F), - UINT64_C(0x6C2FC5A4109FD3A2), UINT64_C(0x5B3E60EEB51DDCD8), UINT64_C(0x0A7C717017756FE7), UINT64_C(0xA73773805CA31934), - UINT64_C(0x4DBD6BB7A31E85FD), UINT64_C(0x24F619D3D5BC2DB4), UINT64_C(0x3E4AF35A1678D636), UINT64_C(0x84A1A8DF8D609239), - UINT64_C(0x359C862CD3BE4FCD), UINT64_C(0xCF3A39F5C27DC125), UINT64_C(0xC0FF62F8FD5F4C77), UINT64_C(0x5E9F2493DDAA166C), - UINT64_C(0x17424152BE1CA266), UINT64_C(0xA78AFA5AB4BBE0CD), UINT64_C(0x7BFB2E2CEF118346), UINT64_C(0x647C3E0FF3E3D241), - UINT64_C(0x0352E4055C13242E), UINT64_C(0x6F42FC70EB660E38), UINT64_C(0x0BEBAD4FABF523BA), UINT64_C(0x9269F4214414D61D), - UINT64_C(0x1CA8760277E6006C), UINT64_C(0x7BAD25A859D87B5D), UINT64_C(0xAD645ADCF7414F1D), UINT64_C(0xB07F517E88D7AFB3), - UINT64_C(0xB321C06FB5FFAB5C), UINT64_C(0xD50F162A1EFDD844), UINT64_C(0x1DFD3D1924FBE319), UINT64_C(0xDFAEAB2F09EF7E78), - UINT64_C(0xA7603B5AF07A0B1E), UINT64_C(0x41CD044C0E5A4EE3), UINT64_C(0xF64D2F86E813BF33), UINT64_C(0xFF9FDB99305EB06A) +static const uint64_t t1ha_refval_64le [81] = { + 0, + UINT64_C(0x6A580668D6048674), UINT64_C(0xA2FE904AFF0D0879), UINT64_C(0xE3AB9C06FAF4D023), UINT64_C(0x6AF1C60874C95442), + UINT64_C(0xB3557E561A6C5D82), UINT64_C(0x0AE73C696F3D37C0), UINT64_C(0x5EF25F7062324941), UINT64_C(0x9B784F3B4CE6AF33), + UINT64_C(0x6993BB206A74F070), UINT64_C(0xF1E95DF109076C4C), UINT64_C(0x4E1EB70C58E48540), UINT64_C(0x5FDD7649D8EC44E4), + UINT64_C(0x559122C706343421), UINT64_C(0x380133D58665E93D), 
UINT64_C(0x9CE74296C8C55AE4), UINT64_C(0x3556F9A5757AB6D0), + UINT64_C(0xF62751F7F25C469E), UINT64_C(0x851EEC67F6516D94), UINT64_C(0xED463EE3848A8695), UINT64_C(0xDC8791FEFF8ED3AC), + UINT64_C(0x2569C744E1A282CF), UINT64_C(0xF90EB7C1D70A80B9), UINT64_C(0x68DFA6A1B8050A4C), UINT64_C(0x94CCA5E8210D2134), + UINT64_C(0xF5CC0BEABC259F52), UINT64_C(0x40DBC1F51618FDA7), UINT64_C(0x0807945BF0FB52C6), UINT64_C(0xE5EF7E09DE70848D), + UINT64_C(0x63E1DF35FEBE994A), UINT64_C(0x2025E73769720D5A), UINT64_C(0xAD6120B2B8A152E1), UINT64_C(0x2A71D9F13959F2B7), + UINT64_C(0x8A20849A27C32548), UINT64_C(0x0BCBC9FE3B57884E), UINT64_C(0x0E028D255667AEAD), UINT64_C(0xBE66DAD3043AB694), + UINT64_C(0xB00E4C1238F9E2D4), UINT64_C(0x5C54BDE5AE280E82), UINT64_C(0x0E22B86754BC3BC4), UINT64_C(0x016707EBF858B84D), + UINT64_C(0x990015FBC9E095EE), UINT64_C(0x8B9AF0A3E71F042F), UINT64_C(0x6AA56E88BD380564), UINT64_C(0xAACE57113E681A0F), + UINT64_C(0x19F81514AFA9A22D), UINT64_C(0x80DABA3D62BEAC79), UINT64_C(0x715210412CABBF46), UINT64_C(0xD8FA0B9E9D6AA93F), + UINT64_C(0x6C2FC5A4109FD3A2), UINT64_C(0x5B3E60EEB51DDCD8), UINT64_C(0x0A7C717017756FE7), UINT64_C(0xA73773805CA31934), + UINT64_C(0x4DBD6BB7A31E85FD), UINT64_C(0x24F619D3D5BC2DB4), UINT64_C(0x3E4AF35A1678D636), UINT64_C(0x84A1A8DF8D609239), + UINT64_C(0x359C862CD3BE4FCD), UINT64_C(0xCF3A39F5C27DC125), UINT64_C(0xC0FF62F8FD5F4C77), UINT64_C(0x5E9F2493DDAA166C), + UINT64_C(0x17424152BE1CA266), UINT64_C(0xA78AFA5AB4BBE0CD), UINT64_C(0x7BFB2E2CEF118346), UINT64_C(0x647C3E0FF3E3D241), + UINT64_C(0x0352E4055C13242E), UINT64_C(0x6F42FC70EB660E38), UINT64_C(0x0BEBAD4FABF523BA), UINT64_C(0x9269F4214414D61D), + UINT64_C(0x1CA8760277E6006C), UINT64_C(0x7BAD25A859D87B5D), UINT64_C(0xAD645ADCF7414F1D), UINT64_C(0xB07F517E88D7AFB3), + UINT64_C(0xB321C06FB5FFAB5C), UINT64_C(0xD50F162A1EFDD844), UINT64_C(0x1DFD3D1924FBE319), UINT64_C(0xDFAEAB2F09EF7E78), + UINT64_C(0xA7603B5AF07A0B1E), UINT64_C(0x41CD044C0E5A4EE3), UINT64_C(0xF64D2F86E813BF33), 
UINT64_C(0xFF9FDB99305EB06A) }; -static const uint64_t t1ha_refval_64be[81] = { 0, - UINT64_C(0x6A580668D6048674), UINT64_C(0xDECC975A0E3B8177), UINT64_C(0xE3AB9C06FAF4D023), UINT64_C(0xE401FA8F1B6AF969), - UINT64_C(0x67DB1DAE56FB94E3), UINT64_C(0x1106266A09B7A073), UINT64_C(0x550339B1EF2C7BBB), UINT64_C(0x290A2BAF590045BB), - UINT64_C(0xA182C1258C09F54A), UINT64_C(0x137D53C34BE7143A), UINT64_C(0xF6D2B69C6F42BEDC), UINT64_C(0x39643EAF2CA2E4B4), - UINT64_C(0x22A81F139A2C9559), UINT64_C(0x5B3D6AEF0AF33807), UINT64_C(0x56E3F80A68643C08), UINT64_C(0x9E423BE502378780), - UINT64_C(0xCDB0986F9A5B2FD5), UINT64_C(0xD5B3C84E7933293F), UINT64_C(0xE5FB8C90399E9742), UINT64_C(0x5D393C1F77B2CF3D), - UINT64_C(0xC8C82F5B2FF09266), UINT64_C(0xACA0230CA6F7B593), UINT64_C(0xCB5805E2960D1655), UINT64_C(0x7E2AD5B704D77C95), - UINT64_C(0xC5E903CDB8B9EB5D), UINT64_C(0x4CC7D0D21CC03511), UINT64_C(0x8385DF382CFB3E93), UINT64_C(0xF17699D0564D348A), - UINT64_C(0xF77EE7F8274A4C8D), UINT64_C(0xB9D8CEE48903BABE), UINT64_C(0xFE0EBD2A82B9CFE9), UINT64_C(0xB49FB6397270F565), - UINT64_C(0x173735C8C342108E), UINT64_C(0xA37C7FBBEEC0A2EA), UINT64_C(0xC13F66F462BB0B6E), UINT64_C(0x0C04F3C2B551467E), - UINT64_C(0x76A9CB156810C96E), UINT64_C(0x2038850919B0B151), UINT64_C(0xCEA19F2B6EED647B), UINT64_C(0x6746656D2FA109A4), - UINT64_C(0xF05137F221007F37), UINT64_C(0x892FA9E13A3B4948), UINT64_C(0x4D57B70D37548A32), UINT64_C(0x1A7CFB3D566580E6), - UINT64_C(0x7CB30272A45E3FAC), UINT64_C(0x137CCFFD9D51423F), UINT64_C(0xB87D96F3B82DF266), UINT64_C(0x33349AEE7472ED37), - UINT64_C(0x5CC0D3C99555BC07), UINT64_C(0x4A8F4FA196D964EF), UINT64_C(0xE82A0D64F281FBFA), UINT64_C(0x38A1BAC2C36823E1), - UINT64_C(0x77D197C239FD737E), UINT64_C(0xFB07746B4E07DF26), UINT64_C(0xC8A2198E967672BD), UINT64_C(0x5F1A146D143FA05A), - UINT64_C(0x26B877A1201AB7AC), UINT64_C(0x74E5B145214723F8), UINT64_C(0xE9CE10E3C70254BC), UINT64_C(0x299393A0C05B79E8), - UINT64_C(0xFD2D2B9822A5E7E2), UINT64_C(0x85424FEA50C8E50A), 
UINT64_C(0xE6839E714B1FFFE5), UINT64_C(0x27971CCB46F9112A), - UINT64_C(0xC98695A2E0715AA9), UINT64_C(0x338E1CBB4F858226), UINT64_C(0xFC6B5C5CF7A8D806), UINT64_C(0x8973CAADDE8DA50C), - UINT64_C(0x9C6D47AE32EBAE72), UINT64_C(0x1EBF1F9F21D26D78), UINT64_C(0x80A9704B8E153859), UINT64_C(0x6AFD20A939F141FB), - UINT64_C(0xC35F6C2B3B553EEF), UINT64_C(0x59529E8B0DC94C1A), UINT64_C(0x1569DF036EBC4FA1), UINT64_C(0xDA32B88593C118F9), - UINT64_C(0xF01E4155FF5A5660), UINT64_C(0x765A2522DCE2B185), UINT64_C(0xCEE95554128073EF), UINT64_C(0x60F072A5CA51DE2F) +static const uint64_t t1ha_refval_64be [81] = { + 0, + UINT64_C(0x6A580668D6048674), UINT64_C(0xDECC975A0E3B8177), UINT64_C(0xE3AB9C06FAF4D023), UINT64_C(0xE401FA8F1B6AF969), + UINT64_C(0x67DB1DAE56FB94E3), UINT64_C(0x1106266A09B7A073), UINT64_C(0x550339B1EF2C7BBB), UINT64_C(0x290A2BAF590045BB), + UINT64_C(0xA182C1258C09F54A), UINT64_C(0x137D53C34BE7143A), UINT64_C(0xF6D2B69C6F42BEDC), UINT64_C(0x39643EAF2CA2E4B4), + UINT64_C(0x22A81F139A2C9559), UINT64_C(0x5B3D6AEF0AF33807), UINT64_C(0x56E3F80A68643C08), UINT64_C(0x9E423BE502378780), + UINT64_C(0xCDB0986F9A5B2FD5), UINT64_C(0xD5B3C84E7933293F), UINT64_C(0xE5FB8C90399E9742), UINT64_C(0x5D393C1F77B2CF3D), + UINT64_C(0xC8C82F5B2FF09266), UINT64_C(0xACA0230CA6F7B593), UINT64_C(0xCB5805E2960D1655), UINT64_C(0x7E2AD5B704D77C95), + UINT64_C(0xC5E903CDB8B9EB5D), UINT64_C(0x4CC7D0D21CC03511), UINT64_C(0x8385DF382CFB3E93), UINT64_C(0xF17699D0564D348A), + UINT64_C(0xF77EE7F8274A4C8D), UINT64_C(0xB9D8CEE48903BABE), UINT64_C(0xFE0EBD2A82B9CFE9), UINT64_C(0xB49FB6397270F565), + UINT64_C(0x173735C8C342108E), UINT64_C(0xA37C7FBBEEC0A2EA), UINT64_C(0xC13F66F462BB0B6E), UINT64_C(0x0C04F3C2B551467E), + UINT64_C(0x76A9CB156810C96E), UINT64_C(0x2038850919B0B151), UINT64_C(0xCEA19F2B6EED647B), UINT64_C(0x6746656D2FA109A4), + UINT64_C(0xF05137F221007F37), UINT64_C(0x892FA9E13A3B4948), UINT64_C(0x4D57B70D37548A32), UINT64_C(0x1A7CFB3D566580E6), + UINT64_C(0x7CB30272A45E3FAC), 
UINT64_C(0x137CCFFD9D51423F), UINT64_C(0xB87D96F3B82DF266), UINT64_C(0x33349AEE7472ED37), + UINT64_C(0x5CC0D3C99555BC07), UINT64_C(0x4A8F4FA196D964EF), UINT64_C(0xE82A0D64F281FBFA), UINT64_C(0x38A1BAC2C36823E1), + UINT64_C(0x77D197C239FD737E), UINT64_C(0xFB07746B4E07DF26), UINT64_C(0xC8A2198E967672BD), UINT64_C(0x5F1A146D143FA05A), + UINT64_C(0x26B877A1201AB7AC), UINT64_C(0x74E5B145214723F8), UINT64_C(0xE9CE10E3C70254BC), UINT64_C(0x299393A0C05B79E8), + UINT64_C(0xFD2D2B9822A5E7E2), UINT64_C(0x85424FEA50C8E50A), UINT64_C(0xE6839E714B1FFFE5), UINT64_C(0x27971CCB46F9112A), + UINT64_C(0xC98695A2E0715AA9), UINT64_C(0x338E1CBB4F858226), UINT64_C(0xFC6B5C5CF7A8D806), UINT64_C(0x8973CAADDE8DA50C), + UINT64_C(0x9C6D47AE32EBAE72), UINT64_C(0x1EBF1F9F21D26D78), UINT64_C(0x80A9704B8E153859), UINT64_C(0x6AFD20A939F141FB), + UINT64_C(0xC35F6C2B3B553EEF), UINT64_C(0x59529E8B0DC94C1A), UINT64_C(0x1569DF036EBC4FA1), UINT64_C(0xDA32B88593C118F9), + UINT64_C(0xF01E4155FF5A5660), UINT64_C(0x765A2522DCE2B185), UINT64_C(0xCEE95554128073EF), UINT64_C(0x60F072A5CA51DE2F) }; -static const uint64_t t1ha_refval_2atonce[81] = { 0, - UINT64_C(0x772C7311BE32FF42), UINT64_C(0x444753D23F207E03), UINT64_C(0x71F6DF5DA3B4F532), UINT64_C(0x555859635365F660), - UINT64_C(0xE98808F1CD39C626), UINT64_C(0x2EB18FAF2163BB09), UINT64_C(0x7B9DD892C8019C87), UINT64_C(0xE2B1431C4DA4D15A), - UINT64_C(0x1984E718A5477F70), UINT64_C(0x08DD17B266484F79), UINT64_C(0x4C83A05D766AD550), UINT64_C(0x92DCEBB131D1907D), - UINT64_C(0xD67BC6FC881B8549), UINT64_C(0xF6A9886555FBF66B), UINT64_C(0x6E31616D7F33E25E), UINT64_C(0x36E31B7426E3049D), - UINT64_C(0x4F8E4FAF46A13F5F), UINT64_C(0x03EB0CB3253F819F), UINT64_C(0x636A7769905770D2), UINT64_C(0x3ADF3781D16D1148), - UINT64_C(0x92D19CB1818BC9C2), UINT64_C(0x283E68F4D459C533), UINT64_C(0xFA83A8A88DECAA04), UINT64_C(0x8C6F00368EAC538C), - UINT64_C(0x7B66B0CF3797B322), UINT64_C(0x5131E122FDABA3FF), UINT64_C(0x6E59FF515C08C7A9), UINT64_C(0xBA2C5269B2C377B0), - 
UINT64_C(0xA9D24FD368FE8A2B), UINT64_C(0x22DB13D32E33E891), UINT64_C(0x7B97DFC804B876E5), UINT64_C(0xC598BDFCD0E834F9), - UINT64_C(0xB256163D3687F5A7), UINT64_C(0x66D7A73C6AEF50B3), UINT64_C(0x25A7201C85D9E2A3), UINT64_C(0x911573EDA15299AA), - UINT64_C(0x5C0062B669E18E4C), UINT64_C(0x17734ADE08D54E28), UINT64_C(0xFFF036E33883F43B), UINT64_C(0xFE0756E7777DF11E), - UINT64_C(0x37972472D023F129), UINT64_C(0x6CFCE201B55C7F57), UINT64_C(0xE019D1D89F02B3E1), UINT64_C(0xAE5CC580FA1BB7E6), - UINT64_C(0x295695FB7E59FC3A), UINT64_C(0x76B6C820A40DD35E), UINT64_C(0xB1680A1768462B17), UINT64_C(0x2FB6AF279137DADA), - UINT64_C(0x28FB6B4366C78535), UINT64_C(0xEC278E53924541B1), UINT64_C(0x164F8AAB8A2A28B5), UINT64_C(0xB6C330AEAC4578AD), - UINT64_C(0x7F6F371070085084), UINT64_C(0x94DEAD60C0F448D3), UINT64_C(0x99737AC232C559EF), UINT64_C(0x6F54A6F9CA8EDD57), - UINT64_C(0x979B01E926BFCE0C), UINT64_C(0xF7D20BC85439C5B4), UINT64_C(0x64EDB27CD8087C12), UINT64_C(0x11488DE5F79C0BE2), - UINT64_C(0x25541DDD1680B5A4), UINT64_C(0x8B633D33BE9D1973), UINT64_C(0x404A3113ACF7F6C6), UINT64_C(0xC59DBDEF8550CD56), - UINT64_C(0x039D23C68F4F992C), UINT64_C(0x5BBB48E4BDD6FD86), UINT64_C(0x41E312248780DF5A), UINT64_C(0xD34791CE75D4E94F), - UINT64_C(0xED523E5D04DCDCFF), UINT64_C(0x7A6BCE0B6182D879), UINT64_C(0x21FB37483CAC28D8), UINT64_C(0x19A1B66E8DA878AD), - UINT64_C(0x6F804C5295B09ABE), UINT64_C(0x2A4BE5014115BA81), UINT64_C(0xA678ECC5FC924BE0), UINT64_C(0x50F7A54A99A36F59), - UINT64_C(0x0FD7E63A39A66452), UINT64_C(0x5AB1B213DD29C4E4), UINT64_C(0xF3ED80D9DF6534C5), UINT64_C(0xC736B12EF90615FD) +static const uint64_t t1ha_refval_2atonce [81] = { + 0, + UINT64_C(0x772C7311BE32FF42), UINT64_C(0x444753D23F207E03), UINT64_C(0x71F6DF5DA3B4F532), UINT64_C(0x555859635365F660), + UINT64_C(0xE98808F1CD39C626), UINT64_C(0x2EB18FAF2163BB09), UINT64_C(0x7B9DD892C8019C87), UINT64_C(0xE2B1431C4DA4D15A), + UINT64_C(0x1984E718A5477F70), UINT64_C(0x08DD17B266484F79), UINT64_C(0x4C83A05D766AD550), 
UINT64_C(0x92DCEBB131D1907D), + UINT64_C(0xD67BC6FC881B8549), UINT64_C(0xF6A9886555FBF66B), UINT64_C(0x6E31616D7F33E25E), UINT64_C(0x36E31B7426E3049D), + UINT64_C(0x4F8E4FAF46A13F5F), UINT64_C(0x03EB0CB3253F819F), UINT64_C(0x636A7769905770D2), UINT64_C(0x3ADF3781D16D1148), + UINT64_C(0x92D19CB1818BC9C2), UINT64_C(0x283E68F4D459C533), UINT64_C(0xFA83A8A88DECAA04), UINT64_C(0x8C6F00368EAC538C), + UINT64_C(0x7B66B0CF3797B322), UINT64_C(0x5131E122FDABA3FF), UINT64_C(0x6E59FF515C08C7A9), UINT64_C(0xBA2C5269B2C377B0), + UINT64_C(0xA9D24FD368FE8A2B), UINT64_C(0x22DB13D32E33E891), UINT64_C(0x7B97DFC804B876E5), UINT64_C(0xC598BDFCD0E834F9), + UINT64_C(0xB256163D3687F5A7), UINT64_C(0x66D7A73C6AEF50B3), UINT64_C(0x25A7201C85D9E2A3), UINT64_C(0x911573EDA15299AA), + UINT64_C(0x5C0062B669E18E4C), UINT64_C(0x17734ADE08D54E28), UINT64_C(0xFFF036E33883F43B), UINT64_C(0xFE0756E7777DF11E), + UINT64_C(0x37972472D023F129), UINT64_C(0x6CFCE201B55C7F57), UINT64_C(0xE019D1D89F02B3E1), UINT64_C(0xAE5CC580FA1BB7E6), + UINT64_C(0x295695FB7E59FC3A), UINT64_C(0x76B6C820A40DD35E), UINT64_C(0xB1680A1768462B17), UINT64_C(0x2FB6AF279137DADA), + UINT64_C(0x28FB6B4366C78535), UINT64_C(0xEC278E53924541B1), UINT64_C(0x164F8AAB8A2A28B5), UINT64_C(0xB6C330AEAC4578AD), + UINT64_C(0x7F6F371070085084), UINT64_C(0x94DEAD60C0F448D3), UINT64_C(0x99737AC232C559EF), UINT64_C(0x6F54A6F9CA8EDD57), + UINT64_C(0x979B01E926BFCE0C), UINT64_C(0xF7D20BC85439C5B4), UINT64_C(0x64EDB27CD8087C12), UINT64_C(0x11488DE5F79C0BE2), + UINT64_C(0x25541DDD1680B5A4), UINT64_C(0x8B633D33BE9D1973), UINT64_C(0x404A3113ACF7F6C6), UINT64_C(0xC59DBDEF8550CD56), + UINT64_C(0x039D23C68F4F992C), UINT64_C(0x5BBB48E4BDD6FD86), UINT64_C(0x41E312248780DF5A), UINT64_C(0xD34791CE75D4E94F), + UINT64_C(0xED523E5D04DCDCFF), UINT64_C(0x7A6BCE0B6182D879), UINT64_C(0x21FB37483CAC28D8), UINT64_C(0x19A1B66E8DA878AD), + UINT64_C(0x6F804C5295B09ABE), UINT64_C(0x2A4BE5014115BA81), UINT64_C(0xA678ECC5FC924BE0), UINT64_C(0x50F7A54A99A36F59), + 
UINT64_C(0x0FD7E63A39A66452), UINT64_C(0x5AB1B213DD29C4E4), UINT64_C(0xF3ED80D9DF6534C5), UINT64_C(0xC736B12EF90615FD) }; -static const uint64_t t1ha_refval_2atonce128[81] = { UINT64_C(0x4EC7F6A48E33B00A), - UINT64_C(0xB7B7FAA5BD7D8C1E), UINT64_C(0x3269533F66534A76), UINT64_C(0x6C3EC6B687923BFC), UINT64_C(0xC096F5E7EFA471A9), - UINT64_C(0x79D8AFB550CEA471), UINT64_C(0xCEE0507A20FD5119), UINT64_C(0xFB04CFFC14A9F4BF), UINT64_C(0xBD4406E923807AF2), - UINT64_C(0x375C02FF11010491), UINT64_C(0xA6EA4C2A59E173FF), UINT64_C(0xE0A606F0002CADDF), UINT64_C(0xE13BEAE6EBC07897), - UINT64_C(0xF069C2463E48EA10), UINT64_C(0x75BEE1A97089B5FA), UINT64_C(0x378F22F8DE0B8085), UINT64_C(0x9C726FC4D53D0D8B), - UINT64_C(0x71F6130A2D08F788), UINT64_C(0x7A9B20433FF6CF69), UINT64_C(0xFF49B7CD59BF6D61), UINT64_C(0xCCAAEE0D1CA9C6B3), - UINT64_C(0xC77889D86039D2AD), UINT64_C(0x7B378B5BEA9B0475), UINT64_C(0x6520BFA79D59AD66), UINT64_C(0x2441490CB8A37267), - UINT64_C(0xA715A66B7D5CF473), UINT64_C(0x9AE892C88334FD67), UINT64_C(0xD2FFE9AEC1D2169A), UINT64_C(0x790B993F18B18CBB), - UINT64_C(0xA0D02FBCF6A7B1AD), UINT64_C(0xA90833E6F151D0C1), UINT64_C(0x1AC7AFA37BD79BE0), UINT64_C(0xD5383628B2881A24), - UINT64_C(0xE5526F9D63F9F8F1), UINT64_C(0xC1F165A01A6D1F4D), UINT64_C(0x6CCEF8FF3FCFA3F2), UINT64_C(0x2030F18325E6DF48), - UINT64_C(0x289207230E3FB17A), UINT64_C(0x077B66F713A3C4B9), UINT64_C(0x9F39843CAF871754), UINT64_C(0x512FDA0F808ACCF3), - UINT64_C(0xF4D9801CD0CD1F14), UINT64_C(0x28A0C749ED323638), UINT64_C(0x94844CAFA671F01C), UINT64_C(0xD0E261876B8ACA51), - UINT64_C(0x8FC2A648A4792EA2), UINT64_C(0x8EF87282136AF5FE), UINT64_C(0x5FE6A54A9FBA6B40), UINT64_C(0xA3CC5B8FE6223D54), - UINT64_C(0xA8C3C0DD651BB01C), UINT64_C(0x625E9FDD534716F3), UINT64_C(0x1AB2604083C33AC5), UINT64_C(0xDE098853F8692F12), - UINT64_C(0x4B0813891BD87624), UINT64_C(0x4AB89C4553D182AD), UINT64_C(0x92C15AA2A3C27ADA), UINT64_C(0xFF2918D68191F5D9), - UINT64_C(0x06363174F641C325), UINT64_C(0x667112ADA74A2059), 
UINT64_C(0x4BD605D6B5E53D7D), UINT64_C(0xF2512C53663A14C8), - UINT64_C(0x21857BCB1852667C), UINT64_C(0xAFBEBD0369AEE228), UINT64_C(0x7049340E48FBFD6B), UINT64_C(0x50710E1924F46954), - UINT64_C(0x869A75E04A976A3F), UINT64_C(0x5A41ABBDD6373889), UINT64_C(0xA781778389B4B188), UINT64_C(0x21A3AFCED6C925B6), - UINT64_C(0x107226192EC10B42), UINT64_C(0x62A862E84EC2F9B1), UINT64_C(0x2B15E91659606DD7), UINT64_C(0x613934D1F9EC5A42), - UINT64_C(0x4DC3A96DC5361BAF), UINT64_C(0xC80BBA4CB5F12903), UINT64_C(0x3E3EDAE99A7D6987), UINT64_C(0x8F97B2D55941DCB0), - UINT64_C(0x4C9787364C3E4EC1), UINT64_C(0xEF0A2D07BEA90CA7), UINT64_C(0x5FABF32C70AEEAFB), UINT64_C(0x3356A5CFA8F23BF4) +static const uint64_t t1ha_refval_2atonce128[81] = { + UINT64_C(0x4EC7F6A48E33B00A), + UINT64_C(0xB7B7FAA5BD7D8C1E), UINT64_C(0x3269533F66534A76), UINT64_C(0x6C3EC6B687923BFC), UINT64_C(0xC096F5E7EFA471A9), + UINT64_C(0x79D8AFB550CEA471), UINT64_C(0xCEE0507A20FD5119), UINT64_C(0xFB04CFFC14A9F4BF), UINT64_C(0xBD4406E923807AF2), + UINT64_C(0x375C02FF11010491), UINT64_C(0xA6EA4C2A59E173FF), UINT64_C(0xE0A606F0002CADDF), UINT64_C(0xE13BEAE6EBC07897), + UINT64_C(0xF069C2463E48EA10), UINT64_C(0x75BEE1A97089B5FA), UINT64_C(0x378F22F8DE0B8085), UINT64_C(0x9C726FC4D53D0D8B), + UINT64_C(0x71F6130A2D08F788), UINT64_C(0x7A9B20433FF6CF69), UINT64_C(0xFF49B7CD59BF6D61), UINT64_C(0xCCAAEE0D1CA9C6B3), + UINT64_C(0xC77889D86039D2AD), UINT64_C(0x7B378B5BEA9B0475), UINT64_C(0x6520BFA79D59AD66), UINT64_C(0x2441490CB8A37267), + UINT64_C(0xA715A66B7D5CF473), UINT64_C(0x9AE892C88334FD67), UINT64_C(0xD2FFE9AEC1D2169A), UINT64_C(0x790B993F18B18CBB), + UINT64_C(0xA0D02FBCF6A7B1AD), UINT64_C(0xA90833E6F151D0C1), UINT64_C(0x1AC7AFA37BD79BE0), UINT64_C(0xD5383628B2881A24), + UINT64_C(0xE5526F9D63F9F8F1), UINT64_C(0xC1F165A01A6D1F4D), UINT64_C(0x6CCEF8FF3FCFA3F2), UINT64_C(0x2030F18325E6DF48), + UINT64_C(0x289207230E3FB17A), UINT64_C(0x077B66F713A3C4B9), UINT64_C(0x9F39843CAF871754), UINT64_C(0x512FDA0F808ACCF3), + 
UINT64_C(0xF4D9801CD0CD1F14), UINT64_C(0x28A0C749ED323638), UINT64_C(0x94844CAFA671F01C), UINT64_C(0xD0E261876B8ACA51), + UINT64_C(0x8FC2A648A4792EA2), UINT64_C(0x8EF87282136AF5FE), UINT64_C(0x5FE6A54A9FBA6B40), UINT64_C(0xA3CC5B8FE6223D54), + UINT64_C(0xA8C3C0DD651BB01C), UINT64_C(0x625E9FDD534716F3), UINT64_C(0x1AB2604083C33AC5), UINT64_C(0xDE098853F8692F12), + UINT64_C(0x4B0813891BD87624), UINT64_C(0x4AB89C4553D182AD), UINT64_C(0x92C15AA2A3C27ADA), UINT64_C(0xFF2918D68191F5D9), + UINT64_C(0x06363174F641C325), UINT64_C(0x667112ADA74A2059), UINT64_C(0x4BD605D6B5E53D7D), UINT64_C(0xF2512C53663A14C8), + UINT64_C(0x21857BCB1852667C), UINT64_C(0xAFBEBD0369AEE228), UINT64_C(0x7049340E48FBFD6B), UINT64_C(0x50710E1924F46954), + UINT64_C(0x869A75E04A976A3F), UINT64_C(0x5A41ABBDD6373889), UINT64_C(0xA781778389B4B188), UINT64_C(0x21A3AFCED6C925B6), + UINT64_C(0x107226192EC10B42), UINT64_C(0x62A862E84EC2F9B1), UINT64_C(0x2B15E91659606DD7), UINT64_C(0x613934D1F9EC5A42), + UINT64_C(0x4DC3A96DC5361BAF), UINT64_C(0xC80BBA4CB5F12903), UINT64_C(0x3E3EDAE99A7D6987), UINT64_C(0x8F97B2D55941DCB0), + UINT64_C(0x4C9787364C3E4EC1), UINT64_C(0xEF0A2D07BEA90CA7), UINT64_C(0x5FABF32C70AEEAFB), UINT64_C(0x3356A5CFA8F23BF4) }; -static const uint64_t t1ha_refval_2stream[81] = { UINT64_C(0x3C8426E33CB41606), - UINT64_C(0xFD74BE70EE73E617), UINT64_C(0xF43DE3CDD8A20486), UINT64_C(0x882FBCB37E8EA3BB), UINT64_C(0x1AA2CDD34CAA3D4B), - UINT64_C(0xEE755B2BFAE07ED5), UINT64_C(0xD4E225250D92E213), UINT64_C(0xA09B49083205965B), UINT64_C(0xD47B21724EF9EC9E), - UINT64_C(0xAC888FC3858CEE11), UINT64_C(0x94F820D85736F244), UINT64_C(0x1707951CCA920932), UINT64_C(0x8E0E45603F7877F0), - UINT64_C(0x9FD2592C0E3A7212), UINT64_C(0x9A66370F3AE3D427), UINT64_C(0xD33382D2161DE2B7), UINT64_C(0x9A35BE079DA7115F), - UINT64_C(0x73457C7FF58B4EC3), UINT64_C(0xBE8610BD53D7CE98), UINT64_C(0x65506DFE5CCD5371), UINT64_C(0x286A321AF9D5D9FA), - UINT64_C(0xB81EF9A7EF3C536D), UINT64_C(0x2CFDB5E6825C6E86), 
UINT64_C(0xB2A58CBFDFDD303A), UINT64_C(0xD26094A42B950635), - UINT64_C(0xA34D666A5F02AD9A), UINT64_C(0x0151E013EBCC72E5), UINT64_C(0x9254A6EA7FCB6BB5), UINT64_C(0x10C9361B3869DC2B), - UINT64_C(0xD7EC55A060606276), UINT64_C(0xA2FF7F8BF8976FFD), UINT64_C(0xB5181BB6852DCC88), UINT64_C(0x0EE394BB6178BAFF), - UINT64_C(0x3A8B4B400D21B89C), UINT64_C(0xEC270461970960FD), UINT64_C(0x615967FAB053877E), UINT64_C(0xFA51BF1CFEB4714C), - UINT64_C(0x29FDA8383070F375), UINT64_C(0xC3B663061BC52EDA), UINT64_C(0x192BBAF1F1A57923), UINT64_C(0x6D193B52F93C53AF), - UINT64_C(0x7F6F5639FE87CA1E), UINT64_C(0x69F7F9140B32EDC8), UINT64_C(0xD0F2416FB24325B6), UINT64_C(0x62C0E37FEDD49FF3), - UINT64_C(0x57866A4B809D373D), UINT64_C(0x9848D24BD935E137), UINT64_C(0xDFC905B66734D50A), UINT64_C(0x9A938DD194A68529), - UINT64_C(0x8276C44DF0625228), UINT64_C(0xA4B35D00AD67C0AB), UINT64_C(0x3D9CB359842DB452), UINT64_C(0x4241BFA8C23B267F), - UINT64_C(0x650FA517BEF15952), UINT64_C(0x782DE2ABD8C7B1E1), UINT64_C(0x4EAE456166CA3E15), UINT64_C(0x40CDF3A02614E337), - UINT64_C(0xAD84092C46102172), UINT64_C(0x0C68479B03F9A167), UINT64_C(0x7E1BA046749E181C), UINT64_C(0x3F3AB41A697382C1), - UINT64_C(0xC5E5DD6586EBFDC4), UINT64_C(0xFF926CD4EB02555C), UINT64_C(0x035CFE67F89E709B), UINT64_C(0x89F06AB6464A1B9D), - UINT64_C(0x8EFF58F3F7DEA758), UINT64_C(0x8B54AC657902089F), UINT64_C(0xC6C4F1F9F8DA4D64), UINT64_C(0xBDB729048AAAC93A), - UINT64_C(0xEA76BA628F5E5CD6), UINT64_C(0x742159B728B8A979), UINT64_C(0x6D151CD3C720E53D), UINT64_C(0xE97FFF9368FCDC42), - UINT64_C(0xCA5B38314914FBDA), UINT64_C(0xDD92C91D8B858EAE), UINT64_C(0x66E5F07CF647CBF2), UINT64_C(0xD4CF9B42F4985AFB), - UINT64_C(0x72AE17AC7D92F6B7), UINT64_C(0xB8206B22AB0472E1), UINT64_C(0x385876B5CFD42479), UINT64_C(0x03294A249EBE6B26) +static const uint64_t t1ha_refval_2stream [81] = { + UINT64_C(0x3C8426E33CB41606), + UINT64_C(0xFD74BE70EE73E617), UINT64_C(0xF43DE3CDD8A20486), UINT64_C(0x882FBCB37E8EA3BB), UINT64_C(0x1AA2CDD34CAA3D4B), + 
UINT64_C(0xEE755B2BFAE07ED5), UINT64_C(0xD4E225250D92E213), UINT64_C(0xA09B49083205965B), UINT64_C(0xD47B21724EF9EC9E), + UINT64_C(0xAC888FC3858CEE11), UINT64_C(0x94F820D85736F244), UINT64_C(0x1707951CCA920932), UINT64_C(0x8E0E45603F7877F0), + UINT64_C(0x9FD2592C0E3A7212), UINT64_C(0x9A66370F3AE3D427), UINT64_C(0xD33382D2161DE2B7), UINT64_C(0x9A35BE079DA7115F), + UINT64_C(0x73457C7FF58B4EC3), UINT64_C(0xBE8610BD53D7CE98), UINT64_C(0x65506DFE5CCD5371), UINT64_C(0x286A321AF9D5D9FA), + UINT64_C(0xB81EF9A7EF3C536D), UINT64_C(0x2CFDB5E6825C6E86), UINT64_C(0xB2A58CBFDFDD303A), UINT64_C(0xD26094A42B950635), + UINT64_C(0xA34D666A5F02AD9A), UINT64_C(0x0151E013EBCC72E5), UINT64_C(0x9254A6EA7FCB6BB5), UINT64_C(0x10C9361B3869DC2B), + UINT64_C(0xD7EC55A060606276), UINT64_C(0xA2FF7F8BF8976FFD), UINT64_C(0xB5181BB6852DCC88), UINT64_C(0x0EE394BB6178BAFF), + UINT64_C(0x3A8B4B400D21B89C), UINT64_C(0xEC270461970960FD), UINT64_C(0x615967FAB053877E), UINT64_C(0xFA51BF1CFEB4714C), + UINT64_C(0x29FDA8383070F375), UINT64_C(0xC3B663061BC52EDA), UINT64_C(0x192BBAF1F1A57923), UINT64_C(0x6D193B52F93C53AF), + UINT64_C(0x7F6F5639FE87CA1E), UINT64_C(0x69F7F9140B32EDC8), UINT64_C(0xD0F2416FB24325B6), UINT64_C(0x62C0E37FEDD49FF3), + UINT64_C(0x57866A4B809D373D), UINT64_C(0x9848D24BD935E137), UINT64_C(0xDFC905B66734D50A), UINT64_C(0x9A938DD194A68529), + UINT64_C(0x8276C44DF0625228), UINT64_C(0xA4B35D00AD67C0AB), UINT64_C(0x3D9CB359842DB452), UINT64_C(0x4241BFA8C23B267F), + UINT64_C(0x650FA517BEF15952), UINT64_C(0x782DE2ABD8C7B1E1), UINT64_C(0x4EAE456166CA3E15), UINT64_C(0x40CDF3A02614E337), + UINT64_C(0xAD84092C46102172), UINT64_C(0x0C68479B03F9A167), UINT64_C(0x7E1BA046749E181C), UINT64_C(0x3F3AB41A697382C1), + UINT64_C(0xC5E5DD6586EBFDC4), UINT64_C(0xFF926CD4EB02555C), UINT64_C(0x035CFE67F89E709B), UINT64_C(0x89F06AB6464A1B9D), + UINT64_C(0x8EFF58F3F7DEA758), UINT64_C(0x8B54AC657902089F), UINT64_C(0xC6C4F1F9F8DA4D64), UINT64_C(0xBDB729048AAAC93A), + UINT64_C(0xEA76BA628F5E5CD6), 
UINT64_C(0x742159B728B8A979), UINT64_C(0x6D151CD3C720E53D), UINT64_C(0xE97FFF9368FCDC42), + UINT64_C(0xCA5B38314914FBDA), UINT64_C(0xDD92C91D8B858EAE), UINT64_C(0x66E5F07CF647CBF2), UINT64_C(0xD4CF9B42F4985AFB), + UINT64_C(0x72AE17AC7D92F6B7), UINT64_C(0xB8206B22AB0472E1), UINT64_C(0x385876B5CFD42479), UINT64_C(0x03294A249EBE6B26) }; -static const uint64_t t1ha_refval_2stream128[81] = { UINT64_C(0xCD2801D3B92237D6), - UINT64_C(0x10E4D47BD821546D), UINT64_C(0x9100704B9D65CD06), UINT64_C(0xD6951CB4016313EF), UINT64_C(0x24DB636F96F474DA), - UINT64_C(0x3F4AF7DF3C49E422), UINT64_C(0xBFF25B8AF143459B), UINT64_C(0xA157EC13538BE549), UINT64_C(0xD3F5F52C47DBD419), - UINT64_C(0x0EF3D7D735AF1575), UINT64_C(0x46B7B892823F7B1B), UINT64_C(0xEE22EA4655213289), UINT64_C(0x56AD76F02FE929BC), - UINT64_C(0x9CF6CD1AC886546E), UINT64_C(0xAF45CE47AEA0B933), UINT64_C(0x535F9DC09F3996B7), UINT64_C(0x1F0C3C01694AE128), - UINT64_C(0x18495069BE0766F7), UINT64_C(0x37E5FFB3D72A4CB1), UINT64_C(0x6D6C2E9299F30709), UINT64_C(0x4F39E693F50B41E3), - UINT64_C(0xB11FC4EF0658E116), UINT64_C(0x48BFAACB78E5079B), UINT64_C(0xE1B4C89C781B3AD0), UINT64_C(0x81D2F34888D333A1), - UINT64_C(0xF6D02270D2EA449C), UINT64_C(0xC884C3C2C3CE1503), UINT64_C(0x711AE16BA157A9B9), UINT64_C(0x1E6140C642558C9D), - UINT64_C(0x35AB3D238F5DC55B), UINT64_C(0x33F07B6AEF051177), UINT64_C(0xE57336776EEFA71C), UINT64_C(0x6D445F8318BA3752), - UINT64_C(0xD4F5F6631934C988), UINT64_C(0xD5E260085727C4A2), UINT64_C(0x5B54B41EC180B4FA), UINT64_C(0x7F5D75769C15A898), - UINT64_C(0xAE5A6DB850CA33C6), UINT64_C(0x038CCB8044663403), UINT64_C(0xDA16310133DC92B8), UINT64_C(0x6A2FFB7AB2B7CE2B), - UINT64_C(0xDC1832D9229BAE20), UINT64_C(0x8C62C479F5ABC9E4), UINT64_C(0x5EB7B617857C9CCB), UINT64_C(0xB79CF7D749A1E80D), - UINT64_C(0xDE7FAC3798324FD3), UINT64_C(0x8178911813685D06), UINT64_C(0x6A726CBD394D4410), UINT64_C(0x6CBE6B3280DA1113), - UINT64_C(0x6829BA4410CF1148), UINT64_C(0xFA7E417EB26C5BC6), UINT64_C(0x22ED87884D6E3A49), 
UINT64_C(0x15F1472D5115669D), - UINT64_C(0x2EA0B4C8BF69D318), UINT64_C(0xDFE87070AA545503), UINT64_C(0x6B4C14B5F7144AB9), UINT64_C(0xC1ED49C06126551A), - UINT64_C(0x351919FC425C3899), UINT64_C(0x7B569C0FA6F1BD3E), UINT64_C(0x713AC2350844CFFD), UINT64_C(0xE9367F9A638C2FF3), - UINT64_C(0x97F17D325AEA0786), UINT64_C(0xBCB907CC6CF75F91), UINT64_C(0x0CB7517DAF247719), UINT64_C(0xBE16093CC45BE8A9), - UINT64_C(0x786EEE97359AD6AB), UINT64_C(0xB7AFA4F326B97E78), UINT64_C(0x2694B67FE23E502E), UINT64_C(0x4CB492826E98E0B4), - UINT64_C(0x838D119F74A416C7), UINT64_C(0x70D6A91E4E5677FD), UINT64_C(0xF3E4027AD30000E6), UINT64_C(0x9BDF692795807F77), - UINT64_C(0x6A371F966E034A54), UINT64_C(0x8789CF41AE4D67EF), UINT64_C(0x02688755484D60AE), UINT64_C(0xD5834B3A4BF5CE42), - UINT64_C(0x9405FC61440DE25D), UINT64_C(0x35EB280A157979B6), UINT64_C(0x48D40D6A525297AC), UINT64_C(0x6A87DC185054BADA) +static const uint64_t t1ha_refval_2stream128[81] = { + UINT64_C(0xCD2801D3B92237D6), + UINT64_C(0x10E4D47BD821546D), UINT64_C(0x9100704B9D65CD06), UINT64_C(0xD6951CB4016313EF), UINT64_C(0x24DB636F96F474DA), + UINT64_C(0x3F4AF7DF3C49E422), UINT64_C(0xBFF25B8AF143459B), UINT64_C(0xA157EC13538BE549), UINT64_C(0xD3F5F52C47DBD419), + UINT64_C(0x0EF3D7D735AF1575), UINT64_C(0x46B7B892823F7B1B), UINT64_C(0xEE22EA4655213289), UINT64_C(0x56AD76F02FE929BC), + UINT64_C(0x9CF6CD1AC886546E), UINT64_C(0xAF45CE47AEA0B933), UINT64_C(0x535F9DC09F3996B7), UINT64_C(0x1F0C3C01694AE128), + UINT64_C(0x18495069BE0766F7), UINT64_C(0x37E5FFB3D72A4CB1), UINT64_C(0x6D6C2E9299F30709), UINT64_C(0x4F39E693F50B41E3), + UINT64_C(0xB11FC4EF0658E116), UINT64_C(0x48BFAACB78E5079B), UINT64_C(0xE1B4C89C781B3AD0), UINT64_C(0x81D2F34888D333A1), + UINT64_C(0xF6D02270D2EA449C), UINT64_C(0xC884C3C2C3CE1503), UINT64_C(0x711AE16BA157A9B9), UINT64_C(0x1E6140C642558C9D), + UINT64_C(0x35AB3D238F5DC55B), UINT64_C(0x33F07B6AEF051177), UINT64_C(0xE57336776EEFA71C), UINT64_C(0x6D445F8318BA3752), + UINT64_C(0xD4F5F6631934C988), 
UINT64_C(0xD5E260085727C4A2), UINT64_C(0x5B54B41EC180B4FA), UINT64_C(0x7F5D75769C15A898), + UINT64_C(0xAE5A6DB850CA33C6), UINT64_C(0x038CCB8044663403), UINT64_C(0xDA16310133DC92B8), UINT64_C(0x6A2FFB7AB2B7CE2B), + UINT64_C(0xDC1832D9229BAE20), UINT64_C(0x8C62C479F5ABC9E4), UINT64_C(0x5EB7B617857C9CCB), UINT64_C(0xB79CF7D749A1E80D), + UINT64_C(0xDE7FAC3798324FD3), UINT64_C(0x8178911813685D06), UINT64_C(0x6A726CBD394D4410), UINT64_C(0x6CBE6B3280DA1113), + UINT64_C(0x6829BA4410CF1148), UINT64_C(0xFA7E417EB26C5BC6), UINT64_C(0x22ED87884D6E3A49), UINT64_C(0x15F1472D5115669D), + UINT64_C(0x2EA0B4C8BF69D318), UINT64_C(0xDFE87070AA545503), UINT64_C(0x6B4C14B5F7144AB9), UINT64_C(0xC1ED49C06126551A), + UINT64_C(0x351919FC425C3899), UINT64_C(0x7B569C0FA6F1BD3E), UINT64_C(0x713AC2350844CFFD), UINT64_C(0xE9367F9A638C2FF3), + UINT64_C(0x97F17D325AEA0786), UINT64_C(0xBCB907CC6CF75F91), UINT64_C(0x0CB7517DAF247719), UINT64_C(0xBE16093CC45BE8A9), + UINT64_C(0x786EEE97359AD6AB), UINT64_C(0xB7AFA4F326B97E78), UINT64_C(0x2694B67FE23E502E), UINT64_C(0x4CB492826E98E0B4), + UINT64_C(0x838D119F74A416C7), UINT64_C(0x70D6A91E4E5677FD), UINT64_C(0xF3E4027AD30000E6), UINT64_C(0x9BDF692795807F77), + UINT64_C(0x6A371F966E034A54), UINT64_C(0x8789CF41AE4D67EF), UINT64_C(0x02688755484D60AE), UINT64_C(0xD5834B3A4BF5CE42), + UINT64_C(0x9405FC61440DE25D), UINT64_C(0x35EB280A157979B6), UINT64_C(0x48D40D6A525297AC), UINT64_C(0x6A87DC185054BADA) }; #if defined(HAVE_X86_64_AES) -static const uint64_t t1ha_refval_ia32aes_a[81] = { 0, - UINT64_C(0x772C7311BE32FF42), UINT64_C(0xB231AC660E5B23B5), UINT64_C(0x71F6DF5DA3B4F532), UINT64_C(0x555859635365F660), - UINT64_C(0xE98808F1CD39C626), UINT64_C(0x2EB18FAF2163BB09), UINT64_C(0x7B9DD892C8019C87), UINT64_C(0xE2B1431C4DA4D15A), - UINT64_C(0x1984E718A5477F70), UINT64_C(0x08DD17B266484F79), UINT64_C(0x4C83A05D766AD550), UINT64_C(0x92DCEBB131D1907D), - UINT64_C(0xD67BC6FC881B8549), UINT64_C(0xF6A9886555FBF66B), UINT64_C(0x6E31616D7F33E25E), 
UINT64_C(0x36E31B7426E3049D), - UINT64_C(0x4F8E4FAF46A13F5F), UINT64_C(0x03EB0CB3253F819F), UINT64_C(0x636A7769905770D2), UINT64_C(0x3ADF3781D16D1148), - UINT64_C(0x92D19CB1818BC9C2), UINT64_C(0x283E68F4D459C533), UINT64_C(0xFA83A8A88DECAA04), UINT64_C(0x8C6F00368EAC538C), - UINT64_C(0x7B66B0CF3797B322), UINT64_C(0x5131E122FDABA3FF), UINT64_C(0x6E59FF515C08C7A9), UINT64_C(0xBA2C5269B2C377B0), - UINT64_C(0xA9D24FD368FE8A2B), UINT64_C(0x22DB13D32E33E891), UINT64_C(0x7B97DFC804B876E5), UINT64_C(0xC598BDFCD0E834F9), - UINT64_C(0xB256163D3687F5A7), UINT64_C(0x66D7A73C6AEF50B3), UINT64_C(0xBB34C6A4396695D2), UINT64_C(0x7F46E1981C3256AD), - UINT64_C(0x4B25A9B217A6C5B4), UINT64_C(0x7A0A6BCDD2321DA9), UINT64_C(0x0A1F55E690A7B44E), UINT64_C(0x8F451A91D7F05244), - UINT64_C(0x624D5D3C9B9800A7), UINT64_C(0x09DDC2B6409DDC25), UINT64_C(0x3E155765865622B6), UINT64_C(0x96519FAC9511B381), - UINT64_C(0x512E58482FE4FBF0), UINT64_C(0x1AB260EA7D54AE1C), UINT64_C(0x67976F12CC28BBBD), UINT64_C(0x0607B5B2E6250156), - UINT64_C(0x7E700BEA717AD36E), UINT64_C(0x06A058D9D61CABB3), UINT64_C(0x57DA5324A824972F), UINT64_C(0x1193BA74DBEBF7E7), - UINT64_C(0xC18DC3140E7002D4), UINT64_C(0x9F7CCC11DFA0EF17), UINT64_C(0xC487D6C20666A13A), UINT64_C(0xB67190E4B50EF0C8), - UINT64_C(0xA53DAA608DF0B9A5), UINT64_C(0x7E13101DE87F9ED3), UINT64_C(0x7F8955AE2F05088B), UINT64_C(0x2DF7E5A097AD383F), - UINT64_C(0xF027683A21EA14B5), UINT64_C(0x9BB8AEC3E3360942), UINT64_C(0x92BE39B54967E7FE), UINT64_C(0x978C6D332E7AFD27), - UINT64_C(0xED512FE96A4FAE81), UINT64_C(0x9E1099B8140D7BA3), UINT64_C(0xDFD5A5BE1E6FE9A6), UINT64_C(0x1D82600E23B66DD4), - UINT64_C(0x3FA3C3B7EE7B52CE), UINT64_C(0xEE84F7D2A655EF4C), UINT64_C(0x2A4361EC769E3BEB), UINT64_C(0x22E4B38916636702), - UINT64_C(0x0063096F5D39A115), UINT64_C(0x6C51B24DAAFA5434), UINT64_C(0xBAFB1DB1B411E344), UINT64_C(0xFF529F161AE0C4B0), - UINT64_C(0x1290EAE3AC0A686F), UINT64_C(0xA7B0D4585447D1BE), UINT64_C(0xAED3D18CB6CCAD53), UINT64_C(0xFC73D46F8B41BEC6) +static const 
uint64_t t1ha_refval_ia32aes_a [81] = { + 0, + UINT64_C(0x772C7311BE32FF42), UINT64_C(0xB231AC660E5B23B5), UINT64_C(0x71F6DF5DA3B4F532), UINT64_C(0x555859635365F660), + UINT64_C(0xE98808F1CD39C626), UINT64_C(0x2EB18FAF2163BB09), UINT64_C(0x7B9DD892C8019C87), UINT64_C(0xE2B1431C4DA4D15A), + UINT64_C(0x1984E718A5477F70), UINT64_C(0x08DD17B266484F79), UINT64_C(0x4C83A05D766AD550), UINT64_C(0x92DCEBB131D1907D), + UINT64_C(0xD67BC6FC881B8549), UINT64_C(0xF6A9886555FBF66B), UINT64_C(0x6E31616D7F33E25E), UINT64_C(0x36E31B7426E3049D), + UINT64_C(0x4F8E4FAF46A13F5F), UINT64_C(0x03EB0CB3253F819F), UINT64_C(0x636A7769905770D2), UINT64_C(0x3ADF3781D16D1148), + UINT64_C(0x92D19CB1818BC9C2), UINT64_C(0x283E68F4D459C533), UINT64_C(0xFA83A8A88DECAA04), UINT64_C(0x8C6F00368EAC538C), + UINT64_C(0x7B66B0CF3797B322), UINT64_C(0x5131E122FDABA3FF), UINT64_C(0x6E59FF515C08C7A9), UINT64_C(0xBA2C5269B2C377B0), + UINT64_C(0xA9D24FD368FE8A2B), UINT64_C(0x22DB13D32E33E891), UINT64_C(0x7B97DFC804B876E5), UINT64_C(0xC598BDFCD0E834F9), + UINT64_C(0xB256163D3687F5A7), UINT64_C(0x66D7A73C6AEF50B3), UINT64_C(0xBB34C6A4396695D2), UINT64_C(0x7F46E1981C3256AD), + UINT64_C(0x4B25A9B217A6C5B4), UINT64_C(0x7A0A6BCDD2321DA9), UINT64_C(0x0A1F55E690A7B44E), UINT64_C(0x8F451A91D7F05244), + UINT64_C(0x624D5D3C9B9800A7), UINT64_C(0x09DDC2B6409DDC25), UINT64_C(0x3E155765865622B6), UINT64_C(0x96519FAC9511B381), + UINT64_C(0x512E58482FE4FBF0), UINT64_C(0x1AB260EA7D54AE1C), UINT64_C(0x67976F12CC28BBBD), UINT64_C(0x0607B5B2E6250156), + UINT64_C(0x7E700BEA717AD36E), UINT64_C(0x06A058D9D61CABB3), UINT64_C(0x57DA5324A824972F), UINT64_C(0x1193BA74DBEBF7E7), + UINT64_C(0xC18DC3140E7002D4), UINT64_C(0x9F7CCC11DFA0EF17), UINT64_C(0xC487D6C20666A13A), UINT64_C(0xB67190E4B50EF0C8), + UINT64_C(0xA53DAA608DF0B9A5), UINT64_C(0x7E13101DE87F9ED3), UINT64_C(0x7F8955AE2F05088B), UINT64_C(0x2DF7E5A097AD383F), + UINT64_C(0xF027683A21EA14B5), UINT64_C(0x9BB8AEC3E3360942), UINT64_C(0x92BE39B54967E7FE), UINT64_C(0x978C6D332E7AFD27), + 
UINT64_C(0xED512FE96A4FAE81), UINT64_C(0x9E1099B8140D7BA3), UINT64_C(0xDFD5A5BE1E6FE9A6), UINT64_C(0x1D82600E23B66DD4), + UINT64_C(0x3FA3C3B7EE7B52CE), UINT64_C(0xEE84F7D2A655EF4C), UINT64_C(0x2A4361EC769E3BEB), UINT64_C(0x22E4B38916636702), + UINT64_C(0x0063096F5D39A115), UINT64_C(0x6C51B24DAAFA5434), UINT64_C(0xBAFB1DB1B411E344), UINT64_C(0xFF529F161AE0C4B0), + UINT64_C(0x1290EAE3AC0A686F), UINT64_C(0xA7B0D4585447D1BE), UINT64_C(0xAED3D18CB6CCAD53), UINT64_C(0xFC73D46F8B41BEC6) }; -static const uint64_t t1ha_refval_ia32aes_b[81] = { 0, - UINT64_C(0x772C7311BE32FF42), UINT64_C(0x4398F62A8CB6F72A), UINT64_C(0x71F6DF5DA3B4F532), UINT64_C(0x555859635365F660), - UINT64_C(0xE98808F1CD39C626), UINT64_C(0x2EB18FAF2163BB09), UINT64_C(0x7B9DD892C8019C87), UINT64_C(0xE2B1431C4DA4D15A), - UINT64_C(0x1984E718A5477F70), UINT64_C(0x08DD17B266484F79), UINT64_C(0x4C83A05D766AD550), UINT64_C(0x92DCEBB131D1907D), - UINT64_C(0xD67BC6FC881B8549), UINT64_C(0xF6A9886555FBF66B), UINT64_C(0x6E31616D7F33E25E), UINT64_C(0x36E31B7426E3049D), - UINT64_C(0x4F8E4FAF46A13F5F), UINT64_C(0x03EB0CB3253F819F), UINT64_C(0x636A7769905770D2), UINT64_C(0x3ADF3781D16D1148), - UINT64_C(0x92D19CB1818BC9C2), UINT64_C(0x283E68F4D459C533), UINT64_C(0xFA83A8A88DECAA04), UINT64_C(0x8C6F00368EAC538C), - UINT64_C(0x7B66B0CF3797B322), UINT64_C(0x5131E122FDABA3FF), UINT64_C(0x6E59FF515C08C7A9), UINT64_C(0xBA2C5269B2C377B0), - UINT64_C(0xA9D24FD368FE8A2B), UINT64_C(0x22DB13D32E33E891), UINT64_C(0x7B97DFC804B876E5), UINT64_C(0xC598BDFCD0E834F9), - UINT64_C(0xB256163D3687F5A7), UINT64_C(0x66D7A73C6AEF50B3), UINT64_C(0xE810F88E85CEA11A), UINT64_C(0x4814F8F3B83E4394), - UINT64_C(0x9CABA22D10A2F690), UINT64_C(0x0D10032511F58111), UINT64_C(0xE9A36EF5EEA3CD58), UINT64_C(0xC79242DE194D9D7C), - UINT64_C(0xC3871AA0435EE5C8), UINT64_C(0x52890BED43CCF4CD), UINT64_C(0x07A1D0861ACCD373), UINT64_C(0x227B816FF0FEE9ED), - UINT64_C(0x59FFBF73AACFC0C4), UINT64_C(0x09AB564F2BEDAD0C), UINT64_C(0xC05F744F2EE38318), 
UINT64_C(0x7B50B621D547C661), - UINT64_C(0x0C1F71CB4E68E5D1), UINT64_C(0x0E33A47881D4DBAA), UINT64_C(0xF5C3BF198E9A7C2E), UINT64_C(0x16328FD8C0F68A91), - UINT64_C(0xA3E399C9AB3E9A59), UINT64_C(0x163AE71CBCBB18B8), UINT64_C(0x18F17E4A8C79F7AB), UINT64_C(0x9250E2EA37014B45), - UINT64_C(0x7BBBB111D60B03E4), UINT64_C(0x3DAA4A3071A0BD88), UINT64_C(0xA28828D790A2D6DC), UINT64_C(0xBC70FC88F64BE3F1), - UINT64_C(0xA3E48008BA4333C7), UINT64_C(0x739E435ACAFC79F7), UINT64_C(0x42BBB360BE007CC6), UINT64_C(0x4FFB6FD2AF74EC92), - UINT64_C(0x2A799A2994673146), UINT64_C(0xBE0A045B69D48E9F), UINT64_C(0x549432F54FC6A278), UINT64_C(0x371D3C60369FC702), - UINT64_C(0xDB4557D415B08CA7), UINT64_C(0xE8692F0A83850B37), UINT64_C(0x022E46AEB36E9AAB), UINT64_C(0x117AC9B814E4652D), - UINT64_C(0xA361041267AE9048), UINT64_C(0x277CB51C961C3DDA), UINT64_C(0xAFFC96F377CB8A8D), UINT64_C(0x83CC79FA01DD1BA7), - UINT64_C(0xA494842ACF4B802C), UINT64_C(0xFC6D9CDDE2C34A3F), UINT64_C(0x4ED6863CE455F7A7), UINT64_C(0x630914D0DB7AAE98) +static const uint64_t t1ha_refval_ia32aes_b [81] = { + 0, + UINT64_C(0x772C7311BE32FF42), UINT64_C(0x4398F62A8CB6F72A), UINT64_C(0x71F6DF5DA3B4F532), UINT64_C(0x555859635365F660), + UINT64_C(0xE98808F1CD39C626), UINT64_C(0x2EB18FAF2163BB09), UINT64_C(0x7B9DD892C8019C87), UINT64_C(0xE2B1431C4DA4D15A), + UINT64_C(0x1984E718A5477F70), UINT64_C(0x08DD17B266484F79), UINT64_C(0x4C83A05D766AD550), UINT64_C(0x92DCEBB131D1907D), + UINT64_C(0xD67BC6FC881B8549), UINT64_C(0xF6A9886555FBF66B), UINT64_C(0x6E31616D7F33E25E), UINT64_C(0x36E31B7426E3049D), + UINT64_C(0x4F8E4FAF46A13F5F), UINT64_C(0x03EB0CB3253F819F), UINT64_C(0x636A7769905770D2), UINT64_C(0x3ADF3781D16D1148), + UINT64_C(0x92D19CB1818BC9C2), UINT64_C(0x283E68F4D459C533), UINT64_C(0xFA83A8A88DECAA04), UINT64_C(0x8C6F00368EAC538C), + UINT64_C(0x7B66B0CF3797B322), UINT64_C(0x5131E122FDABA3FF), UINT64_C(0x6E59FF515C08C7A9), UINT64_C(0xBA2C5269B2C377B0), + UINT64_C(0xA9D24FD368FE8A2B), UINT64_C(0x22DB13D32E33E891), 
UINT64_C(0x7B97DFC804B876E5), UINT64_C(0xC598BDFCD0E834F9), + UINT64_C(0xB256163D3687F5A7), UINT64_C(0x66D7A73C6AEF50B3), UINT64_C(0xE810F88E85CEA11A), UINT64_C(0x4814F8F3B83E4394), + UINT64_C(0x9CABA22D10A2F690), UINT64_C(0x0D10032511F58111), UINT64_C(0xE9A36EF5EEA3CD58), UINT64_C(0xC79242DE194D9D7C), + UINT64_C(0xC3871AA0435EE5C8), UINT64_C(0x52890BED43CCF4CD), UINT64_C(0x07A1D0861ACCD373), UINT64_C(0x227B816FF0FEE9ED), + UINT64_C(0x59FFBF73AACFC0C4), UINT64_C(0x09AB564F2BEDAD0C), UINT64_C(0xC05F744F2EE38318), UINT64_C(0x7B50B621D547C661), + UINT64_C(0x0C1F71CB4E68E5D1), UINT64_C(0x0E33A47881D4DBAA), UINT64_C(0xF5C3BF198E9A7C2E), UINT64_C(0x16328FD8C0F68A91), + UINT64_C(0xA3E399C9AB3E9A59), UINT64_C(0x163AE71CBCBB18B8), UINT64_C(0x18F17E4A8C79F7AB), UINT64_C(0x9250E2EA37014B45), + UINT64_C(0x7BBBB111D60B03E4), UINT64_C(0x3DAA4A3071A0BD88), UINT64_C(0xA28828D790A2D6DC), UINT64_C(0xBC70FC88F64BE3F1), + UINT64_C(0xA3E48008BA4333C7), UINT64_C(0x739E435ACAFC79F7), UINT64_C(0x42BBB360BE007CC6), UINT64_C(0x4FFB6FD2AF74EC92), + UINT64_C(0x2A799A2994673146), UINT64_C(0xBE0A045B69D48E9F), UINT64_C(0x549432F54FC6A278), UINT64_C(0x371D3C60369FC702), + UINT64_C(0xDB4557D415B08CA7), UINT64_C(0xE8692F0A83850B37), UINT64_C(0x022E46AEB36E9AAB), UINT64_C(0x117AC9B814E4652D), + UINT64_C(0xA361041267AE9048), UINT64_C(0x277CB51C961C3DDA), UINT64_C(0xAFFC96F377CB8A8D), UINT64_C(0x83CC79FA01DD1BA7), + UINT64_C(0xA494842ACF4B802C), UINT64_C(0xFC6D9CDDE2C34A3F), UINT64_C(0x4ED6863CE455F7A7), UINT64_C(0x630914D0DB7AAE98) }; #endif static uint64_t testno; -static FORCE_INLINE bool probe(void (*hash)(const void * in, const size_t len, const seed_t seed, void * out), - const uint64_t reference, bool bswap, - const void *data, unsigned len, uint64_t seed) { +static FORCE_INLINE bool probe( void (* hash)(const void * in, const size_t len, const seed_t seed, + void * out), const uint64_t reference, bool bswap, const void * data, unsigned len, uint64_t seed ) { uint8_t result[32]; + hash(data, 
len, seed, &result); const uint64_t actual = bswap ? GET_U64(result, 0) : GET_U64(result, 0); testno++; @@ -1455,13 +1480,14 @@ static FORCE_INLINE bool probe(void (*hash)(const void * in, const size_t len, c return actual != reference; } -static bool t1ha_selfcheck(void (*hash)(const void * in, const size_t len, const seed_t seed, void * out), - const uint64_t *reference_values, bool bswap) { +static bool t1ha_selfcheck( void (* hash)(const void * in, const size_t len, const seed_t seed, + void * out), const uint64_t * reference_values, bool bswap ) { bool failed = false; + testno = 0; const uint64_t zero = 0; - failed |= probe(hash, /* empty-zero */ *reference_values++, bswap, NULL, 0, zero); + failed |= probe(hash, /* empty-zero */ *reference_values++, bswap, NULL, 0, zero ); failed |= probe(hash, /* empty-all1 */ *reference_values++, bswap, NULL, 0, ~zero); failed |= probe(hash, /* bin64-zero */ *reference_values++, bswap, t1ha_test_pattern, 64, zero); @@ -1469,19 +1495,20 @@ static bool t1ha_selfcheck(void (*hash)(const void * in, const size_t len, const for (int i = 1; i < 64; i++) { /* bin%i-1p%i */ failed |= probe(hash, *reference_values++, bswap, t1ha_test_pattern, i, seed); - seed <<= 1; + seed <<= 1; } seed = ~zero; for (int i = 1; i <= 7; i++) { - seed <<= 1; - /* align%i_F%i */; + seed <<= 1; + /* align%i_F%i */ failed |= probe(hash, *reference_values++, bswap, t1ha_test_pattern + i, 64 - i, seed); } uint8_t pattern_long[512]; - for (size_t i = 0; i < sizeof(pattern_long); ++i) + for (size_t i = 0; i < sizeof(pattern_long); ++i) { pattern_long[i] = (uint8_t)i; + } for (int i = 0; i <= 7; i++) { /* long-%05i */ failed |= probe(hash, *reference_values++, bswap, pattern_long + i, 128 + i * 17, seed); @@ -1490,20 +1517,16 @@ static bool t1ha_selfcheck(void (*hash)(const void * in, const size_t len, const return failed; } -static bool t1ha0_selftest(void) { +static bool t1ha0_selftest( void ) { bool failed = false; failed |= t1ha_selfcheck(isLE() ? 
- t1ha0 : - t1ha0, - t1ha_refval_32le, - isLE() ? false : true); + t1ha0 : + t1ha0, t1ha_refval_32le, isLE() ? false : true); failed |= t1ha_selfcheck(isLE() ? - t1ha0 : - t1ha0, - t1ha_refval_32be, - isBE() ? false : true); + t1ha0 : + t1ha0, t1ha_refval_32be, isBE() ? false : true); if (failed) { printf("t1ha0 self-test FAILED!\n"); @@ -1511,20 +1534,16 @@ static bool t1ha0_selftest(void) { return !failed; } -static bool t1ha1_selftest(void) { +static bool t1ha1_selftest( void ) { bool failed = false; failed |= t1ha_selfcheck(isLE() ? - t1ha1 : - t1ha1, - t1ha_refval_64le, - isLE() ? false : true); + t1ha1 : + t1ha1, t1ha_refval_64le, isLE() ? false : true); failed |= t1ha_selfcheck(isLE() ? - t1ha1 : - t1ha1, - t1ha_refval_64be, - isBE() ? false : true); + t1ha1 : + t1ha1, t1ha_refval_64be, isBE() ? false : true); if (failed) { printf("t1ha1 self-test FAILED!\n"); @@ -1532,20 +1551,16 @@ static bool t1ha1_selftest(void) { return !failed; } -static bool t1ha2_selftest(void) { +static bool t1ha2_selftest( void ) { bool failed = false; failed |= t1ha_selfcheck(isLE() ? - t1ha2 : - t1ha2, - t1ha_refval_2atonce, - isLE() ? false : true); + t1ha2 : + t1ha2, t1ha_refval_2atonce , isLE() ? false : true); failed |= t1ha_selfcheck(isLE() ? - t1ha2 : - t1ha2, - t1ha_refval_2atonce128, - isLE() ? false : true); + t1ha2 : + t1ha2, t1ha_refval_2atonce128, isLE() ? false : true); if (failed) { printf("t1ha2 self-test FAILED!\n"); @@ -1553,20 +1568,16 @@ static bool t1ha2_selftest(void) { return !failed; } -static bool t1ha2_incr_selftest(void) { +static bool t1ha2_incr_selftest( void ) { bool failed = false; failed |= t1ha_selfcheck(isLE() ? - t1ha2_incr : - t1ha2_incr, - t1ha_refval_2stream, - isLE() ? false : true); + t1ha2_incr : + t1ha2_incr, t1ha_refval_2stream , isLE() ? false : true); failed |= t1ha_selfcheck(isLE() ? - t1ha2_incr : - t1ha2_incr, - t1ha_refval_2stream128, - isLE() ? false : true); + t1ha2_incr : + t1ha2_incr, t1ha_refval_2stream128, isLE() ? 
false : true); if (failed) { printf("t1ha2-incr self-test FAILED!\n"); @@ -1575,171 +1586,169 @@ static bool t1ha2_incr_selftest(void) { } #if defined(HAVE_X86_64_AES) -static bool t1ha0_aes_selftest(void) { + +static bool t1ha0_aes_selftest( void ) { bool failed = false; - failed |= t1ha_selfcheck(t1ha0_aesA, - t1ha_refval_ia32aes_a, - false); + failed |= t1ha_selfcheck(t1ha0_aesA, t1ha_refval_ia32aes_a, false); - failed |= t1ha_selfcheck(t1ha0_aesB, - t1ha_refval_ia32aes_b, - false); + failed |= t1ha_selfcheck(t1ha0_aesB, t1ha_refval_ia32aes_b, false); if (failed) { printf("t1ha0-aes self-test FAILED!\n"); } return !failed; } + #endif REGISTER_FAMILY(t1ha, - $.src_url = "https://web.archive.org/web/20211209095620/https://github.com/erthink/t1ha", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://web.archive.org/web/20211209095620/https://github.com/erthink/t1ha", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(t1ha0, - $.desc = "Fast Positive Hash #0 (portable, 32-bit core)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_ZLIB , - $.bits = 64, - $.verification_LE = 0x7F7D7B29, - $.verification_BE = 0x6B552A17, // To get old 0xDA6A4061 value, see above - $.hashfn_native = isLE() ? t1ha0 : t1ha0, - $.hashfn_bswap = isLE() ? t1ha0 : t1ha0, - $.initfn = t1ha0_selftest -); + $.desc = "Fast Positive Hash #0 (portable, 32-bit core)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_ZLIB, + $.bits = 64, + $.verification_LE = 0x7F7D7B29, + $.verification_BE = 0x6B552A17, // To get old 0xDA6A4061 value, see above + $.hashfn_native = isLE () ? t1ha0 : t1ha0, + $.hashfn_bswap = isLE () ? 
t1ha0 : t1ha0, + $.initfn = t1ha0_selftest + ); REGISTER_HASH(t1ha1, - $.desc = "Fast Positive Hash #1 (portable, 64-bit core)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_ZLIB , - $.bits = 64, - $.verification_LE = 0xD6836381, - $.verification_BE = 0xB895E54F, // To get old 0x93F864DE value, see above - $.hashfn_native = isLE() ? t1ha1 : t1ha1, - $.hashfn_bswap = isLE() ? t1ha1 : t1ha1, - $.initfn = t1ha1_selftest -); + $.desc = "Fast Positive Hash #1 (portable, 64-bit core)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_ZLIB, + $.bits = 64, + $.verification_LE = 0xD6836381, + $.verification_BE = 0xB895E54F, // To get old 0x93F864DE value, see above + $.hashfn_native = isLE () ? t1ha1 : t1ha1, + $.hashfn_bswap = isLE () ? t1ha1 : t1ha1, + $.initfn = t1ha1_selftest + ); REGISTER_HASH(t1ha2_64, - $.desc = "Fast Positive Hash #2 (portable, 64-bit core)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_TYPE_PUNNING | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_ZLIB , - $.bits = 64, - $.verification_LE = 0x8F16C948, - $.verification_BE = 0x061CB08C, - $.hashfn_native = isLE() ? t1ha2 : t1ha2, - $.hashfn_bswap = isLE() ? t1ha2 : t1ha2, - $.initfn = t1ha2_selftest -); + $.desc = "Fast Positive Hash #2 (portable, 64-bit core)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_TYPE_PUNNING | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_ZLIB, + $.bits = 64, + $.verification_LE = 0x8F16C948, + $.verification_BE = 0x061CB08C, + $.hashfn_native = isLE () ? t1ha2 : t1ha2, + $.hashfn_bswap = isLE () ? 
t1ha2 : t1ha2, + $.initfn = t1ha2_selftest + ); REGISTER_HASH(t1ha2_128, - $.desc = "Fast Positive Hash #2 (portable, 64-bit core)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_TYPE_PUNNING | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_ZLIB , - $.bits = 128, - $.verification_LE = 0xB44C43A1, - $.verification_BE = 0x95EB2DA8, - $.hashfn_native = isLE() ? t1ha2 : t1ha2, - $.hashfn_bswap = isLE() ? t1ha2 : t1ha2, - $.initfn = t1ha2_selftest -); + $.desc = "Fast Positive Hash #2 (portable, 64-bit core)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_TYPE_PUNNING | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_ZLIB, + $.bits = 128, + $.verification_LE = 0xB44C43A1, + $.verification_BE = 0x95EB2DA8, + $.hashfn_native = isLE () ? t1ha2 : t1ha2, + $.hashfn_bswap = isLE () ? t1ha2 : t1ha2, + $.initfn = t1ha2_selftest + ); REGISTER_HASH(t1ha2_64__incr, - $.desc = "Fast Positive Hash #2 (portable, 64-bit core, incremental version)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_TYPE_PUNNING | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_INCREMENTAL_DIFFERENT | - FLAG_IMPL_LICENSE_ZLIB , - $.bits = 64, - $.verification_LE = 0xDED9B580, - $.verification_BE = 0xB355A009, - $.hashfn_native = isLE() ? t1ha2_incr : t1ha2_incr, - $.hashfn_bswap = isLE() ? t1ha2_incr : t1ha2_incr, - $.initfn = t1ha2_incr_selftest -); + $.desc = "Fast Positive Hash #2 (portable, 64-bit core, incremental version)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_TYPE_PUNNING | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_INCREMENTAL_DIFFERENT | + FLAG_IMPL_LICENSE_ZLIB, + $.bits = 64, + $.verification_LE = 0xDED9B580, + $.verification_BE = 0xB355A009, + $.hashfn_native = isLE () ? t1ha2_incr : t1ha2_incr, + $.hashfn_bswap = isLE () ? 
t1ha2_incr : t1ha2_incr, + $.initfn = t1ha2_incr_selftest + ); REGISTER_HASH(t1ha2_128__incr, - $.desc = "Fast Positive Hash #2 (portable, 64-bit core, incremental version)", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_TYPE_PUNNING | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_INCREMENTAL | - FLAG_IMPL_INCREMENTAL_DIFFERENT | - FLAG_IMPL_LICENSE_ZLIB , - $.bits = 128, - $.verification_LE = 0xE929E756, - $.verification_BE = 0x3898932B, - $.hashfn_native = isLE() ? t1ha2_incr : t1ha2_incr, - $.hashfn_bswap = isLE() ? t1ha2_incr : t1ha2_incr, - $.initfn = t1ha2_incr_selftest -); + $.desc = "Fast Positive Hash #2 (portable, 64-bit core, incremental version)", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_TYPE_PUNNING | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_INCREMENTAL | + FLAG_IMPL_INCREMENTAL_DIFFERENT | + FLAG_IMPL_LICENSE_ZLIB, + $.bits = 128, + $.verification_LE = 0xE929E756, + $.verification_BE = 0x3898932B, + $.hashfn_native = isLE () ? t1ha2_incr : t1ha2_incr, + $.hashfn_bswap = isLE () ? 
t1ha2_incr : t1ha2_incr, + $.initfn = t1ha2_incr_selftest + ); #if defined(HAVE_X86_64_AES) REGISTER_HASH(t1ha0__aesA, - $.desc = "Fast Positive Hash #0a (AES-NI)", - $.hash_flags = - FLAG_HASH_AES_BASED , - $.impl_flags = - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_ZLIB , - $.bits = 64, - $.verification_LE = 0xF07C4DA5, - $.verification_BE = 0x6848847F, - $.hashfn_native = t1ha0_aesA, - $.hashfn_bswap = t1ha0_aesA, - $.initfn = t1ha0_aes_selftest -); + $.desc = "Fast Positive Hash #0a (AES-NI)", + $.hash_flags = + FLAG_HASH_AES_BASED, + $.impl_flags = + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_ZLIB, + $.bits = 64, + $.verification_LE = 0xF07C4DA5, + $.verification_BE = 0x6848847F, + $.hashfn_native = t1ha0_aesA, + $.hashfn_bswap = t1ha0_aesA, + $.initfn = t1ha0_aes_selftest + ); REGISTER_HASH(t1ha0__aesB, - $.desc = "Fast Positive Hash #0b (AES-NI)", - $.hash_flags = - FLAG_HASH_AES_BASED , - $.impl_flags = - FLAG_IMPL_READ_PAST_EOB | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_ZLIB , - $.bits = 64, - $.verification_LE = 0x8B38C599, - $.verification_BE = 0x010611E9, - $.hashfn_native = t1ha0_aesB, - $.hashfn_bswap = t1ha0_aesB, - $.initfn = t1ha0_aes_selftest -); + $.desc = "Fast Positive Hash #0b (AES-NI)", + $.hash_flags = + FLAG_HASH_AES_BASED, + $.impl_flags = + FLAG_IMPL_READ_PAST_EOB | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_ZLIB, + $.bits = 64, + $.verification_LE = 0x8B38C599, + $.verification_BE = 0x010611E9, + $.hashfn_native = t1ha0_aesB, + $.hashfn_bswap = t1ha0_aesB, + $.initfn = t1ha0_aes_selftest + ); #endif diff --git a/hashes/tabulation.cpp b/hashes/tabulation.cpp index 672fd3c4..798745ca 100644 --- a/hashes/tabulation.cpp +++ b/hashes/tabulation.cpp @@ -4,7 +4,7 @@ * Copyright (c) 2020-2021 Reini Urban * Copyright (c) 2020 Thomas Dybdahl Ahle * Copyright (c) 1990, 1993 - * The 
Regents of the University of California. All rights reserved. + * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -52,11 +52,11 @@ // test it with the RNG you plan on using to seed it. static uint64_t BSD_nextrand; -static void BSD_srand(uint64_t seed) { +static void BSD_srand( uint64_t seed ) { BSD_nextrand = seed; } -static uint32_t BSD_rand(void) { +static uint32_t BSD_rand( void ) { /* * Compute x = (7^5 * x) mod (2^31 - 1) * without overflowing 31 bits: @@ -65,112 +65,122 @@ static uint32_t BSD_rand(void) { * Park and Miller, Communications of the ACM, vol. 31, no. 10, * October 1988, p. 1195. */ - uint64_t hi, lo, x; - - x = (BSD_nextrand % 0x7ffffffe) + 1; - hi = x / 127773; - lo = x % 127773; - x = 16807 * lo - 2836 * hi; - if (x < 0) - x += 0x7fffffff; + uint64_t hi, lo, x; + + x = (BSD_nextrand % 0x7ffffffe) + 1; + hi = x / 127773; + lo = x % 127773; + x = 16807 * lo - 2836 * hi; + if (x < 0) { + x += 0x7fffffff; + } BSD_nextrand = --x; - return x; + return x; } static uint64_t tab_rand64() { - // we don't know how many bits we get from rand(), - // but it is at least 16, so we concatenate a couple. - uint64_t r = 0; - for (int i = 0; i < 4; i++) { - r <<= 16; - r ^= BSD_rand(); - } - return r; + // we don't know how many bits we get from rand(), + // but it is at least 16, so we concatenate a couple. 
+ uint64_t r = 0; + + for (int i = 0; i < 4; i++) { + r <<= 16; + r ^= BSD_rand(); + } + return r; } #if defined(HAVE_INT128) + static inline uint128_t tab_rand128() { - return (uint128_t)tab_rand64() << 64 | tab_rand64(); + return (uint128_t)tab_rand64() << 64 | tab_rand64(); } + #endif //----------------------------------------------------------------------------- // 32 Bit Version -const static uint64_t MERSENNE_31 = (UINT64_C(1) << 31) - 1; -const static int CHAR_SIZE = 8; -const static int BLOCK_SIZE_32 = 1<<8; +const static uint64_t MERSENNE_31 = (UINT64_C(1) << 31) - 1; +const static int CHAR_SIZE = 8; +const static int BLOCK_SIZE_32 = 1 << 8; static uint64_t multiply_shift_random_64[BLOCK_SIZE_32]; static uint32_t multiply_shift_a_64; static uint64_t multiply_shift_b_64; -static int32_t tabulation_32[32/CHAR_SIZE][1<> 31); +static inline uint32_t combine31( uint32_t h, uint32_t x, uint32_t a ) { + uint64_t temp = (uint64_t)h * x + a; + + return ((uint32_t)temp & MERSENNE_31) + (uint32_t)(temp >> 31); } -template < bool bswap > -static void tabulation32(const void * in, const size_t len, const seed_t seed, void * out) { - const uint8_t * buf = (const uint8_t *)in; - size_t len_words_32 = len/4; - size_t len_blocks_32 = len_words_32/BLOCK_SIZE_32; - - uint32_t h = len ^ seed; - - for (size_t b = 0; b < len_blocks_32; b++) { - uint32_t block_hash = 0; - for (int i = 0; i < BLOCK_SIZE_32; i++, buf += 4) - block_hash ^= multiply_shift_random_64[i] * GET_U32(buf,0) >> 32; - h = combine31(h, multiply_shift_a_64, block_hash >> 2); - } - - int remaining_words = len_words_32 % BLOCK_SIZE_32; - for (int i = 0; i < remaining_words; i++, buf += 4) - h ^= multiply_shift_random_64[i] * GET_U32(buf,0) >> 32; - - int remaining_bytes = len % 4; - if (remaining_bytes) { - uint32_t last = 0; - if (remaining_bytes & 2) {last = GET_U16(buf,0); buf += 2;} - if (remaining_bytes & 1) {last = (last << 8) | (*buf);} - h ^= multiply_shift_b_64 * last >> 32; - } - - // Finalization - 
uint32_t tab = 0; - for (int i = 0; i < 32/CHAR_SIZE; i++, h >>= CHAR_SIZE) - tab ^= tabulation_32[i][h & ((1<(tab, (uint8_t *)out, 0); +template +static void tabulation32( const void * in, const size_t len, const seed_t seed, void * out ) { + const uint8_t * buf = (const uint8_t *)in; + size_t len_words_32 = len / 4; + size_t len_blocks_32 = len_words_32 / BLOCK_SIZE_32; + + uint32_t h = len ^ seed; + + for (size_t b = 0; b < len_blocks_32; b++) { + uint32_t block_hash = 0; + for (int i = 0; i < BLOCK_SIZE_32; i++, buf += 4) { + block_hash ^= multiply_shift_random_64[i] * GET_U32(buf, 0) >> 32; + } + h = combine31(h, multiply_shift_a_64, block_hash >> 2); + } + + int remaining_words = len_words_32 % BLOCK_SIZE_32; + for (int i = 0; i < remaining_words; i++, buf += 4) { + h ^= multiply_shift_random_64[i] * GET_U32(buf, 0) >> 32; + } + + int remaining_bytes = len % 4; + if (remaining_bytes) { + uint32_t last = 0; + if (remaining_bytes & 2) { last = GET_U16(buf, 0); buf += 2; } + if (remaining_bytes & 1) { last = (last << 8) | (*buf); } + h ^= multiply_shift_b_64 * last >> 32; + } + + // Finalization + uint32_t tab = 0; + for (int i = 0; i < 32 / CHAR_SIZE; i++, h >>= CHAR_SIZE) { + tab ^= tabulation_32[i][h & ((1 << CHAR_SIZE) - 1)]; + } + + PUT_U32(tab, (uint8_t *)out, 0); } #if defined(HAVE_INT128) @@ -179,178 +189,185 @@ static void tabulation32(const void * in, const size_t len, const seed_t seed, v const static uint64_t TAB_MERSENNE_61 = (UINT64_C(1) << 61) - 1; // multiply shift works on fixed length strings, so we operate in blocks. // this size can be tuned depending on the system. 
-const static int TAB_BLOCK_SIZE = 1<<8; +const static int TAB_BLOCK_SIZE = 1 << 8; static uint128_t tab_multiply_shift_random[TAB_BLOCK_SIZE]; static uint128_t tab_multiply_shift_a; static uint128_t tab_multiply_shift_b; -static int64_t tabulation[64/CHAR_SIZE][1<= 64/CHAR_SIZE); - for (int i = 0; i < 64/CHAR_SIZE; i++) - for (int j = 0; j < 1<= 64 / CHAR_SIZE); + } + for (int i = 0; i < 64 / CHAR_SIZE; i++) { + for (int j = 0; j < 1 << CHAR_SIZE; j++) { + tabulation[i][j] = have_broken_rand ? tab_multiply_shift_random[i] : tab_rand128(); + } + } + return 0; } -static inline uint64_t combine61(uint64_t h, uint64_t x, uint64_t a) { - // we assume 2^b-1 >= 2u-1. in other words - // x <= u-1 <= 2^(b-1)-1 (at most 60 bits) - // a <= p-1 = 2^b-2 (60 bits suffices) - // actually, checking the proof, it's fine if a is 61 bits. - // h <= 2p-1 = 2^62-3. this will also be guaranteed of the output. +static inline uint64_t combine61( uint64_t h, uint64_t x, uint64_t a ) { + // we assume 2^b-1 >= 2u-1. in other words + // x <= u-1 <= 2^(b-1)-1 (at most 60 bits) + // a <= p-1 = 2^b-2 (60 bits suffices) + // actually, checking the proof, it's fine if a is 61 bits. + // h <= 2p-1 = 2^62-3. this will also be guaranteed of the output. - //uint128_t temp = (uint128_t)h * x + a; - //return ((uint64_t)temp & TAB_MERSENNE_61) + (uint64_t)(temp >> 61); + // uint128_t temp = (uint128_t)h * x + a; + // return ((uint64_t)temp & TAB_MERSENNE_61) + (uint64_t)(temp >> 61); uint64_t rhi = 0, rlo = a; + fma64_128(rlo, rhi, h, x); - rhi <<= (64 - 61); - rhi |= (rlo >> 61); - rlo &= TAB_MERSENNE_61; + rhi <<= (64 - 61); + rhi |= (rlo >> 61); + rlo &= TAB_MERSENNE_61; return rlo + rhi; } -template < bool bswap > -static void tabulation64(const void * in, const size_t len, const seed_t seed, void * out) { - const uint8_t * buf = (const uint8_t *)in; - - // the idea is to compute a fast "signature" of the string before doing - // tabulation hashing. 
this signature only has to be collision resistant, - // so we can use the variabe-length-hashing polynomial mod-mersenne scheme - // from thorup. - // because of the birthday paradox, the signature needs to be around twice - // as many bits as in the number of keys tested. since smhasher tests - // collisions in keys in the order of millions, we need the signatures to - // be at least 40 bits. we settle on 64. - - // we mix in len in the basis, since smhasher considers two keys - // of different length to be different, even if all the extra bits are 0. - // this is needed for the appendzero test. - - uint64_t h = len ^ seed ^ (seed << 8); - - if (len >= 8) { - const size_t len_words = len/8; - if (len_words >= TAB_BLOCK_SIZE) { - const size_t len_blocks = len_words/TAB_BLOCK_SIZE; - - // to save time, we partition the string in blocks of ~ 256 words. - // each word is hashed using a fast strongly-universal multiply-shift, - // and since the xor of independent strongly-universal hash functions - // is also universal, we get a unique value for each block. - for (size_t b = 0; b < len_blocks; b++) { - uint64_t block_hash = 0; - for (int i = 0; i < TAB_BLOCK_SIZE; i++, buf += 8) { - // we don't have to shift yet, but shifting by 64 allows the - // compiler to produce a single "high bits only" multiplication instruction. - block_hash ^= (tab_multiply_shift_random[i] * GET_U64(buf,0)) >> 64; - - // the following is very fast, basically using mum, but theoretically wrong. - // __uint128_t mum = (__uint128_t)tab_multiply_shift_random_64[i] * take64(buf); - // block_hash ^= mum ^ (mum >> 64); +template +static void tabulation64( const void * in, const size_t len, const seed_t seed, void * out ) { + const uint8_t * buf = (const uint8_t *)in; + + // the idea is to compute a fast "signature" of the string before doing + // tabulation hashing. 
this signature only has to be collision resistant, + // so we can use the variabe-length-hashing polynomial mod-mersenne scheme + // from thorup. + // because of the birthday paradox, the signature needs to be around twice + // as many bits as in the number of keys tested. since smhasher tests + // collisions in keys in the order of millions, we need the signatures to + // be at least 40 bits. we settle on 64. + + // we mix in len in the basis, since smhasher considers two keys + // of different length to be different, even if all the extra bits are 0. + // this is needed for the appendzero test. + + uint64_t h = len ^ seed ^ (seed << 8); + + if (len >= 8) { + const size_t len_words = len / 8; + if (len_words >= TAB_BLOCK_SIZE) { + const size_t len_blocks = len_words / TAB_BLOCK_SIZE; + + // to save time, we partition the string in blocks of ~ 256 words. + // each word is hashed using a fast strongly-universal multiply-shift, + // and since the xor of independent strongly-universal hash functions + // is also universal, we get a unique value for each block. + for (size_t b = 0; b < len_blocks; b++) { + uint64_t block_hash = 0; + for (int i = 0; i < TAB_BLOCK_SIZE; i++, buf += 8) { + // we don't have to shift yet, but shifting by 64 allows the + // compiler to produce a single "high bits only" multiplication instruction. + block_hash ^= (tab_multiply_shift_random[i] * GET_U64(buf, 0)) >> 64; + + // the following is very fast, basically using mum, but theoretically wrong. + // __uint128_t mum = (__uint128_t)tab_multiply_shift_random_64[i] * take64(buf); + // block_hash ^= mum ^ (mum >> 64); + } + + // finally we combine the block hash using variable length hashing. + // values have to be less than mersenne for the combination to work. + // we can shift down, since any shift of multiply-shift outputs is + // strongly-universal. + h = combine61(h, tab_multiply_shift_a, block_hash >> 4); } - // finally we combine the block hash using variable length hashing. 
- // values have to be less than mersenne for the combination to work. - // we can shift down, since any shift of multiply-shift outputs is - // strongly-universal. - h = combine61(h, tab_multiply_shift_a, block_hash >> 4); - } - - // in principle we should finish the mersenne modular reduction. - // however, this isn't be needed, since it can never reduce collisions. - // if (h >= TAB_MERSENNE_61) h -= TAB_MERSENNE_61; - } - - // then read the remaining words - const int remaining_words = len_words % TAB_BLOCK_SIZE; - for (int i = 0; i < remaining_words; i++, buf += 8) - h ^= tab_multiply_shift_random[i] * GET_U64(buf,0) >> 64; - } - - // now get the remaining bytes - const int remaining_bytes = len % 8; - if (remaining_bytes) { - uint64_t last = 0; - if (remaining_bytes & 4) {last = GET_U32(buf,0); buf += 4;} - if (remaining_bytes & 2) {last = (last << 16) | GET_U16(buf,0); buf += 2;} - if (remaining_bytes & 1) {last = (last << 8) | (*buf);} - h ^= tab_multiply_shift_b * last >> 64; - } - - uint64_t tab = 0; - for (int i = 0; i < 64/CHAR_SIZE; i++, h >>= CHAR_SIZE) - tab ^= tabulation[i][h % (1<(tab, (uint8_t *)out, 0); + // in principle we should finish the mersenne modular reduction. + // however, this isn't be needed, since it can never reduce collisions. 
+ // if (h >= TAB_MERSENNE_61) h -= TAB_MERSENNE_61; + } + + // then read the remaining words + const int remaining_words = len_words % TAB_BLOCK_SIZE; + for (int i = 0; i < remaining_words; i++, buf += 8) { + h ^= tab_multiply_shift_random[i] * GET_U64(buf, 0) >> 64; + } + } + + // now get the remaining bytes + const int remaining_bytes = len % 8; + if (remaining_bytes) { + uint64_t last = 0; + if (remaining_bytes & 4) { last = GET_U32(buf, 0); buf += 4; } + if (remaining_bytes & 2) { last = (last << 16) | GET_U16(buf, 0); buf += 2; } + if (remaining_bytes & 1) { last = (last << 8) | (*buf); } + h ^= tab_multiply_shift_b * last >> 64; + } + + uint64_t tab = 0; + for (int i = 0; i < 64 / CHAR_SIZE; i++, h >>= CHAR_SIZE) { + tab ^= tabulation[i][h % (1 << CHAR_SIZE)]; + } + + PUT_U64(tab, (uint8_t *)out, 0); } #endif //----------------------------------------------------------------------------- REGISTER_FAMILY(tabulation, - $.src_url = "https://github.com/rurban/smhasher/blob/master/Hashes.cpp", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/rurban/smhasher/blob/master/Hashes.cpp", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(tabulation_32, - $.desc = "32-bit Tabulation with Multiply-Shift Mixer", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_SYSTEM_SPECIFIC, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | // Implementation not yet thread-safe - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_LICENSE_BSD, - $.bits = 32, - $.verification_LE = 0xF951BEFF, - $.verification_BE = 0xFEB31CB2, - $.seedfn = tabulation32_seed, - $.hashfn_native = tabulation32, - $.hashfn_bswap = tabulation32 -); + $.desc = "32-bit Tabulation with Multiply-Shift Mixer", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_SYSTEM_SPECIFIC, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS |// Implementation not yet thread-safe + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_LICENSE_BSD, + $.bits = 32, + $.verification_LE = 0xF951BEFF, + 
$.verification_BE = 0xFEB31CB2, + $.seedfn = tabulation32_seed, + $.hashfn_native = tabulation32, + $.hashfn_bswap = tabulation32 + ); #if defined(HAVE_INT128) REGISTER_HASH(tabulation_64, - $.desc = "64-bit Tabulation with Multiply-Shift Mixer", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_SYSTEM_SPECIFIC, - $.impl_flags = - FLAG_IMPL_SANITY_FAILS | // Implementation not yet thread-safe - FLAG_IMPL_128BIT | - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_LICENSE_BSD, - $.bits = 64, - $.verification_LE = 0x9CE7C3BC, - $.verification_BE = 0x4EE5569F, - $.seedfn = tabulation64_seed, - $.hashfn_native = tabulation64, - $.hashfn_bswap = tabulation64 -); + $.desc = "64-bit Tabulation with Multiply-Shift Mixer", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_SYSTEM_SPECIFIC, + $.impl_flags = + FLAG_IMPL_SANITY_FAILS |// Implementation not yet thread-safe + FLAG_IMPL_128BIT | + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_LICENSE_BSD, + $.bits = 64, + $.verification_LE = 0x9CE7C3BC, + $.verification_BE = 0x4EE5569F, + $.seedfn = tabulation64_seed, + $.hashfn_native = tabulation64, + $.hashfn_bswap = tabulation64 + ); #endif diff --git a/hashes/umash.cpp b/hashes/umash.cpp index 1485efef..a028bd1e 100644 --- a/hashes/umash.cpp +++ b/hashes/umash.cpp @@ -28,51 +28,53 @@ #include "Hashlib.h" #if defined(HAVE_X86_64_CLMUL) -#include "Intrinsics.h" -#include + #include "Intrinsics.h" + #include /* We only use 128-bit vector, as pairs of 64-bit integers. */ typedef __m128i v128; //------------------------------------------------------------ -#include "Mathmult.h" + #include "Mathmult.h" -static inline void mul128(uint64_t x, uint64_t y, uint64_t & hi, uint64_t & lo) { +static inline void mul128( uint64_t x, uint64_t y, uint64_t & hi, uint64_t & lo ) { mult64_128(lo, hi, x, y); } // This is an efficient and portable replacement for GCC's // __builtin_uaddl_overflow(). XXX The builtin detection might happen // later, but for now this is good enough. 
-static inline bool add_overflow(uint64_t x, uint64_t y, uint64_t * sumlo) { -//#if defined(HAVE_BUILTIN_UADD) +static inline bool add_overflow( uint64_t x, uint64_t y, uint64_t * sumlo ) { +// #if defined(HAVE_BUILTIN_UADD) // return __builtin_uaddl_overflow(x, y, sumlo); -//#else +// #else uint64_t c = 0; - x += y; - c += (x < y); + + x += y; + c += (x < y); *sumlo = x; return (c == 0) ? false : true; -//#endif +// #endif } -static NEVER_INLINE uint64_t add_mod_slow_slow_path(uint64_t sum, uint64_t fixup) { +static NEVER_INLINE uint64_t add_mod_slow_slow_path( uint64_t sum, uint64_t fixup ) { /* Reduce sum, mod 2**64 - 8. */ - sum = (sum >= (uint64_t)-8) ? sum + 8 : sum; + sum = (sum >= (uint64_t)-8) ? sum + 8 : sum; /* sum < 2**64 - 8, so this doesn't overflow. */ sum += fixup; /* Reduce again. */ - sum = (sum >= (uint64_t)-8) ? sum + 8 : sum; + sum = (sum >= (uint64_t)-8) ? sum + 8 : sum; return sum; } -static inline uint64_t add_mod_slow(uint64_t x, uint64_t y) { +static inline uint64_t add_mod_slow( uint64_t x, uint64_t y ) { uint64_t sum; uint64_t fixup = 0; /* x + y \equiv sum + fixup */ - if (add_overflow(x, y, &sum)) + if (add_overflow(x, y, &sum)) { fixup = 8; + } /* * We must ensure `sum + fixup < 2**64 - 8`. @@ -82,71 +84,73 @@ static inline uint64_t add_mod_slow(uint64_t x, uint64_t y) { * pseudorandom inputs, but `sum < 2**64 - 16` is almost * always true, for pseudorandom `sum`. */ - if (likely(sum < (uint64_t)-16)) + if (likely(sum < (uint64_t)-16)) { return sum + fixup; + } return add_mod_slow_slow_path(sum, fixup); } -static inline uint64_t add_mod_fast(uint64_t x, uint64_t y) { +static inline uint64_t add_mod_fast( uint64_t x, uint64_t y ) { uint64_t sum; /* If `sum` overflows, `sum + 8` does not. */ - return (add_overflow(x, y, &sum) ? sum + 8 : sum); + return add_overflow(x, y, &sum) ? 
sum + 8 : sum; } -static inline uint64_t mul_mod_fast(uint64_t m, uint64_t x) { +static inline uint64_t mul_mod_fast( uint64_t m, uint64_t x ) { uint64_t hi, lo; mul128(m, x, hi, lo); return add_mod_fast(lo, 8 * hi); } -static inline uint64_t horner_double_update(uint64_t acc, uint64_t m0, uint64_t m1, uint64_t x, uint64_t y) { +static inline uint64_t horner_double_update( uint64_t acc, uint64_t m0, uint64_t m1, uint64_t x, uint64_t y ) { acc = add_mod_fast(acc, x); return add_mod_slow(mul_mod_fast(m0, acc), mul_mod_fast(m1, y)); } -static inline v128 v128_create(uint64_t lo, uint64_t hi) { +static inline v128 v128_create( uint64_t lo, uint64_t hi ) { return _mm_set_epi64x(hi, lo); } -static inline uint64_t v128_getlo(v128 x) { +static inline uint64_t v128_getlo( v128 x ) { return _mm_cvtsi128_si64(x); } -static inline uint64_t v128_gethi(v128 x) { +static inline uint64_t v128_gethi( v128 x ) { return _mm_extract_epi64(x, 1); } /* Shift each 64-bit lane left by one bit. */ -static inline v128 v128_shift(v128 x) { +static inline v128 v128_shift( v128 x ) { return _mm_add_epi64(x, x); } /* Computes the 128-bit carryless product of x and y. */ -static inline v128 v128_clmul(uint64_t x, uint64_t y) { +static inline v128 v128_clmul( uint64_t x, uint64_t y ) { return _mm_clmulepi64_si128(_mm_cvtsi64_si128(x), _mm_cvtsi64_si128(y), 0x00); } /* Computes the 128-bit carryless product of the high and low halves of x. 
*/ -static inline v128 v128_clmul_cross(v128 x) { +static inline v128 v128_clmul_cross( v128 x ) { return _mm_clmulepi64_si128(x, x, 0x01); } //------------------------------------------------------------ enum { - UMASH_OH_PARAM_COUNT = 32, - UMASH_OH_TWISTING_COUNT = 2, - BLOCK_SIZE = (sizeof(uint64_t) * UMASH_OH_PARAM_COUNT), + UMASH_OH_PARAM_COUNT = 32, + UMASH_OH_TWISTING_COUNT = 2, + BLOCK_SIZE = (sizeof(uint64_t) * UMASH_OH_PARAM_COUNT), UMASH_MULTIPLE_BLOCKS_THRESHOLD = 1024, - SPLIT_ACCUMULATOR_MAX_FIXUP = 3, - OH_SHORT_HASH_SHIFT = 4, + SPLIT_ACCUMULATOR_MAX_FIXUP = 3, + OH_SHORT_HASH_SHIFT = 4, }; -#define ARRAY_SIZE(ARR) (sizeof(ARR) / sizeof(ARR[0])) + #define ARRAY_SIZE(ARR) (sizeof(ARR) / sizeof(ARR[0])) -/** +/* + * * A single UMASH params struct stores the parameters for a pair of * independent `UMASH` functions. */ @@ -155,12 +159,12 @@ struct umash_params { * Each uint64_t[2] array consists of {f^2, f}, where f is a * random multiplier in mod 2**61 - 1. */ - uint64_t poly[2][2]; + uint64_t poly[2][2]; /* * The second (twisted) OH function uses an additional * 128-bit constant stored in the last two elements. */ - uint64_t oh[UMASH_OH_PARAM_COUNT + UMASH_OH_TWISTING_COUNT]; + uint64_t oh[UMASH_OH_PARAM_COUNT + UMASH_OH_TWISTING_COUNT]; /* * The seed value that the params were derived from. This is added * for SMHasher3, so that the seed input parameter to the hash @@ -168,45 +172,47 @@ struct umash_params { * thread-local umash_params table. It lets this umash * implementation be thread-safe. */ - uint64_t base_seed; + uint64_t base_seed; }; -/** +/* + * * A fingerprint consists of two independent `UMASH` hash values. */ struct umash_fp { - uint64_t hash[2]; + uint64_t hash[2]; }; -/** +/* + * * Returns `then` if `cond` is true, `otherwise` if false. * * This noise helps compiler emit conditional moves. 
*/ -static inline const void * select_ptr(bool cond, const void * then, const void * otherwise) { +static inline const void * select_ptr( bool cond, const void * then, const void * otherwise ) { const void * ret; -#if defined(HAVE_X86_64_ASM) + #if defined(HAVE_X86_64_ASM) /* Force strict evaluation of both arguments. */ - __asm__("" ::"r"(then), "r"(otherwise)); -#endif + __asm__ ("" ::"r"(then), "r"(otherwise)); + #endif ret = (cond) ? then : otherwise; -#if defined(HAVE_X86_64_ASM) + #if defined(HAVE_X86_64_ASM) /* And also force the result to be materialised with a blackhole. */ - __asm__("" : "+r"(ret)); -#endif + __asm__ ("" : "+r"(ret)); + #endif return ret; } //------------------------------------------------------------ // SHORT -- [0, 8] byte inputs -template < bool bswap > -static inline uint64_t vec_to_u64(const void *data, size_t n_bytes) { +template +static inline uint64_t vec_to_u64( const void * data, size_t n_bytes ) { const uint8_t zeros[2] = { 0 }; - uint32_t hi, lo; + uint32_t hi, lo; /* * If there are at least 4 bytes to read, read the first 4 in @@ -219,7 +225,7 @@ static inline uint64_t vec_to_u64(const void *data, size_t n_bytes) { } else { /* 0 <= n_bytes < 4. Decode the size in binary. */ uint16_t word; - uint8_t byte; + uint8_t byte; /* * If the size is odd, load the first byte in `byte`; @@ -232,8 +238,7 @@ static inline uint64_t vec_to_u64(const void *data, size_t n_bytes) { * If the size is 2 or 3, load the last two bytes in `word`; * otherwise, load in a zero. */ - memcpy(&word, - select_ptr(n_bytes & 2, (const uint8_t *)data + n_bytes - 2, zeros), 2); + memcpy(&word, select_ptr(n_bytes & 2, (const uint8_t *)data + n_bytes - 2, zeros), 2); /* * We have now read `bytes[0 ... n_bytes - 1]` * exactly once without overwriting any data. 
@@ -248,66 +253,64 @@ static inline uint64_t vec_to_u64(const void *data, size_t n_bytes) { return COND_BSWAP(((uint64_t)hi << 32) | (lo + hi), bswap); } -template < bool bswap > -static uint64_t umash_short(const uint64_t *params, uint64_t seed, - const void *data, size_t n_bytes) { +template +static uint64_t umash_short( const uint64_t * params, uint64_t seed, const void * data, size_t n_bytes ) { uint64_t h; seed += params[n_bytes]; - h = vec_to_u64(data, n_bytes); - h ^= h >> 30; - h *= UINT64_C(0xbf58476d1ce4e5b9); - h = (h ^ seed) ^ (h >> 27); - h *= UINT64_C(0x94d049bb133111eb); - h ^= h >> 31; + h = vec_to_u64(data, n_bytes); + h ^= h >> 30; + h *= UINT64_C(0xbf58476d1ce4e5b9); + h = (h ^ seed) ^ (h >> 27); + h *= UINT64_C(0x94d049bb133111eb); + h ^= h >> 31; return h; } -template < bool bswap > -static struct umash_fp umash_fp_short(const uint64_t *params, uint64_t seed, - const void *data, size_t n_bytes) { +template +static struct umash_fp umash_fp_short( const uint64_t * params, uint64_t seed, const void * data, size_t n_bytes ) { struct umash_fp ret; - uint64_t h; + uint64_t h; ret.hash[0] = seed + params[n_bytes]; ret.hash[1] = seed + params[n_bytes + OH_SHORT_HASH_SHIFT]; - h = vec_to_u64(data, n_bytes); + h = vec_to_u64(data, n_bytes); h ^= h >> 30; h *= UINT64_C(0xbf58476d1ce4e5b9); h ^= h >> 27; -#define TAIL(i) \ - do { \ - ret.hash[i] ^= h; \ - ret.hash[i] *= UINT64_C(0x94d049bb133111eb); \ - ret.hash[i] ^= ret.hash[i] >> 31; \ +#define TAIL(i) \ + do { \ + ret.hash[i] ^= h; \ + ret.hash[i] *= UINT64_C(0x94d049bb133111eb); \ + ret.hash[i] ^= ret.hash[i] >> 31; \ } while (0) TAIL(0); TAIL(1); -#undef TAIL + #undef TAIL return ret; } //------------------------------------------------------------ // MEDIUM -- [9, 16] byte inputs -static inline uint64_t finalize(uint64_t x) { +static inline uint64_t finalize( uint64_t x ) { return (x ^ ROTL64(x, 8)) ^ ROTL64(x, 33); } -template < bool bswap > -static uint64_t umash_medium(const uint64_t 
multipliers[2], const uint64_t *oh, uint64_t seed, - const void *data, size_t n_bytes) { +template +static uint64_t umash_medium( const uint64_t multipliers[2], const uint64_t * oh, + uint64_t seed, const void * data, size_t n_bytes ) { uint64_t enh_hi, enh_lo; { const uint8_t * data8 = (const uint8_t *)data; - uint64_t x, y; + uint64_t x, y; - x = GET_U64(data8, 0); - y = GET_U64(data8, n_bytes - 8); + x = GET_U64(data8, 0); + y = GET_U64(data8, n_bytes - 8); x += oh[0]; y += oh[1]; @@ -317,45 +320,45 @@ static uint64_t umash_medium(const uint64_t multipliers[2], const uint64_t *oh, enh_hi ^= enh_lo; return finalize(horner_double_update( - /*acc=*/0, multipliers[0], multipliers[1], enh_lo, enh_hi)); + /*acc=*/ 0, multipliers[0], multipliers[1], enh_lo, enh_hi)); } -template < bool bswap > -static struct umash_fp umash_fp_medium(const uint64_t multipliers[2][2], - const uint64_t *oh, uint64_t seed, const void *data, size_t n_bytes) { +template +static struct umash_fp umash_fp_medium( const uint64_t multipliers[2][2], const uint64_t * oh, + uint64_t seed, const void * data, size_t n_bytes ) { struct umash_fp ret; - const uint64_t offset = seed ^ n_bytes; - uint64_t enh_hi, enh_lo; - v128 v; - uint64_t lrc[2] = { oh[UMASH_OH_PARAM_COUNT], oh[UMASH_OH_PARAM_COUNT + 1] }; - uint64_t x, y; - uint64_t a, b; + const uint64_t offset = seed ^ n_bytes; + uint64_t enh_hi, enh_lo; + v128 v; + uint64_t lrc[2] = { oh[UMASH_OH_PARAM_COUNT], oh[UMASH_OH_PARAM_COUNT + 1] }; + uint64_t x, y; + uint64_t a, b; /* Expand the 9-16 bytes to 16. 
*/ const uint8_t * data8 = (const uint8_t *)data; - x = GET_U64(data8, 0); - y = GET_U64(data8, n_bytes - 8); - a = oh[0]; - b = oh[1]; + x = GET_U64(data8, 0); + y = GET_U64(data8, n_bytes - 8); + + a = oh[0]; + b = oh[1]; lrc[0] ^= x ^ a; lrc[1] ^= y ^ b; - v = v128_clmul(lrc[0], lrc[1]); + v = v128_clmul(lrc[0], lrc[1]); - a += x; - b += y; + a += x; + b += y; mul128(a, b, enh_hi, enh_lo); - enh_hi += offset; - enh_hi ^= enh_lo; + enh_hi += offset; + enh_hi ^= enh_lo; ret.hash[0] = finalize(horner_double_update( - /*acc=*/0, multipliers[0][0], multipliers[0][1], enh_lo, enh_hi)); + /*acc=*/ 0, multipliers[0][0], multipliers[0][1], enh_lo, enh_hi)); - ret.hash[1] = finalize(horner_double_update(/*acc=*/0, - multipliers[1][0], multipliers[1][1], - enh_lo ^ v128_getlo(v), enh_hi ^ v128_gethi(v))); + ret.hash[1] = finalize(horner_double_update(/*acc=*/ 0, multipliers[1][0], multipliers[1][1], + enh_lo ^ v128_getlo(v), enh_hi ^ v128_gethi(v))); return ret; } @@ -363,26 +366,24 @@ static struct umash_fp umash_fp_medium(const uint64_t multipliers[2][2], //------------------------------------------------------------ // LONG -- [17, size_t) byte inputs struct umash_oh { - uint64_t bits[2]; + uint64_t bits[2]; }; struct split_accumulator { - uint64_t base; - uint64_t fixup; + uint64_t base; + uint64_t fixup; }; -static inline uint64_t split_accumulator_eval(struct split_accumulator acc) { +static inline uint64_t split_accumulator_eval( struct split_accumulator acc ) { return add_mod_slow(acc.base, 8 * acc.fixup); } -static inline struct split_accumulator split_accumulator_update( - const struct split_accumulator acc, const uint64_t m0, - const uint64_t m1, uint64_t h0, const uint64_t h1) { - +static inline struct split_accumulator split_accumulator_update( const struct split_accumulator acc, + const uint64_t m0, const uint64_t m1, uint64_t h0, const uint64_t h1 ) { uint64_t partial; uint64_t lo0, hi0, lo1, hi1; uint64_t hi, sum; - int8_t fixup; + int8_t fixup; mul128(m1, 
h1, hi1, lo1); @@ -407,25 +408,24 @@ static inline struct split_accumulator split_accumulator_update( assert(hi0 < (1UL << 61)); assert(hi1 < (1UL << 61)); /* hi0 and hi1 < 2**61, so this addition never overflows. */ - hi = hi0 + hi1; + hi = hi0 + hi1; fixup += (hi & (1ULL << 61)) != 0; - hi *= 8; + hi *= 8; fixup += add_overflow(sum, hi, &sum); return (struct split_accumulator) { - .base = sum, - /* Avoid sign extension: we know `fixup` is non-negative. */ - .fixup = (uint8_t)fixup, + . base = sum, + /* Avoid sign extension: we know `fixup` is non-negative. */ + .fixup = (uint8_t)fixup, }; } // This is umash_multiple_blocks_generic(). -template < bool bswap > -static uint64_t umash_multiple_blocks(uint64_t initial, - const uint64_t multipliers[2], const uint64_t *oh_ptr, uint64_t seed, - const void *blocks, size_t n_blocks) { +template +static uint64_t umash_multiple_blocks( uint64_t initial, const uint64_t multipliers[2], const uint64_t * oh_ptr, + uint64_t seed, const void * blocks, size_t n_blocks ) { const uint64_t m0 = multipliers[0]; const uint64_t m1 = multipliers[1]; const uint64_t kx = oh_ptr[UMASH_OH_PARAM_COUNT - 2]; @@ -437,7 +437,7 @@ static uint64_t umash_multiple_blocks(uint64_t initial, do { const uint8_t * data = (const uint8_t *)blocks; struct umash_oh oh; - v128 acc = { 0, 0 }; + v128 acc = { 0, 0 }; blocks = (const uint8_t *)blocks + BLOCK_SIZE; @@ -448,19 +448,19 @@ static uint64_t umash_multiple_blocks(uint64_t initial, * the inner loop's xor-reduction tree widely: the * bottleneck is in the carryless multiplications. 
*/ -#define FORCE() ((void)0) - -#define PH(I) \ - do { \ - v128 x, k; \ - \ - x = _mm_loadu_si128((const v128 *)data); \ - if (bswap) { x = mm_bswap64(x); } \ - data = data + sizeof(x); \ - \ - k = _mm_loadu_si128((const v128 *)&oh_ptr[I]); \ - x ^= k; \ - acc ^= v128_clmul_cross(x); \ + #define FORCE() ((void)0) + +#define PH(I) \ + do { \ + v128 x, k; \ + \ + x = _mm_loadu_si128((const v128 *)data); \ + if (bswap) { x = mm_bswap64(x); } \ + data = data + sizeof(x); \ + \ + k = _mm_loadu_si128((const v128 *)&oh_ptr[I]); \ + x ^= k; \ + acc ^= v128_clmul_cross(x); \ } while (0) PH(0); @@ -471,7 +471,7 @@ static uint64_t umash_multiple_blocks(uint64_t initial, PH(6); FORCE(); - PH(8); + PH( 8); PH(10); FORCE(); @@ -493,8 +493,8 @@ static uint64_t umash_multiple_blocks(uint64_t initial, PH(28); -#undef PH -#undef FORCE + #undef PH + #undef FORCE memcpy(&oh, &acc, sizeof(oh)); @@ -502,72 +502,70 @@ static uint64_t umash_multiple_blocks(uint64_t initial, { uint64_t x, y, enh_hi, enh_lo; - x = GET_U64(data, 0); - y = GET_U64(data, 8); + x = GET_U64(data, 0); + y = GET_U64(data, 8); x += kx; y += ky; mul128(x, y, enh_hi, enh_lo); - enh_hi += seed; + enh_hi += seed; oh.bits[0] ^= enh_lo; oh.bits[1] ^= enh_hi ^ enh_lo; } ret = split_accumulator_update(ret, m0, m1, oh.bits[0], oh.bits[1]); - } while (--n_blocks); return split_accumulator_eval(ret); } -template < bool bswap > -static struct umash_fp umash_fprint_multiple_blocks(struct umash_fp initial, - const uint64_t multipliers[2][2], const uint64_t *oh, uint64_t seed, - const void * blocks, size_t n_blocks) { +template +static struct umash_fp umash_fprint_multiple_blocks( struct umash_fp initial, const uint64_t multipliers[2][2], + const uint64_t * oh, uint64_t seed, const void * blocks, size_t n_blocks ) { const v128 lrc_init = - v128_create(oh[UMASH_OH_PARAM_COUNT], oh[UMASH_OH_PARAM_COUNT + 1]); - const uint64_t m00 = multipliers[0][0]; - const uint64_t m01 = multipliers[0][1]; - const uint64_t m10 = 
multipliers[1][0]; - const uint64_t m11 = multipliers[1][1]; + v128_create(oh[UMASH_OH_PARAM_COUNT], oh[UMASH_OH_PARAM_COUNT + 1]); + const uint64_t m00 = multipliers[0][0]; + const uint64_t m01 = multipliers[0][1]; + const uint64_t m10 = multipliers[1][0]; + const uint64_t m11 = multipliers[1][1]; struct split_accumulator acc0 = { .base = initial.hash[0] }; struct split_accumulator acc1 = { .base = initial.hash[1] }; do { struct umash_oh compressed[2]; - v128 acc = { 0, 0 }; /* Base umash */ - v128 acc_shifted = { 0, 0 }; /* Accumulates shifted values */ - v128 lrc = lrc_init; - const uint8_t * data = (const uint8_t *)blocks; + v128 acc = { 0, 0 }; /* Base umash */ + v128 acc_shifted = { 0, 0 }; /* Accumulates shifted values */ + v128 lrc = lrc_init; + const uint8_t * data = (const uint8_t *)blocks; blocks = (const uint8_t *)blocks + BLOCK_SIZE; -#define FORCE() ((void)0) - -#define TWIST(I) \ - do { \ - v128 x, k; \ - \ - x = _mm_loadu_si128((const v128 *)data); \ - if (bswap) { x = mm_bswap64(x); } \ - data = data + sizeof(x); \ - \ - k = _mm_loadu_si128((const v128 *)&oh[I]); \ - \ - x ^= k; \ - lrc ^= x; \ - \ - x = v128_clmul_cross(x); \ - \ - acc ^= x; \ - \ - if (I == 28) \ - break; \ - \ - acc_shifted ^= x; \ - acc_shifted = v128_shift(acc_shifted); \ + #define FORCE() ((void)0) + +#define TWIST(I) \ + do { \ + v128 x, k; \ + \ + x = _mm_loadu_si128((const v128 *)data); \ + if (bswap) { x = mm_bswap64(x); } \ + data = data + sizeof(x); \ + \ + k = _mm_loadu_si128((const v128 *)&oh[I]); \ + \ + x ^= k; \ + lrc ^= x; \ + \ + x = v128_clmul_cross(x); \ + \ + acc ^= x; \ + \ + if (I == 28) \ + break; \ + \ + acc_shifted ^= x; \ + acc_shifted = v128_shift(acc_shifted); \ } while (0) TWIST(0); @@ -601,32 +599,32 @@ static struct umash_fp umash_fprint_multiple_blocks(struct umash_fp initial, TWIST(28); FORCE(); -#undef TWIST -#undef FORCE + #undef TWIST + #undef FORCE { v128 x, k; - x = _mm_loadu_si128((const v128 *)data); + x = _mm_loadu_si128((const v128 
*)data); if (bswap) { x = mm_bswap64(x); } - k = _mm_loadu_si128((const v128 *)&oh[30]); + k = _mm_loadu_si128((const v128 *)&oh[30]); lrc ^= x ^ k; } acc_shifted ^= acc; - acc_shifted = v128_shift(acc_shifted); + acc_shifted = v128_shift(acc_shifted); acc_shifted ^= v128_clmul_cross(lrc); - memcpy(&compressed[0], &acc, sizeof(compressed[0])); + memcpy(&compressed[0], &acc , sizeof(compressed[0])); memcpy(&compressed[1], &acc_shifted, sizeof(compressed[1])); { uint64_t x, y, kx, ky, enh_hi, enh_lo; - x = GET_U64(data, 0); - y = GET_U64(data, 8); + x = GET_U64(data, 0); + y = GET_U64(data, 8); kx = x + oh[30]; ky = y + oh[31]; @@ -642,42 +640,39 @@ static struct umash_fp umash_fprint_multiple_blocks(struct umash_fp initial, compressed[1].bits[1] ^= enh_hi; } - acc0 = split_accumulator_update( - acc0, m00, m01, compressed[0].bits[0], compressed[0].bits[1]); - acc1 = split_accumulator_update( - acc1, m10, m11, compressed[1].bits[0], compressed[1].bits[1]); + acc0 = split_accumulator_update(acc0, m00, m01, compressed[0].bits[0], compressed[0].bits[1]); + acc1 = split_accumulator_update(acc1, m10, m11, compressed[1].bits[0], compressed[1].bits[1]); } while (--n_blocks); return (struct umash_fp) { - .hash = { - split_accumulator_eval(acc0), - split_accumulator_eval(acc1), - }, - }; + . hash = { + split_accumulator_eval(acc0), + split_accumulator_eval(acc1), + }, + }; } -template < bool bswap > -static struct umash_oh oh_varblock(const uint64_t *params, uint64_t tag, - const void * block, size_t n_bytes) { +template +static struct umash_oh oh_varblock( const uint64_t * params, uint64_t tag, const void * block, size_t n_bytes ) { struct umash_oh ret; - v128 acc = { 0, 0 }; + v128 acc = { 0, 0 }; /* The final block processes `remaining > 0` bytes. 
*/ - size_t remaining = 1 + ((n_bytes - 1) % sizeof(v128)); - size_t end_full_pairs = (n_bytes - remaining) / sizeof(uint64_t); - const uint8_t * last_ptr = (const uint8_t *)block + n_bytes - sizeof(v128); - size_t i; + size_t remaining = 1 + ((n_bytes - 1 ) % sizeof(v128) ); + size_t end_full_pairs = (n_bytes - remaining) / sizeof(uint64_t); + const uint8_t * last_ptr = (const uint8_t *)block + n_bytes - sizeof(v128); + size_t i; for (i = 0; i < end_full_pairs; i += 2) { v128 x, k; - x = _mm_loadu_si128((const v128 *)block); + x = _mm_loadu_si128((const v128 *)block); if (bswap) { x = mm_bswap64(x); } block = (const uint8_t *)block + sizeof(x); - k = _mm_loadu_si128((const v128 *)¶ms[i]); - x ^= k; - acc ^= v128_clmul_cross(x); + k = _mm_loadu_si128((const v128 *)¶ms[i]); + x ^= k; + acc ^= v128_clmul_cross(x); } memcpy(&ret, &acc, sizeof(ret)); @@ -686,13 +681,13 @@ static struct umash_oh oh_varblock(const uint64_t *params, uint64_t tag, { uint64_t x, y, enh_hi, enh_lo; - x = GET_U64(last_ptr, 0); - y = GET_U64(last_ptr, 8); + x = GET_U64(last_ptr, 0); + y = GET_U64(last_ptr, 8); - x += params[i]; - y += params[i + 1]; + x += params[i]; + y += params[i + 1]; mul128(x, y, enh_hi, enh_lo); - enh_hi += tag; + enh_hi += tag; ret.bits[0] ^= enh_lo; ret.bits[1] ^= enh_hi ^ enh_lo; @@ -701,39 +696,40 @@ static struct umash_oh oh_varblock(const uint64_t *params, uint64_t tag, return ret; } -template < bool bswap > -static void oh_varblock_fprint(struct umash_oh dst[2], const uint64_t *params, - uint64_t tag, const void * block, size_t n_bytes) { - v128 acc = { 0, 0 }; /* Base umash */ +template +static void oh_varblock_fprint( struct umash_oh dst[2], const uint64_t * params, + uint64_t tag, const void * block, size_t n_bytes ) { + v128 acc = { 0, 0 }; /* Base umash */ v128 acc_shifted = { 0, 0 }; /* Accumulates shifted values */ v128 lrc; /* The final block processes `remaining > 0` bytes. 
*/ - size_t remaining = 1 + ((n_bytes - 1) % sizeof(v128)); - size_t end_full_pairs = (n_bytes - remaining) / sizeof(uint64_t); - const uint8_t * last_ptr = (const uint8_t *)block + n_bytes - sizeof(v128); - size_t i; + size_t remaining = 1 + ((n_bytes - 1 ) % sizeof(v128) ); + size_t end_full_pairs = (n_bytes - remaining) / sizeof(uint64_t); + const uint8_t * last_ptr = (const uint8_t *)block + n_bytes - sizeof(v128); + size_t i; lrc = v128_create(params[UMASH_OH_PARAM_COUNT], params[UMASH_OH_PARAM_COUNT + 1]); for (i = 0; i < end_full_pairs; i += 2) { v128 x, k; - x = _mm_loadu_si128((const v128 *)block); + x = _mm_loadu_si128((const v128 *)block); if (bswap) { x = mm_bswap64(x); } block = (const uint8_t *)block + sizeof(x); - k = _mm_loadu_si128((const v128 *)¶ms[i]); + k = _mm_loadu_si128((const v128 *)¶ms[i]); - x ^= k; - lrc ^= x; + x ^= k; + lrc ^= x; - x = v128_clmul_cross(x); + x = v128_clmul_cross(x); - acc ^= x; - if (i + 2 >= end_full_pairs) + acc ^= x; + if (i + 2 >= end_full_pairs) { break; + } acc_shifted ^= x; - acc_shifted = v128_shift(acc_shifted); + acc_shifted = v128_shift(acc_shifted); } /* @@ -743,34 +739,34 @@ static void oh_varblock_fprint(struct umash_oh dst[2], const uint64_t *params, { v128 x, k; - x = _mm_loadu_si128((const v128 *)last_ptr); + x = _mm_loadu_si128((const v128 *)last_ptr); if (bswap) { x = mm_bswap64(x); } - k = _mm_loadu_si128((const v128 *)¶ms[end_full_pairs]); + k = _mm_loadu_si128((const v128 *)¶ms[end_full_pairs]); lrc ^= x ^ k; } acc_shifted ^= acc; - acc_shifted = v128_shift(acc_shifted); + acc_shifted = v128_shift(acc_shifted); acc_shifted ^= v128_clmul_cross(lrc); - memcpy(&dst[0], &acc, sizeof(dst[0])); + memcpy(&dst[0], &acc , sizeof(dst[0])); memcpy(&dst[1], &acc_shifted, sizeof(dst[1])); { uint64_t x, y, kx, ky, enh_hi, enh_lo; - x = GET_U64(last_ptr, 0); - y = GET_U64(last_ptr, 8); + x = GET_U64(last_ptr, 0); + y = GET_U64(last_ptr, 8); - kx = x + params[end_full_pairs]; + kx = x + params[end_full_pairs ]; ky 
= y + params[end_full_pairs + 1]; mul128(kx, ky, enh_hi, enh_lo); - enh_hi += tag; + enh_hi += tag; - enh_hi ^= enh_lo; + enh_hi ^= enh_lo; dst[0].bits[0] ^= enh_lo; dst[0].bits[1] ^= enh_hi; @@ -779,23 +775,24 @@ static void oh_varblock_fprint(struct umash_oh dst[2], const uint64_t *params, } } -template < bool bswap > -static uint64_t umash_long(const uint64_t multipliers[2], const uint64_t *oh, - uint64_t seed, const void *data, size_t n_bytes) { +template +static uint64_t umash_long( const uint64_t multipliers[2], const uint64_t * oh, + uint64_t seed, const void * data, size_t n_bytes ) { uint64_t acc = 0; // This invokes the optional routines for very long inputs if (unlikely(n_bytes >= UMASH_MULTIPLE_BLOCKS_THRESHOLD)) { - size_t n_block = n_bytes / BLOCK_SIZE; - const void *remaining; + size_t n_block = n_bytes / BLOCK_SIZE; + const void * remaining; - n_bytes %= BLOCK_SIZE; + n_bytes %= BLOCK_SIZE; remaining = (const uint8_t *)data + (n_block * BLOCK_SIZE); - acc = umash_multiple_blocks(acc, multipliers, oh, seed, data, n_block); + acc = umash_multiple_blocks(acc, multipliers, oh, seed, data, n_block); - data = remaining; - if (n_bytes == 0) + data = remaining; + if (n_bytes == 0) { goto finalize; + } goto last_block; } @@ -804,51 +801,50 @@ static uint64_t umash_long(const uint64_t multipliers[2], const uint64_t *oh, struct umash_oh compressed; compressed = oh_varblock(oh, seed, data, BLOCK_SIZE); - data = (const uint8_t *)data + BLOCK_SIZE; - n_bytes -= BLOCK_SIZE; + data = (const uint8_t *)data + BLOCK_SIZE; + n_bytes -= BLOCK_SIZE; - acc = horner_double_update(acc, multipliers[0], multipliers[1], - compressed.bits[0], compressed.bits[1]); + acc = horner_double_update(acc, multipliers[0], multipliers[1], compressed.bits[0], compressed.bits[1]); } -last_block: + last_block: /* Do the final block. 
*/ { struct umash_oh compressed; - seed ^= (uint8_t)n_bytes; + seed ^= (uint8_t)n_bytes; compressed = oh_varblock(oh, seed, data, n_bytes); - acc = horner_double_update(acc, multipliers[0], multipliers[1], - compressed.bits[0], compressed.bits[1]); + acc = horner_double_update(acc, multipliers[0], multipliers[1], compressed.bits[0], compressed.bits[1]); } -finalize: + finalize: return finalize(acc); } -template < bool bswap > -static struct umash_fp umash_fp_long(const uint64_t multipliers[2][2], const uint64_t *oh, - uint64_t seed, const void *data, size_t n_bytes) { +template +static struct umash_fp umash_fp_long( const uint64_t multipliers[2][2], const uint64_t * oh, + uint64_t seed, const void * data, size_t n_bytes ) { struct umash_oh compressed[2]; struct umash_fp ret; - uint64_t acc[2] = { 0, 0 }; + uint64_t acc[2] = { 0, 0 }; // This invokes the optional routines for very long inputs if (unlikely(n_bytes >= UMASH_MULTIPLE_BLOCKS_THRESHOLD)) { - struct umash_fp poly = { .hash = { 0, 0 } }; - size_t n_block = n_bytes / BLOCK_SIZE; - const void *remaining; + struct umash_fp poly = { .hash = { 0 , 0 } }; + size_t n_block = n_bytes / BLOCK_SIZE; + const void * remaining; - n_bytes %= BLOCK_SIZE; + n_bytes %= BLOCK_SIZE; remaining = (const uint8_t *)data + (n_block * BLOCK_SIZE); - poly = umash_fprint_multiple_blocks(poly, multipliers, oh, seed, data, n_block); + poly = umash_fprint_multiple_blocks(poly, multipliers, oh, seed, data, n_block); - acc[0] = poly.hash[0]; - acc[1] = poly.hash[1]; + acc[0] = poly.hash[0]; + acc[1] = poly.hash[1]; - data = remaining; - if (n_bytes == 0) + data = remaining; + if (n_bytes == 0) { goto finalize; + } goto last_block; } @@ -862,27 +858,27 @@ static struct umash_fp umash_fp_long(const uint64_t multipliers[2][2], const uin UPDATE(0); UPDATE(1); -#undef UPDATE + #undef UPDATE - data = (const uint8_t *)data + BLOCK_SIZE; + data = (const uint8_t *)data + BLOCK_SIZE; n_bytes -= BLOCK_SIZE; } -last_block: + last_block: 
oh_varblock_fprint(compressed, oh, seed ^ (uint8_t)n_bytes, data, n_bytes); -#define FINAL(i) \ - do { \ - acc[i] = horner_double_update(acc[i], multipliers[i][0], \ - multipliers[i][1], compressed[i].bits[0], \ - compressed[i].bits[1]); \ +#define FINAL(i) \ + do { \ + acc[i] = horner_double_update(acc[i], multipliers[i][0], \ + multipliers[i][1], compressed[i].bits[0], \ + compressed[i].bits[1]); \ } while (0) FINAL(0); FINAL(1); -#undef FINAL + #undef FINAL -finalize: + finalize: ret.hash[0] = finalize(acc[0]); ret.hash[1] = finalize(acc[1]); return ret; @@ -890,9 +886,8 @@ static struct umash_fp umash_fp_long(const uint64_t multipliers[2][2], const uin //------------------------------------------------------------ // This is hardcoded to which == 0. -template < bool bswap > -static uint64_t umash_full(const struct umash_params *params, uint64_t seed, - const void *data, size_t n_bytes) { +template +static uint64_t umash_full( const struct umash_params * params, uint64_t seed, const void * data, size_t n_bytes ) { /* * It's not that short inputs are necessarily more likely, but * we want to make sure they fall through correctly to @@ -909,9 +904,9 @@ static uint64_t umash_full(const struct umash_params *params, uint64_t seed, } } -template < bool bswap > -static struct umash_fp umash_fprint(const struct umash_params *params, uint64_t seed, - const void *data, size_t n_bytes) { +template +static struct umash_fp umash_fprint( const struct umash_params * params, + uint64_t seed, const void * data, size_t n_bytes ) { if (likely(n_bytes <= sizeof(v128))) { if (likely(n_bytes <= sizeof(uint64_t))) { return umash_fp_short(params->oh, seed, data, n_bytes); @@ -924,74 +919,73 @@ static struct umash_fp umash_fprint(const struct umash_params *params, uint64_t } //------------------------------------------------------------ -static void core_salsa20(uint8_t * out, const uint8_t in[16], const uint8_t key[32], - const uint8_t constant[16]) { +static void core_salsa20( uint8_t 
* out, const uint8_t in[16], const uint8_t key[32], const uint8_t constant[16] ) { enum { ROUNDS = 20 }; uint32_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; uint32_t j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; - j0 = x0 = GET_U32(constant, 0); - j1 = x1 = GET_U32(key, 0); - j2 = x2 = GET_U32(key, 4); - j3 = x3 = GET_U32(key, 8); - j4 = x4 = GET_U32(key, 12); - j5 = x5 = GET_U32(constant, 4); - j6 = x6 = GET_U32(in, 0); - j7 = x7 = GET_U32(in, 4); - j8 = x8 = GET_U32(in, 8); - j9 = x9 = GET_U32(in, 12); - j10 = x10 = GET_U32(constant, 8); - j11 = x11 = GET_U32(key, 16); - j12 = x12 = GET_U32(key, 20); - j13 = x13 = GET_U32(key, 24); - j14 = x14 = GET_U32(key, 28); + j0 = x0 = GET_U32(constant, 0); + j1 = x1 = GET_U32(key , 0); + j2 = x2 = GET_U32(key , 4); + j3 = x3 = GET_U32(key , 8); + j4 = x4 = GET_U32(key , 12); + j5 = x5 = GET_U32(constant, 4); + j6 = x6 = GET_U32(in , 0); + j7 = x7 = GET_U32(in , 4); + j8 = x8 = GET_U32(in , 8); + j9 = x9 = GET_U32(in , 12); + j10 = x10 = GET_U32(constant, 8); + j11 = x11 = GET_U32(key , 16); + j12 = x12 = GET_U32(key , 20); + j13 = x13 = GET_U32(key , 24); + j14 = x14 = GET_U32(key , 28); j15 = x15 = GET_U32(constant, 12); for (size_t i = 0; i < ROUNDS; i += 2) { - x4 ^= ROTL32(x0 + x12, 7); - x8 ^= ROTL32(x4 + x0, 9); - x12 ^= ROTL32(x8 + x4, 13); - x0 ^= ROTL32(x12 + x8, 18); - x9 ^= ROTL32(x5 + x1, 7); - x13 ^= ROTL32(x9 + x5, 9); - x1 ^= ROTL32(x13 + x9, 13); - x5 ^= ROTL32(x1 + x13, 18); - x14 ^= ROTL32(x10 + x6, 7); - x2 ^= ROTL32(x14 + x10, 9); - x6 ^= ROTL32(x2 + x14, 13); - x10 ^= ROTL32(x6 + x2, 18); - x3 ^= ROTL32(x15 + x11, 7); - x7 ^= ROTL32(x3 + x15, 9); - x11 ^= ROTL32(x7 + x3, 13); - x15 ^= ROTL32(x11 + x7, 18); - x1 ^= ROTL32(x0 + x3, 7); - x2 ^= ROTL32(x1 + x0, 9); - x3 ^= ROTL32(x2 + x1, 13); - x0 ^= ROTL32(x3 + x2, 18); - x6 ^= ROTL32(x5 + x4, 7); - x7 ^= ROTL32(x6 + x5, 9); - x4 ^= ROTL32(x7 + x6, 13); - x5 ^= ROTL32(x4 + x7, 18); - x11 ^= ROTL32(x10 + 
x9, 7); - x8 ^= ROTL32(x11 + x10, 9); - x9 ^= ROTL32(x8 + x11, 13); - x10 ^= ROTL32(x9 + x8, 18); - x12 ^= ROTL32(x15 + x14, 7); - x13 ^= ROTL32(x12 + x15, 9); + x4 ^= ROTL32(x0 + x12 , 7); + x8 ^= ROTL32(x4 + x0 , 9); + x12 ^= ROTL32(x8 + x4 , 13); + x0 ^= ROTL32(x12 + x8 , 18); + x9 ^= ROTL32(x5 + x1 , 7); + x13 ^= ROTL32(x9 + x5 , 9); + x1 ^= ROTL32(x13 + x9 , 13); + x5 ^= ROTL32(x1 + x13 , 18); + x14 ^= ROTL32(x10 + x6 , 7); + x2 ^= ROTL32(x14 + x10, 9); + x6 ^= ROTL32(x2 + x14 , 13); + x10 ^= ROTL32(x6 + x2 , 18); + x3 ^= ROTL32(x15 + x11, 7); + x7 ^= ROTL32(x3 + x15 , 9); + x11 ^= ROTL32(x7 + x3 , 13); + x15 ^= ROTL32(x11 + x7 , 18); + x1 ^= ROTL32(x0 + x3 , 7); + x2 ^= ROTL32(x1 + x0 , 9); + x3 ^= ROTL32(x2 + x1 , 13); + x0 ^= ROTL32(x3 + x2 , 18); + x6 ^= ROTL32(x5 + x4 , 7); + x7 ^= ROTL32(x6 + x5 , 9); + x4 ^= ROTL32(x7 + x6 , 13); + x5 ^= ROTL32(x4 + x7 , 18); + x11 ^= ROTL32(x10 + x9 , 7); + x8 ^= ROTL32(x11 + x10, 9); + x9 ^= ROTL32(x8 + x11 , 13); + x10 ^= ROTL32(x9 + x8 , 18); + x12 ^= ROTL32(x15 + x14, 7); + x13 ^= ROTL32(x12 + x15, 9); x14 ^= ROTL32(x13 + x12, 13); x15 ^= ROTL32(x14 + x13, 18); } - x0 += j0; - x1 += j1; - x2 += j2; - x3 += j3; - x4 += j4; - x5 += j5; - x6 += j6; - x7 += j7; - x8 += j8; - x9 += j9; + x0 += j0; + x1 += j1; + x2 += j2; + x3 += j3; + x4 += j4; + x5 += j5; + x6 += j6; + x7 += j7; + x8 += j8; + x9 += j9; x10 += j10; x11 += j11; x12 += j12; @@ -999,16 +993,16 @@ static void core_salsa20(uint8_t * out, const uint8_t in[16], const uint8_t key[ x14 += j14; x15 += j15; - PUT_U32( x0, out, 0); - PUT_U32( x1, out, 4); - PUT_U32( x2, out, 8); - PUT_U32( x3, out, 12); - PUT_U32( x4, out, 16); - PUT_U32( x5, out, 20); - PUT_U32( x6, out, 24); - PUT_U32( x7, out, 28); - PUT_U32( x8, out, 32); - PUT_U32( x9, out, 36); + PUT_U32(x0 , out, 0); + PUT_U32(x1 , out, 4); + PUT_U32(x2 , out, 8); + PUT_U32(x3 , out, 12); + PUT_U32(x4 , out, 16); + PUT_U32(x5 , out, 20); + PUT_U32(x6 , out, 24); + PUT_U32(x7 , out, 28); + PUT_U32(x8 , out, 
32); + PUT_U32(x9 , out, 36); PUT_U32(x10, out, 40); PUT_U32(x11, out, 44); PUT_U32(x12, out, 48); @@ -1017,13 +1011,13 @@ static void core_salsa20(uint8_t * out, const uint8_t in[16], const uint8_t key[ PUT_U32(x15, out, 60); } -static void salsa20_stream(void *dst, size_t len, const uint8_t nonce[8], - const uint8_t key[32]) { +static void salsa20_stream( void * dst, size_t len, const uint8_t nonce[8], const uint8_t key[32] ) { static const uint8_t sigma[17] = "expand 32-byte k"; uint8_t in[16]; - if (len == 0) + if (len == 0) { return; + } memcpy(in, nonce, 8); memset(in + 8, 0, 8); @@ -1033,12 +1027,12 @@ static void salsa20_stream(void *dst, size_t len, const uint8_t nonce[8], unsigned int u = 1; for (size_t i = 8; i < 16; i++) { - u += in[i]; + u += in[i]; in[i] = u; - u >>= 8; + u >>= 8; } - dst = (uint8_t *)dst + 64; + dst = (uint8_t *)dst + 64; len -= 64; } @@ -1049,30 +1043,31 @@ static void salsa20_stream(void *dst, size_t len, const uint8_t nonce[8], } } -static bool value_is_repeated(const uint64_t *values, size_t n, uint64_t needle) { +static bool value_is_repeated( const uint64_t * values, size_t n, uint64_t needle ) { for (size_t i = 0; i < n; i++) { - if (values[i] == needle) + if (values[i] == needle) { return true; + } } return false; } -static bool umash_params_prepare(struct umash_params *params) { +static bool umash_params_prepare( struct umash_params * params ) { static const uint64_t modulo = (1UL << 61) - 1; /* * The polynomial parameters have two redundant fields (for * the pre-squared multipliers). Use them as our source of * extra entropy if needed. 
*/ - uint64_t buf[] = { params->poly[0][0], params->poly[1][0] }; - size_t buf_idx = 0; - -#define GET_RANDOM(DST) \ - do { \ - if (buf_idx >= ARRAY_SIZE(buf)) \ - return false; \ - \ - (DST) = buf[buf_idx++]; \ + uint64_t buf[] = { params->poly[0][0], params->poly[1][0] }; + size_t buf_idx = 0; + +#define GET_RANDOM(DST) \ + do { \ + if (buf_idx >= ARRAY_SIZE(buf)) \ + return false; \ + \ + (DST) = buf[buf_idx++]; \ } while (0) /* Check the polynomial multipliers: we don't want 0s. */ @@ -1085,8 +1080,9 @@ static bool umash_params_prepare(struct umash_params *params) { * guarantee uniformity. */ f &= (1UL << 61) - 1; - if (f != 0 && f < modulo) + if ((f != 0) && (f < modulo)) { break; + } GET_RANDOM(f); } @@ -1098,35 +1094,38 @@ static bool umash_params_prepare(struct umash_params *params) { /* Avoid repeated OH noise values. */ for (size_t i = 0; i < ARRAY_SIZE(params->oh); i++) { - while (value_is_repeated(params->oh, i, params->oh[i])) + while (value_is_repeated(params->oh, i, params->oh[i])) { GET_RANDOM(params->oh[i]); + } } return true; } -static void umash_params_derive(struct umash_params *params, uint64_t bits, const void *key) { +static void umash_params_derive( struct umash_params * params, uint64_t bits, const void * key ) { uint8_t umash_key[33] = "Do not use UMASH VS adversaries."; params->base_seed = bits; - if (key != NULL) + if (key != NULL) { memcpy(umash_key, key, sizeof(umash_key)); + } while (true) { uint8_t nonce[8]; - for (size_t i = 0; i < 8; i++) + for (size_t i = 0; i < 8; i++) { nonce[i] = bits >> (8 * i); + } /* * The "- sizeof(uint64_t)" is so that params->base_seed * doesn't get overwritten. 
*/ - salsa20_stream(params, sizeof(*params) - sizeof(uint64_t), - nonce, umash_key); - if (umash_params_prepare(params)) + salsa20_stream(params, sizeof(*params) - sizeof(uint64_t), nonce, umash_key); + if (umash_params_prepare(params)) { return; + } /* * This should practically never fail, so really @@ -1145,37 +1144,40 @@ static void umash_params_derive(struct umash_params *params, uint64_t bits, cons // mode. This is because the (now) thread-local global table would // never be initialized in the thread, and so would be all zeroes. -static uintptr_t umash_slow_reseed(const seed_t seed) { +static uintptr_t umash_slow_reseed( const seed_t seed ) { static thread_local struct umash_params umash_params_local; + umash_params_derive(&umash_params_local, seed, NULL); return (uintptr_t)(&umash_params_local); } static struct umash_params umash_params_global; -static bool umash_init(void) { +static bool umash_init( void ) { umash_params_derive(&umash_params_global, 0, NULL); umash_slow_reseed(0); return true; } -template < bool reseed, bool bswap > -static void UMASH(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void UMASH( const void * in, const size_t len, const seed_t seed, void * out ) { const struct umash_params * params = reseed ? - (const struct umash_params *)(uintptr_t)seed : - &umash_params_global; + (const struct umash_params *)(uintptr_t)seed : + &umash_params_global; const uint64_t hseed = reseed ? params->base_seed : (uint64_t)seed; - uint64_t hash = umash_full(params, hseed, in, len); + uint64_t hash = umash_full(params, hseed, in, len); + PUT_U64(hash, (uint8_t *)out, 0); } -template < bool reseed, bool bswap > -static void UMASH_FP(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void UMASH_FP( const void * in, const size_t len, const seed_t seed, void * out ) { const struct umash_params * params = reseed ? 
- (const struct umash_params *)(uintptr_t)seed : - &umash_params_global; - const uint64_t hseed = reseed ? params->base_seed : (uint64_t)seed; - struct umash_fp hash = umash_fprint(params, hseed, in, len); + (const struct umash_params *)(uintptr_t)seed : + &umash_params_global; + const uint64_t hseed = reseed ? params->base_seed : (uint64_t)seed; + struct umash_fp hash = umash_fprint(params, hseed, in, len); + PUT_U64(hash.hash[0], (uint8_t *)out, 0); PUT_U64(hash.hash[1], (uint8_t *)out, 8); } @@ -1183,80 +1185,80 @@ static void UMASH_FP(const void * in, const size_t len, const seed_t seed, void #endif //------------------------------------------------------------ REGISTER_FAMILY(umash, - $.src_url = "https://github.com/backtrace-labs/umash", - $.src_status = HashFamilyInfo::SRC_ACTIVE -); + $.src_url = "https://github.com/backtrace-labs/umash", + $.src_status = HashFamilyInfo::SRC_ACTIVE + ); #if defined(HAVE_X86_64_CLMUL) REGISTER_HASH(UMASH_64, - $.desc = "UMASH-64 (which == 0)", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_CLMUL_BASED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x36A264CD, - $.verification_BE = 0x84DA635B, - $.hashfn_native = UMASH, - $.hashfn_bswap = UMASH, - $.initfn = umash_init -); + $.desc = "UMASH-64 (which == 0)", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_CLMUL_BASED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x36A264CD, + $.verification_BE = 0x84DA635B, + $.hashfn_native = UMASH, + $.hashfn_bswap = UMASH, + $.initfn = umash_init + ); REGISTER_HASH(UMASH_64__reseed, - $.desc = "UMASH-64 (which == 0, with full reseeding)", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_CLMUL_BASED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 64, - $.verification_LE = 0x161495C6, - 
$.verification_BE = 0xF18B8420, - $.hashfn_native = UMASH, - $.hashfn_bswap = UMASH, - $.seedfn = umash_slow_reseed, - $.initfn = umash_init -); + $.desc = "UMASH-64 (which == 0, with full reseeding)", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_CLMUL_BASED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 64, + $.verification_LE = 0x161495C6, + $.verification_BE = 0xF18B8420, + $.hashfn_native = UMASH, + $.hashfn_bswap = UMASH, + $.seedfn = umash_slow_reseed, + $.initfn = umash_init + ); REGISTER_HASH(UMASH_128, - $.desc = "UMASH-128", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_CLMUL_BASED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x63857D05, - $.verification_BE = 0xE87FFB4B, - $.hashfn_native = UMASH_FP, - $.hashfn_bswap = UMASH_FP, - $.initfn = umash_init -); + $.desc = "UMASH-128", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_CLMUL_BASED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 0x63857D05, + $.verification_BE = 0xE87FFB4B, + $.hashfn_native = UMASH_FP, + $.hashfn_bswap = UMASH_FP, + $.initfn = umash_init + ); REGISTER_HASH(UMASH_128__reseed, - $.desc = "UMASH-128 (with full reseeding)", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_CLMUL_BASED, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_MIT, - $.bits = 128, - $.verification_LE = 0x36D4EC95, - $.verification_BE = 0x9F870C9C, - $.hashfn_native = UMASH_FP, - $.hashfn_bswap = UMASH_FP, - $.seedfn = umash_slow_reseed, - $.initfn = umash_init -); + $.desc = "UMASH-128 (with full reseeding)", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_CLMUL_BASED, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_MIT, + $.bits = 128, + $.verification_LE = 
0x36D4EC95, + $.verification_BE = 0x9F870C9C, + $.hashfn_native = UMASH_FP, + $.hashfn_bswap = UMASH_FP, + $.seedfn = umash_slow_reseed, + $.initfn = umash_init + ); #endif diff --git a/hashes/vmac.cpp b/hashes/vmac.cpp index 62e95e9f..8cbdead8 100644 --- a/hashes/vmac.cpp +++ b/hashes/vmac.cpp @@ -40,44 +40,46 @@ //----------------------------------------------------------------------------- // Constants and masks -const uint64_t p64 = UINT64_C(0xfffffffffffffeff); /* 2^64 - 257 prime */ -const uint64_t m62 = UINT64_C(0x3fffffffffffffff); /* 62-bit mask */ -const uint64_t m63 = UINT64_C(0x7fffffffffffffff); /* 63-bit mask */ -const uint64_t m64 = UINT64_C(0xffffffffffffffff); /* 64-bit mask */ -const uint64_t mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ +const uint64_t p64 = UINT64_C(0xfffffffffffffeff); /* 2^64 - 257 prime */ +const uint64_t m62 = UINT64_C(0x3fffffffffffffff); /* 62-bit mask */ +const uint64_t m63 = UINT64_C(0x7fffffffffffffff); /* 63-bit mask */ +const uint64_t m64 = UINT64_C(0xffffffffffffffff); /* 64-bit mask */ +const uint64_t mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ //----------------------------------------------------------------------------- // macros from Crypto++ for sharing inline assembly code between MSVC and GNU C #if defined(__GNUC__) - // define these in two steps to allow arguments to be expanded - #define GNU_AS2(x, y) #x ", " #y ";" - #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";" - #define GNU_ASL(x) "\n" #x ":" - #define GNU_ASJ(x, y, z) #x " " #y #z ";" - #define AS2(x, y) GNU_AS2(x, y) - #define AS3(x, y, z) GNU_AS3(x, y, z) - #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";" - #define ASL(x) GNU_ASL(x) - #define ASJ(x, y, z) GNU_ASJ(x, y, z) +// define these in two steps to allow arguments to be expanded + #define GNU_AS2(x, y) #x ", " #y ";" + #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";" + #define GNU_ASL(x) "\n" #x ":" + #define GNU_ASJ(x, y, z) #x " " #y 
#z ";" + #define AS2(x, y) GNU_AS2(x, y) + #define AS3(x, y, z) GNU_AS3(x, y, z) + #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";" + #define ASL(x) GNU_ASL(x) + #define ASJ(x, y, z) GNU_ASJ(x, y, z) #else - #define AS2(x, y) __asm {x, y} - #define AS3(x, y, z) __asm {x, y, z} - #define ASS(x, y, a, b, c, d) __asm {x, y, _MM_SHUFFLE(a, b, c, d)} - #define ASL(x) __asm {label##x:} - #define ASJ(x, y, z) __asm {x label##y} + #define AS2(x, y) __asm { x, y } + #define AS3(x, y, z) __asm { x, y, z } + #define ASS(x, y, a, b, c, d) __asm { x, y, _MM_SHUFFLE(a, b, c, d) } + #define ASL(x) __asm { \ + label ## x: \ + } + #define ASJ(x, y, z) __asm { x label ## y } #endif //----------------------------------------------------------------------------- -#define ADD128(rh,rl,ih,il) add128(rl, rh, il, ih) +#define ADD128(rh, rl, ih, il) add128(rl, rh, il, ih) -#define MUL64(rh,rl,i1,i2) mult64_128(rl, rh, i1, i2) +#define MUL64(rh, rl, i1, i2) mult64_128(rl, rh, i1, i2) // PMUL is a special case of MUL where one carry bit is guaranteed to // not be needed. We'll just ignore that for now. 
#define PMUL64 MUL64 -#define MUL32(i1,i2) ((uint64_t)(uint32_t)(i1)*(uint32_t)(i2)) +#define MUL32(i1, i2) ((uint64_t)(uint32_t)(i1) * (uint32_t)(i2)) //----------------------------------------------------------------------------- // For highest performance the L1 NH and L2 polynomial hashes should be @@ -86,27 +88,29 @@ const uint64_t mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */ //----------------------------------------------------------------------------- // Portable code (64-bit/32-bit details are behind mathmult.h macros) -template < bool bswap > -static inline void nh_16_portable(const uint8_t * mp, const uint64_t * kp, size_t nw, uint64_t & rh, uint64_t & rl) { - //uint64_t th, tl; +template +static inline void nh_16_portable( const uint8_t * mp, const uint64_t * kp, size_t nw, uint64_t & rh, uint64_t & rl ) { + // uint64_t th, tl; rh = rl = 0; - for (size_t i = 0; i < nw; i+= 2) { + for (size_t i = 0; i < nw; i += 2) { #if 0 - MUL64(th, tl, (GET_U64(mp, i*8) + kp[i]), (GET_U64(mp, i*8 + 8) + kp[i + 1])); + MUL64(th, tl, (GET_U64(mp, i * 8) + kp[i]), (GET_U64(mp, i * 8 + 8) + kp[i + 1])); ADD128(rh, rl, th, tl); #else - fma64_128(rl, rh, (GET_U64(mp, i*8) + kp[i]), (GET_U64(mp, i*8 + 8) + kp[i + 1])); + fma64_128(rl, rh, (GET_U64(mp, i * 8) + kp[i]), (GET_U64(mp, i * 8 + 8) + kp[i + 1])); #endif } } // Using fma64_128() here is a tiny bit slower because there is less // freedom to reorder things and take advantage of more registers -template < bool bswap > -static inline void nh_vmac_nhbytes_portable(const uint8_t * mp, const uint64_t * kp, size_t nw, uint64_t & rh, uint64_t & rl) { +template +static inline void nh_vmac_nhbytes_portable( const uint8_t * mp, const uint64_t * kp, + size_t nw, uint64_t & rh, uint64_t & rl ) { uint64_t th, tl; + rh = rl = 0; - for (size_t i = 0; i < nw; i+= 8) { + for (size_t i = 0; i < nw; i += 8) { MUL64(th, tl, (GET_U64(mp, (i + 0) * 8) + kp[i + 0]), (GET_U64(mp, (i + 1) * 8) + kp[i + 1])); ADD128(rh, rl, th, 
tl); MUL64(th, tl, (GET_U64(mp, (i + 2) * 8) + kp[i + 2]), (GET_U64(mp, (i + 3) * 8) + kp[i + 3])); @@ -118,28 +122,28 @@ static inline void nh_vmac_nhbytes_portable(const uint8_t * mp, const uint64_t * } } -static inline void poly_step_portable(uint64_t & ah, uint64_t & al, const uint64_t & kh, - const uint64_t & kl, const uint64_t & mh, const uint64_t & ml) { +static inline void poly_step_portable( uint64_t & ah, uint64_t & al, const uint64_t & kh, + const uint64_t & kl, const uint64_t & mh, const uint64_t & ml ) { uint64_t t1h, t1l, t2h, t2l, t3h, t3l, z = 0; /* compute ab*cd, put bd into result registers */ - PMUL64(t3h, t3l, al, kh); - PMUL64(t2h, t2l, ah, kl); - PMUL64(t1h, t1l, ah, 2*kh); - PMUL64(ah, al, al, kl); + PMUL64(t3h, t3l, al, kh ); + PMUL64(t2h, t2l, ah, kl ); + PMUL64(t1h, t1l, ah, 2 * kh); + PMUL64(ah , al , al, kl ); /* add 2 * ac to result */ - ADD128(ah, al, t1h, t1l); + ADD128(ah , al , t1h, t1l); /* add together ad + bc */ ADD128(t2h, t2l, t3h, t3l); /* now (ah,al), (t2l,2*t2h) need summing */ /* first add the high registers, carrying into t2h */ - ADD128(t2h, ah, z, t2l); + ADD128(t2h, ah , z , t2l); /* double t2h and add top bit of ah */ t2h = 2 * t2h + (ah >> 63); ah &= m63; /* now add the low registers */ - ADD128(ah, al, mh, ml); - ADD128(ah, al, z, t2h); + ADD128(ah , al , mh , ml ); + ADD128(ah , al , z , t2h); } //----------------------------------------------------------------------------- @@ -147,236 +151,237 @@ static inline void poly_step_portable(uint64_t & ah, uint64_t & al, const uint64 #if defined(HAVE_32BIT_PLATFORM) && defined(HAVE_SSE_2) -template < bool bswap > -static void nh_16_sse2(const uint8_t * mp, const uint64_t * kp, size_t nw, uint64_t & rh, uint64_t & rl) { - // This assembly version, using MMX registers, is just as fast as the - // intrinsics version (which uses XMM registers) on the Intel Core 2, - // but is much faster on the Pentium 4. 
In order to schedule multiplies - // as early as possible, the loop interleaves operations for the current - // block and the next block. To mask out high 32-bits, we use "movd" - // to move the lower 32-bits to the stack and then back. Surprisingly, - // this is faster than any other method. -#if defined(__GNUC__) - __asm__ __volatile__ - ( - ".intel_syntax noprefix;" -#else - AS2( mov esi, mp) - AS2( mov edi, kp) - AS2( mov ecx, nw) - AS2( mov eax, &rl) - AS2( mov edx, &rh) -#endif - AS2( sub esp, 12) - AS2( movq mm6, [esi]) - AS2( paddq mm6, [edi]) - AS2( movq mm5, [esi+8]) - AS2( paddq mm5, [edi+8]) - AS2( add esi, 16) - AS2( add edi, 16) - AS2( movq mm4, mm6) - ASS( pshufw mm2, mm6, 1, 0, 3, 2) - AS2( pmuludq mm6, mm5) - ASS( pshufw mm3, mm5, 1, 0, 3, 2) - AS2( pmuludq mm5, mm2) - AS2( pmuludq mm2, mm3) - AS2( pmuludq mm3, mm4) - AS2( pxor mm7, mm7) - AS2( movd [esp], mm6) - AS2( psrlq mm6, 32) - AS2( movd [esp+4], mm5) - AS2( psrlq mm5, 32) - AS2( sub ecx, 2) - ASJ( jz, 1, f) - ASL(0) - AS2( movq mm0, [esi]) - AS2( paddq mm0, [edi]) - AS2( movq mm1, [esi+8]) - AS2( paddq mm1, [edi+8]) - AS2( add esi, 16) - AS2( add edi, 16) - AS2( movq mm4, mm0) - AS2( paddq mm5, mm2) - ASS( pshufw mm2, mm0, 1, 0, 3, 2) - AS2( pmuludq mm0, mm1) - AS2( movd [esp+8], mm3) - AS2( psrlq mm3, 32) - AS2( paddq mm5, mm3) - ASS( pshufw mm3, mm1, 1, 0, 3, 2) - AS2( pmuludq mm1, mm2) - AS2( pmuludq mm2, mm3) - AS2( pmuludq mm3, mm4) - AS2( movd mm4, [esp]) - AS2( paddq mm7, mm4) - AS2( movd mm4, [esp+4]) - AS2( paddq mm6, mm4) - AS2( movd mm4, [esp+8]) - AS2( paddq mm6, mm4) - AS2( movd [esp], mm0) - AS2( psrlq mm0, 32) - AS2( paddq mm6, mm0) - AS2( movd [esp+4], mm1) - AS2( psrlq mm1, 32) - AS2( paddq mm5, mm1) - AS2( sub ecx, 2) - ASJ( jnz, 0, b) - ASL(1) - AS2( paddq mm5, mm2) - AS2( movd [esp+8], mm3) - AS2( psrlq mm3, 32) - AS2( paddq mm5, mm3) - AS2( movd mm4, [esp]) - AS2( paddq mm7, mm4) - AS2( movd mm4, [esp+4]) - AS2( paddq mm6, mm4) - AS2( movd mm4, [esp+8]) - AS2( paddq 
mm6, mm4) - - ASS( pshufw mm0, mm7, 3, 2, 1, 0) - AS2( psrlq mm7, 32) - AS2( paddq mm6, mm7) - AS2( punpckldq mm0, mm6) - AS2( psrlq mm6, 32) - AS2( paddq mm5, mm6) - AS2( movq [eax], mm0) - AS2( movq [edx], mm5) - AS2( add esp, 12) -#if defined(__GNUC__) - ".att_syntax prefix;" - : - : "S" (mp), "D" (kp), "c" (nw), "a" (&rl), "d" (&rh) - : "memory", "cc" - ); -#endif +template +static void nh_16_sse2( const uint8_t * mp, const uint64_t * kp, size_t nw, uint64_t & rh, uint64_t & rl ) { + // This assembly version, using MMX registers, is just as fast as the + // intrinsics version (which uses XMM registers) on the Intel Core 2, + // but is much faster on the Pentium 4. In order to schedule multiplies + // as early as possible, the loop interleaves operations for the current + // block and the next block. To mask out high 32-bits, we use "movd" + // to move the lower 32-bits to the stack and then back. Surprisingly, + // this is faster than any other method. + #if defined(__GNUC__) + __asm__ __volatile__ + ( + ".intel_syntax noprefix;" + #else + AS2(mov esi, mp ) + AS2(mov edi, kp ) + AS2(mov ecx, nw ) + AS2(mov eax, &rl) + AS2(mov edx, &rh) + #endif + AS2(sub esp, 12 ) + AS2(movq mm6, [esi] ) + AS2(paddq mm6, [edi] ) + AS2(movq mm5, [esi + 8]) + AS2(paddq mm5, [edi + 8]) + AS2(add esi, 16 ) + AS2(add edi, 16 ) + AS2(movq mm4, mm6 ) + ASS( pshufw mm2, mm6, 1, 0, 3, 2) + AS2( pmuludq mm6, mm5) + ASS( pshufw mm3, mm5, 1, 0, 3, 2) + AS2(pmuludq mm5 , mm2) + AS2(pmuludq mm2 , mm3) + AS2(pmuludq mm3 , mm4) + AS2(pxor mm7 , mm7) + AS2(movd [esp] , mm6) + AS2(psrlq mm6 , 32) + AS2(movd [esp+4], mm5) + AS2(psrlq mm5 , 32) + AS2(sub ecx , 2) + ASJ( jz, 1, f) + ASL(0) + AS2(movq mm0, [esi] ) + AS2(paddq mm0, [edi] ) + AS2(movq mm1, [esi + 8]) + AS2(paddq mm1, [edi + 8]) + AS2(add esi, 16 ) + AS2(add edi, 16 ) + AS2(movq mm4, mm0 ) + AS2(paddq mm5, mm2 ) + ASS( pshufw mm2, mm0, 1, 0, 3, 2) + AS2(pmuludq mm0 , mm1) + AS2(movd [esp+8], mm3) + AS2(psrlq mm3 , 32) + AS2(paddq mm5 , 
mm3) + ASS( pshufw mm3, mm1, 1, 0, 3, 2) + AS2(pmuludq mm1 , mm2 ) + AS2(pmuludq mm2 , mm3 ) + AS2(pmuludq mm3 , mm4 ) + AS2(movd mm4 , [esp] ) + AS2(paddq mm7 , mm4 ) + AS2(movd mm4 , [esp + 4]) + AS2(paddq mm6 , mm4 ) + AS2(movd mm4 , [esp + 8]) + AS2(paddq mm6 , mm4 ) + AS2(movd [esp] , mm0 ) + AS2(psrlq mm0 , 32 ) + AS2(paddq mm6 , mm0 ) + AS2(movd [esp+4], mm1 ) + AS2(psrlq mm1 , 32 ) + AS2(paddq mm5 , mm1 ) + AS2(sub ecx , 2 ) + ASJ( jnz, 0, b) + ASL(1) + AS2(paddq mm5 , mm2 ) + AS2(movd [esp+8], mm3 ) + AS2(psrlq mm3 , 32 ) + AS2(paddq mm5 , mm3 ) + AS2(movd mm4 , [esp] ) + AS2(paddq mm7 , mm4 ) + AS2(movd mm4 , [esp + 4]) + AS2(paddq mm6 , mm4 ) + AS2(movd mm4 , [esp + 8]) + AS2(paddq mm6 , mm4 ) + + ASS( pshufw mm0, mm7, 3, 2, 1, 0) + AS2(psrlq mm7 , 32) + AS2(paddq mm6 , mm7) + AS2(punpckldq mm0, mm6) + AS2(psrlq mm6 , 32) + AS2(paddq mm5 , mm6) + AS2(movq [eax] , mm0) + AS2(movq [edx] , mm5) + AS2(add esp , 12) + #if defined(__GNUC__) + ".att_syntax prefix;" + : + : "S" (mp), "D" (kp), "c" (nw), "a" (&rl), "d" (&rh) + : "memory", "cc" + ); + #else + #endif } -static void poly_step_sse2(uint64_t & ah, uint64_t & al, const uint64_t & kh, - const uint64_t & kl, const uint64_t & mh, const uint64_t & ml) { +static void poly_step_sse2( uint64_t & ah, uint64_t & al, const uint64_t & kh, + const uint64_t & kl, const uint64_t & mh, const uint64_t & ml ) { // This code tries to schedule the multiplies as early as possible to overcome // the long latencies on the Pentium 4. It also minimizes "movq" instructions // which are very expensive on the P4. 
-#define a0 [eax+0] -#define a1 [eax+4] -#define a2 [ebx+0] -#define a3 [ebx+4] -#define k0 [ecx+0] -#define k1 [ecx+4] -#define k2 [edx+0] -#define k3 [edx+4] - -#if defined(__GNUC__) - uint32_t temp; - __asm__ __volatile__ - ( - "mov %%ebx, %0;" - "mov %1, %%ebx;" - ".intel_syntax noprefix;" -#else - AS2( mov ebx, &ah) - AS2( mov edx, &kh) - AS2( mov eax, &al) - AS2( mov ecx, &kl) - AS2( mov esi, &mh) - AS2( mov edi, &ml) -#endif - - AS2( movd mm0, a3) - AS2( movq mm4, mm0) - AS2( pmuludq mm0, k3) // a3*k3 - AS2( movd mm1, a0) - AS2( pmuludq mm1, k2) // a0*k2 - AS2( movd mm2, a1) - AS2( movd mm6, k1) - AS2( pmuludq mm2, mm6) // a1*k1 - AS2( movd mm3, a2) - AS2( movq mm5, mm3) - AS2( movd mm7, k0) - AS2( pmuludq mm3, mm7) // a2*k0 - AS2( pmuludq mm4, mm7) // a3*k0 - AS2( pmuludq mm5, mm6) // a2*k1 - AS2( psllq mm0, 1) - AS2( paddq mm0, [esi]) - AS2( paddq mm0, mm1) - AS2( movd mm1, a1) - AS2( paddq mm4, mm5) - AS2( movq mm5, mm1) - AS2( pmuludq mm1, k2) // a1*k2 - AS2( paddq mm0, mm2) - AS2( movd mm2, a0) - AS2( paddq mm0, mm3) - AS2( movq mm3, mm2) - AS2( pmuludq mm2, k3) // a0*k3 - AS2( pmuludq mm3, mm7) // a0*k0 - AS2( movd esi, mm0) - AS2( psrlq mm0, 32) - AS2( pmuludq mm7, mm5) // a1*k0 - AS2( pmuludq mm5, k3) // a1*k3 - AS2( paddq mm0, mm1) - AS2( movd mm1, a2) - AS2( pmuludq mm1, k2) // a2*k2 - AS2( paddq mm0, mm2) - AS2( paddq mm0, mm4) - AS2( movq mm4, mm0) - AS2( movd mm2, a3) - AS2( pmuludq mm2, mm6) // a3*k1 - AS2( pmuludq mm6, a0) // a0*k1 - AS2( psrlq mm0, 31) - AS2( paddq mm0, mm3) - AS2( movd mm3, [edi]) - AS2( paddq mm0, mm3) - AS2( movd mm3, a2) - AS2( pmuludq mm3, k3) // a2*k3 - AS2( paddq mm5, mm1) - AS2( movd mm1, a3) - AS2( pmuludq mm1, k2) // a3*k2 - AS2( paddq mm5, mm2) - AS2( movd mm2, [edi+4]) - AS2( psllq mm5, 1) - AS2( paddq mm0, mm5) - AS2( movq mm5, mm0) - AS2( psllq mm4, 33) - AS2( psrlq mm0, 32) - AS2( paddq mm6, mm7) - AS2( movd mm7, esi) - AS2( paddq mm0, mm6) - AS2( paddq mm0, mm2) - AS2( paddq mm3, mm1) - AS2( psllq mm3, 1) - 
AS2( paddq mm0, mm3) - AS2( psrlq mm4, 1) - AS2( punpckldq mm5, mm0) - AS2( psrlq mm0, 32) - AS2( por mm4, mm7) - AS2( paddq mm0, mm4) - AS2( movq a0, mm5) - AS2( movq a2, mm0) -#if defined(__GNUC__) - ".att_syntax prefix;" - "mov %0, %%ebx;" - : "=m" (temp) - : "m" (&ah), "D" (&ml), "d" (&kh), "a" (&al), "S" (&mh), "c" (&kl) - : "memory", "cc" - ); -#endif - - -#undef a0 -#undef a1 -#undef a2 -#undef a3 -#undef k0 -#undef k1 -#undef k2 -#undef k3 + #define a0 [eax + 0] + #define a1 [eax + 4] + #define a2 [ebx + 0] + #define a3 [ebx + 4] + #define k0 [ecx + 0] + #define k1 [ecx + 4] + #define k2 [edx + 0] + #define k3 [edx + 4] + + #if defined(__GNUC__) + uint32_t temp; + __asm__ __volatile__ + ( + "mov %%ebx, %0;" + "mov %1, %%ebx;" + ".intel_syntax noprefix;" + #else + AS2(mov ebx, &ah) + AS2(mov edx, &kh) + AS2(mov eax, &al) + AS2(mov ecx, &kl) + AS2(mov esi, &mh) + AS2(mov edi, &ml) + #endif + + AS2(movd mm0 , a3 ) + AS2(movq mm4 , mm0 ) + AS2(pmuludq mm0 , k3 ) // a3*k3 + AS2(movd mm1 , a0 ) + AS2(pmuludq mm1 , k2 ) // a0*k2 + AS2(movd mm2 , a1 ) + AS2(movd mm6 , k1 ) + AS2(pmuludq mm2 , mm6 ) // a1*k1 + AS2(movd mm3 , a2 ) + AS2(movq mm5 , mm3 ) + AS2(movd mm7 , k0 ) + AS2(pmuludq mm3 , mm7 ) // a2*k0 + AS2(pmuludq mm4 , mm7 ) // a3*k0 + AS2(pmuludq mm5 , mm6 ) // a2*k1 + AS2(psllq mm0 , 1 ) + AS2(paddq mm0 , [esi] ) + AS2(paddq mm0 , mm1 ) + AS2(movd mm1 , a1 ) + AS2(paddq mm4 , mm5 ) + AS2(movq mm5 , mm1 ) + AS2(pmuludq mm1 , k2 ) // a1*k2 + AS2(paddq mm0 , mm2 ) + AS2(movd mm2 , a0 ) + AS2(paddq mm0 , mm3 ) + AS2(movq mm3 , mm2 ) + AS2(pmuludq mm2 , k3 ) // a0*k3 + AS2(pmuludq mm3 , mm7 ) // a0*k0 + AS2(movd esi , mm0 ) + AS2(psrlq mm0 , 32 ) + AS2(pmuludq mm7 , mm5 ) // a1*k0 + AS2(pmuludq mm5 , k3 ) // a1*k3 + AS2(paddq mm0 , mm1 ) + AS2(movd mm1 , a2 ) + AS2(pmuludq mm1 , k2 ) // a2*k2 + AS2(paddq mm0 , mm2 ) + AS2(paddq mm0 , mm4 ) + AS2(movq mm4 , mm0 ) + AS2(movd mm2 , a3 ) + AS2(pmuludq mm2 , mm6 ) // a3*k1 + AS2(pmuludq mm6 , a0 ) // a0*k1 + 
AS2(psrlq mm0 , 31 ) + AS2(paddq mm0 , mm3 ) + AS2(movd mm3 , [edi] ) + AS2(paddq mm0 , mm3 ) + AS2(movd mm3 , a2 ) + AS2(pmuludq mm3 , k3 ) // a2*k3 + AS2(paddq mm5 , mm1 ) + AS2(movd mm1 , a3 ) + AS2(pmuludq mm1 , k2 ) // a3*k2 + AS2(paddq mm5 , mm2 ) + AS2(movd mm2 , [edi + 4]) + AS2(psllq mm5 , 1 ) + AS2(paddq mm0 , mm5 ) + AS2(movq mm5 , mm0 ) + AS2(psllq mm4 , 33 ) + AS2(psrlq mm0 , 32 ) + AS2(paddq mm6 , mm7 ) + AS2(movd mm7 , esi ) + AS2(paddq mm0 , mm6 ) + AS2(paddq mm0 , mm2 ) + AS2(paddq mm3 , mm1 ) + AS2(psllq mm3 , 1 ) + AS2(paddq mm0 , mm3 ) + AS2(psrlq mm4 , 1 ) + AS2(punpckldq mm5, mm0 ) + AS2(psrlq mm0 , 32 ) + AS2(por mm4 , mm7 ) + AS2(paddq mm0 , mm4 ) + AS2(movq a0 , mm5 ) + AS2(movq a2 , mm0 ) + #if defined(__GNUC__) + ".att_syntax prefix;" + "mov %0, %%ebx;" + : "=m" (temp) + : "m" (&ah), "D" (&ml), "d" (&kh), "a" (&al), "S" (&mh), "c" (&kl) + : "memory", "cc" + ); + #else + #endif + + #undef a0 + #undef a1 + #undef a2 + #undef a3 + #undef k0 + #undef k1 + #undef k2 + #undef k3 } #endif //----------------------------------------------------------------------------- // Wrapper implementations -template < bool bswap > -static void nh_16(const uint8_t * mp, const uint64_t * kp, size_t nw, uint64_t & rh, uint64_t & rl) { +template +static void nh_16( const uint8_t * mp, const uint64_t * kp, size_t nw, uint64_t & rh, uint64_t & rl ) { #if defined(HAVE_32BIT_PLATFORM) && defined(HAVE_SSE_2) nh_16_sse2(mp, kp, nw, rh, rl); #else @@ -384,8 +389,8 @@ static void nh_16(const uint8_t * mp, const uint64_t * kp, size_t nw, uint64_t & #endif } -template < bool bswap > -static void nh_vmac_nhbytes(const uint8_t * mp, const uint64_t * kp, size_t nw, uint64_t & rh, uint64_t & rl) { +template +static void nh_vmac_nhbytes( const uint8_t * mp, const uint64_t * kp, size_t nw, uint64_t & rh, uint64_t & rl ) { #if defined(HAVE_32BIT_PLATFORM) && defined(HAVE_SSE_2) nh_16_sse2(mp, kp, nw, rh, rl); #else @@ -393,8 +398,8 @@ static void nh_vmac_nhbytes(const uint8_t * 
mp, const uint64_t * kp, size_t nw, #endif } -static void poly_step(uint64_t & ah, uint64_t & al, const uint64_t & kh, - const uint64_t & kl, const uint64_t & mh, const uint64_t & ml) { +static void poly_step( uint64_t & ah, uint64_t & al, const uint64_t & kh, const uint64_t & kl, + const uint64_t & mh, const uint64_t & ml ) { #if defined(HAVE_32BIT_PLATFORM) && defined(HAVE_SSE_2) poly_step_sse2(ah, al, kh, kl, mh, ml); #else @@ -410,38 +415,38 @@ static void poly_step(uint64_t & ah, uint64_t & al, const uint64_t & kh, //----------------------------------------------------------------------------- #include "AES.h" -typedef uint32_t aes_int_key[4*(VMAC_KEY_LEN/32+7)]; +typedef uint32_t aes_int_key[4 * (VMAC_KEY_LEN / 32 + 7)]; -#define aes_encryption(in,out,int_key) \ - AES_Encrypt<10>(int_key, \ - (const uint8_t *)(in), \ +#define aes_encryption(in,out,int_key) \ + AES_Encrypt<10>(int_key, \ + (const uint8_t *)(in), \ (uint8_t *)(out)) -#define aes_key_setup(user_key,int_key) \ - AES_KeySetup_Enc(int_key, \ - (const uint8_t *)(user_key), \ +#define aes_key_setup(user_key,int_key) \ + AES_KeySetup_Enc(int_key, \ + (const uint8_t *)(user_key), \ VMAC_KEY_LEN) //----------------------------------------------------------------------------- typedef struct { - uint64_t nhkey [(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)]; - uint64_t polykey[2*VMAC_TAG_LEN/64]; - uint64_t l3key [2*VMAC_TAG_LEN/64]; - aes_int_key cipher_key; + uint64_t nhkey[(VMAC_NHBYTES / 8) + 2 * (VMAC_TAG_LEN / 64 - 1)]; + uint64_t polykey[2 * VMAC_TAG_LEN / 64]; + uint64_t l3key[2 * VMAC_TAG_LEN / 64]; + aes_int_key cipher_key; } vmac_ctx_t; //----------------------------------------------------------------------------- #if defined(_MSC_VER) -# if !defined(_WIN64) -# define _mmm_empty _mm_empty(); -# else // _WIN64 -# define _mmm_empty -# endif // _WIN64 + #if !defined(_WIN64) + #define _mmm_empty _mm_empty(); + #else // _WIN64 + #define _mmm_empty + #endif // _WIN64 #else // _MSC_VER -# define _mmm_empty 
__asm volatile ( "emms" ::: "memory" ); + #define _mmm_empty __asm volatile ("emms" ::: "memory"); #endif // _MSC_VER -static void vhash_abort(vmac_ctx_t *ctx) { +static void vhash_abort( vmac_ctx_t * ctx ) { #if defined(HAVE_32BIT_PLATFORM) && defined(HAVE_SSE_2) _mmm_empty /* SSE2 version of poly_step uses mmx instructions */ #endif @@ -449,64 +454,64 @@ static void vhash_abort(vmac_ctx_t *ctx) { #undef _mmm_empty -template < bool bswap > -static void vmac_set_key(uint8_t user_key[], vmac_ctx_t *ctx) { - uint64_t in[2] = {0}, out[2]; +template +static void vmac_set_key( uint8_t user_key[], vmac_ctx_t * ctx ) { + uint64_t in[2] = { 0 }, out[2]; uint32_t i; aes_key_setup(user_key, ctx->cipher_key); /* Fill nh key */ ((uint8_t *)in)[0] = 0x80; - for (i = 0; i < sizeof(ctx->nhkey)/8; i+=2) { + for (i = 0; i < sizeof(ctx->nhkey) / 8; i += 2) { aes_encryption((uint8_t *)in, (uint8_t *)out, ctx->cipher_key); - ctx->nhkey[i ] = GET_U64((uint8_t *)out, 0); - ctx->nhkey[i+1] = GET_U64((uint8_t *)out, 8); + ctx->nhkey[i ] = GET_U64((uint8_t *)out, 0); + ctx->nhkey[i + 1] = GET_U64((uint8_t *)out, 8); ((uint8_t *)in)[15] += 1; } /* Fill poly key */ ((uint8_t *)in)[0] = 0xC0; - in[1] = 0; - for (i = 0; i < sizeof(ctx->polykey)/8; i+=2) { + in [1] = 0; + for (i = 0; i < sizeof(ctx->polykey) / 8; i += 2) { aes_encryption((uint8_t *)in, (uint8_t *)out, ctx->cipher_key); // "& mpoly" code is moved into vhash() due to new seeding - ctx->polykey[i ] = GET_U64((uint8_t *)out, 0); - ctx->polykey[i+1] = GET_U64((uint8_t *)out, 8); + ctx->polykey[i ] = GET_U64((uint8_t *)out, 0); + ctx->polykey[i + 1] = GET_U64((uint8_t *)out, 8); ((uint8_t *)in)[15] += 1; } /* Fill ip key */ ((uint8_t *)in)[0] = 0xE0; - in[1] = 0; - for (i = 0; i < sizeof(ctx->l3key)/8; i+=2) { + in [1] = 0; + for (i = 0; i < sizeof(ctx->l3key) / 8; i += 2) { do { aes_encryption((uint8_t *)in, (uint8_t *)out, ctx->cipher_key); - ctx->l3key[i ] = GET_U64((uint8_t *)out, 0); - ctx->l3key[i+1] = GET_U64((uint8_t *)out, 
8); + ctx->l3key[i ] = GET_U64((uint8_t *)out, 0); + ctx->l3key[i + 1] = GET_U64((uint8_t *)out, 8); ((uint8_t *)in)[15] += 1; - } while (ctx->l3key[i] >= p64 || ctx->l3key[i+1] >= p64); + } while (ctx->l3key[i] >= p64 || ctx->l3key[i + 1] >= p64); } } -static uint64_t l3hash(uint64_t p1, uint64_t p2, uint64_t k1, uint64_t k2, uint64_t len) { - uint64_t rh, rl, t, z=0; +static uint64_t l3hash( uint64_t p1, uint64_t p2, uint64_t k1, uint64_t k2, uint64_t len ) { + uint64_t rh, rl, t, z = 0; /* fully reduce (p1,p2)+(len,0) mod p127 */ - t = p1 >> 63; + t = p1 >> 63; p1 &= m63; ADD128(p1, p2, len, t); /* At this point, (p1,p2) is at most 2^127+(len<<64) */ - t = (p1 > m63) + ((p1 == m63) && (p2 == m64)); - ADD128(p1, p2, z, t); + t = (p1 > m63) + ((p1 == m63) && (p2 == m64)); + ADD128(p1, p2, z , t); p1 &= m63; /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */ - t = p1 + (p2 >> 32); - t += (t >> 32); - t += (uint32_t)t > 0xfffffffeu; - p1 += (t >> 32); - p2 += (p1 << 32); + t = p1 + (p2 >> 32); + t += (t >> 32); + t += (uint32_t)t > 0xfffffffeu; + p1 += (t >> 32); + p2 += (p1 << 32); /* compute (p1+k1)%p64 and (p2+k2)%p64 */ p1 += k1; @@ -516,42 +521,42 @@ static uint64_t l3hash(uint64_t p1, uint64_t p2, uint64_t k1, uint64_t k2, uint6 /* compute (p1+k1)*(p2+k2)%p64 */ MUL64(rh, rl, p1, p2); - t = rh >> 56; + t = rh >> 56; ADD128(t, rl, z, rh); rh <<= 8; ADD128(t, rl, z, rh); - t += t << 8; - rl += t; - rl += (0 - (rl < t)) & 257; - rl += (0 - (rl > p64-1)) & 257; + t += t << 8; + rl += t; + rl += (0 - (rl < t )) & 257; + rl += (0 - (rl > p64 - 1)) & 257; return rl; } // Homegrown (unofficial) seeding -template < bool bswap > -static uint64_t vhash(const uint8_t * mptr, size_t mbytes, uint64_t seed, vmac_ctx_t * ctx) { - uint64_t rh, rl; +template +static uint64_t vhash( const uint8_t * mptr, size_t mbytes, uint64_t seed, vmac_ctx_t * ctx ) { + uint64_t rh, rl; const uint64_t * kptr = ctx->nhkey; - size_t i, remaining; - uint64_t ch, cl; - uint64_t pkh = 
(ctx->polykey[0] ^ ROTR64(seed, 24)) & mpoly; - uint64_t pkl = (ctx->polykey[1] ^ seed ) & mpoly; + size_t i, remaining; + uint64_t ch, cl; + uint64_t pkh = (ctx->polykey[0] ^ ROTR64(seed, 24)) & mpoly; + uint64_t pkl = (ctx->polykey[1] ^ seed ) & mpoly; - i = mbytes / VMAC_NHBYTES; + i = mbytes / VMAC_NHBYTES; remaining = mbytes % VMAC_NHBYTES; if (i) { - nh_vmac_nhbytes(mptr,kptr,VMAC_NHBYTES/8,ch,cl); + nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES / 8, ch, cl); ch &= m62; - ADD128(ch,cl,pkh,pkl); + ADD128(ch, cl, pkh, pkl); i--; } else if (remaining) { alignas(16) uint8_t buf[VMAC_NHBYTES]; memcpy(buf, mptr, remaining); memset(buf + remaining, 0, sizeof(buf) - remaining); - nh_16(buf,kptr,2*((remaining+15)/16),ch,cl); + nh_16(buf, kptr, 2 * ((remaining + 15) / 16), ch, cl); ch &= m62; - ADD128(ch,cl,pkh,pkl); + ADD128(ch, cl, pkh, pkl); goto do_l3; } else { ch = pkh; cl = pkl; @@ -560,20 +565,20 @@ static uint64_t vhash(const uint8_t * mptr, size_t mbytes, uint64_t seed, vmac_c while (i--) { mptr += VMAC_NHBYTES; - nh_vmac_nhbytes(mptr,kptr,VMAC_NHBYTES/8,rh,rl); - rh &= m62; - poly_step(ch,cl,pkh,pkl,rh,rl); + nh_vmac_nhbytes(mptr, kptr, VMAC_NHBYTES / 8, rh, rl); + rh &= m62; + poly_step(ch, cl, pkh, pkl, rh, rl); } if (remaining) { alignas(16) uint8_t buf[VMAC_NHBYTES]; memcpy(buf, mptr + VMAC_NHBYTES, remaining); memset(buf + remaining, 0, sizeof(buf) - remaining); - nh_16(buf,kptr,2*((remaining+15)/16),rh,rl); + nh_16(buf, kptr, 2 * ((remaining + 15) / 16), rh, rl); rh &= m62; - poly_step(ch,cl,pkh,pkl,rh,rl); + poly_step(ch, cl, pkh, pkl, rh, rl); } -do_l3: + do_l3: vhash_abort(ctx); remaining *= 8; return l3hash(ch, cl, ctx->l3key[0], ctx->l3key[1], remaining); @@ -582,76 +587,77 @@ static uint64_t vhash(const uint8_t * mptr, size_t mbytes, uint64_t seed, vmac_c //----------------------------------------------------------------------------- class VHASH_initializer { -public: - alignas(16) vmac_ctx_t ctx; + public: + alignas(16) vmac_ctx_t ctx; - 
VHASH_initializer() { - alignas(4) uint8_t key[1 + VMAC_KEY_LEN/8] = "abcdefghijklmnop"; + VHASH_initializer() { + alignas(4) uint8_t key[1 + VMAC_KEY_LEN / 8] = "abcdefghijklmnop"; if (isBE()) { vmac_set_key(key, &ctx); } else { vmac_set_key(key, &ctx); } - } + } - ~VHASH_initializer() { - } -}; + ~VHASH_initializer() {} +}; // class VHASH_initializer // WARNING: this is shared across CPUs, and so must be read-only // during hashing!! // Making this thread-local has a sizable performance hit. static VHASH_initializer vhi; -template < bool bswap > -static void VHASH32(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void VHASH32( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t hash = vhash((const uint8_t *)in, len, (uint64_t)seed, &(vhi.ctx)); + PUT_U32(hash, (uint8_t *)out, 0); } -template < bool bswap > -static void VHASH64(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void VHASH64( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t hash = vhash((const uint8_t *)in, len, (uint64_t)seed, &(vhi.ctx)); + PUT_U64(hash, (uint8_t *)out, 0); } //----------------------------------------------------------------------------- REGISTER_FAMILY(vmac, - $.src_url = "https://www.fastcrypto.org/vmac/", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://www.fastcrypto.org/vmac/", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(VHASH__32, - $.desc = "VHASH low 32 bits, by Ted Krovetz and Wei Dai", - $.hash_flags = - FLAG_HASH_AES_BASED | - FLAG_HASH_CRYPTOGRAPHIC, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_ASM | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 32, - $.verification_LE = 0x613E4735, - $.verification_BE = 0x8797E01C, - $.hashfn_native = VHASH32, - $.hashfn_bswap = VHASH32 -); + $.desc = "VHASH low 32 bits, by Ted Krovetz and Wei Dai", + $.hash_flags = + 
FLAG_HASH_AES_BASED | + FLAG_HASH_CRYPTOGRAPHIC, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_ASM | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 32, + $.verification_LE = 0x613E4735, + $.verification_BE = 0x8797E01C, + $.hashfn_native = VHASH32, + $.hashfn_bswap = VHASH32 + ); REGISTER_HASH(VHASH, - $.desc = "VHASH, by Ted Krovetz and Wei Dai", - $.hash_flags = - FLAG_HASH_AES_BASED | - FLAG_HASH_CRYPTOGRAPHIC, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_ASM | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 64, - $.verification_LE = 0x7417A00F, - $.verification_BE = 0x81C8B066, - $.hashfn_native = VHASH64, - $.hashfn_bswap = VHASH64 -); + $.desc = "VHASH, by Ted Krovetz and Wei Dai", + $.hash_flags = + FLAG_HASH_AES_BASED | + FLAG_HASH_CRYPTOGRAPHIC, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_ASM | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 64, + $.verification_LE = 0x7417A00F, + $.verification_BE = 0x81C8B066, + $.hashfn_native = VHASH64, + $.hashfn_bswap = VHASH64 + ); diff --git a/hashes/wyhash.cpp b/hashes/wyhash.cpp index 110a14ed..b48e8bdf 100644 --- a/hashes/wyhash.cpp +++ b/hashes/wyhash.cpp @@ -41,18 +41,18 @@ //----------------------------------------------------------------------------- // Data reading functions, common to 32- and 64-bit hashes -template < bool bswap > -static inline uint64_t _wyr8(const uint8_t * p) { - return GET_U64(p, 0); +template +static inline uint64_t _wyr8( const uint8_t * p ) { + return GET_U64(p, 0); } -template < bool bswap > -static inline uint64_t _wyr4(const uint8_t * p) { - return GET_U32(p, 0); +template +static inline uint64_t _wyr4( const uint8_t * p ) { + return GET_U32(p, 0); } -static inline uint64_t _wyr3(const uint8_t * p, size_t k) { - return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1]; +static inline uint64_t _wyr3( const uint8_t * p, size_t k ) { + return (((uint64_t)p[0]) << 16) | 
(((uint64_t)p[k >> 1]) << 8) | p[k - 1]; } //----------------------------------------------------------------------------- @@ -62,216 +62,219 @@ static inline uint64_t _wyr3(const uint8_t * p, size_t k) { // choice of strict. I.e. for a given set of template parameter // choices, this function should always give the same answer // regardless of platform. -static inline uint64_t _wyrot(uint64_t x) { return ROTL64(x, 32); } +static inline uint64_t _wyrot( uint64_t x ) { return ROTL64(x, 32); } // TODO: pass mum32bit template param through _wyhash64 -template < bool mum32bit, bool strict > -static inline void _wymum(uint64_t *A, uint64_t *B){ - if (mum32bit) { - uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(uint32_t)*B, lh=(uint32_t)*A*(*B>>32), ll=(uint64_t)(uint32_t)*A*(uint32_t)*B; - if (strict) { - *A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll; +template +static inline void _wymum( uint64_t * A, uint64_t * B ) { + if (mum32bit) { + uint64_t hh = (*A >> 32) * (*B >> 32), hl = (*A >> 32) * (uint32_t)*B, + lh = (uint32_t)*A * (*B >> 32), ll = (uint64_t)(uint32_t)*A * (uint32_t)*B; + if (strict) { + *A ^= _wyrot(hl) ^ hh; *B ^= _wyrot(lh) ^ ll; + } else { + *A = _wyrot(hl) ^ hh; *B = _wyrot(lh) ^ ll; + } } else { - *A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll; + uint64_t rlo, rhi; + mult64_128(rlo, rhi, *A, *B); + if (strict) { + *A ^= rlo; *B ^= rhi; + } else { + *A = rlo; *B = rhi; + } } - } else { - uint64_t rlo, rhi; - mult64_128(rlo, rhi, *A, *B); - if (strict) { - *A^=rlo; *B^=rhi; - } else { - *A=rlo; *B=rhi; - } - } } //----------------------------------------------------------------------------- // multiply and xor mix function, aka MUM -template < bool strict > -static inline uint64_t _wymix(uint64_t A, uint64_t B) { - _wymum(&A,&B); - return A^B; +template +static inline uint64_t _wymix( uint64_t A, uint64_t B ) { + _wymum(&A, &B); + return A ^ B; } // wyhash64 main function -template < bool bswap, bool strict > -static inline uint64_t _wyhash64(const void * key, size_t len, 
uint64_t seed, const uint64_t * secrets) { - const uint8_t * p = (const uint8_t *)key; - uint64_t a, b; +template +static inline uint64_t _wyhash64( const void * key, size_t len, uint64_t seed, const uint64_t * secrets ) { + const uint8_t * p = (const uint8_t *)key; + uint64_t a, b; - seed ^= secrets[0]; + seed ^= secrets[0]; - if (likely(len <= 16)) { - if (likely(len >= 4)) { - a = (_wyr4(p) << 32) | _wyr4(p+((len>>3)<<2)); - b = (_wyr4(p+len-4)<<32)| _wyr4(p+len-4-((len>>3)<<2)); - } else if (likely(len>0)) { - a = _wyr3(p,len); - b=0; + if (likely(len <= 16)) { + if (likely(len >= 4)) { + a = (_wyr4(p) << 32) | _wyr4(p + ((len >> 3) << 2)); + b = (_wyr4(p + len - 4) << 32) | _wyr4(p + len - 4 - ((len >> 3) << 2)); + } else if (likely(len > 0)) { + a = _wyr3(p, len); + b = 0; + } else { + a = b = 0; + } } else { - a = b = 0; - } - } else { - size_t i = len; - if (unlikely(i>48)) { - uint64_t see1=seed, see2=seed; - do { - seed=_wymix(_wyr8(p) ^secrets[1], _wyr8(p+8) ^seed); - see1=_wymix(_wyr8(p+16)^secrets[2], _wyr8(p+24)^see1); - see2=_wymix(_wyr8(p+32)^secrets[3], _wyr8(p+40)^see2); - p+=48; i-=48; - } while(likely(i>48)); - seed ^= see1 ^ see2; + size_t i = len; + if (unlikely(i > 48)) { + uint64_t see1 = seed, see2 = seed; + do { + seed = _wymix(_wyr8(p) ^ secrets[1], _wyr8(p + 8) ^ seed); + see1 = _wymix(_wyr8(p + 16) ^ secrets[2], _wyr8(p + 24) ^ see1); + see2 = _wymix(_wyr8(p + 32) ^ secrets[3], _wyr8(p + 40) ^ see2); + p += 48; i -= 48; + } while (likely(i > 48)); + seed ^= see1 ^ see2; + } + while (unlikely(i > 16)) { + seed = _wymix(_wyr8(p) ^ secrets[1], _wyr8(p + 8) ^ seed); + i -= 16; p += 16; + } + a = _wyr8(p + i - 16); + b = _wyr8(p + i - 8); } - while (unlikely(i>16)) { - seed = _wymix(_wyr8(p)^secrets[1], _wyr8(p+8)^seed); - i-=16; p+=16; - } - a=_wyr8(p+i-16); - b=_wyr8(p+i-8); - } - return _wymix(secrets[1]^len, _wymix(a^secrets[1], b^seed)); + return _wymix(secrets[1] ^ len, _wymix(a ^ secrets[1], b ^ seed)); } 
//----------------------------------------------------------------------------- // 32-bit hash function -static inline void _wymix32(uint32_t * A, uint32_t * B) { - uint64_t c; - c = *A ^ 0x53c5ca59; - c *= *B ^ 0x74743c1b; - *A = (uint32_t)c; - *B = (uint32_t)(c >> 32); +static inline void _wymix32( uint32_t * A, uint32_t * B ) { + uint64_t c; + + c = *A ^ 0x53c5ca59; + c *= *B ^ 0x74743c1b; + *A = (uint32_t)c; + *B = (uint32_t)(c >> 32); } -template < bool bswap > -static inline uint32_t _wyhash32(const void * key, uint64_t len, uint32_t seed) { - const uint8_t * p = (const uint8_t *)key; - uint64_t i = len; - uint32_t see1 = (uint32_t)len; +template +static inline uint32_t _wyhash32( const void * key, uint64_t len, uint32_t seed ) { + const uint8_t * p = (const uint8_t *)key; + uint64_t i = len; + uint32_t see1 = (uint32_t )len; - seed ^= (uint32_t)(len>>32); - _wymix32(&seed, &see1); + seed ^= (uint32_t)(len >> 32); + _wymix32(&seed, &see1); - for (;i>8;i-=8,p+=8) { - seed ^= _wyr4(p); - see1 ^= _wyr4(p+4); + for (; i > 8; i -= 8, p += 8) { + seed ^= _wyr4(p ); + see1 ^= _wyr4(p + 4); + _wymix32(&seed, &see1); + } + if (i >= 4) { + seed ^= _wyr4(p ); + see1 ^= _wyr4(p + i - 4); + } else if (i) { + seed ^= _wyr3(p, (size_t)i); + } + _wymix32(&seed, &see1); _wymix32(&seed, &see1); - } - if (i>=4) { - seed ^= _wyr4(p); - see1 ^= _wyr4(p + i - 4); - } else if (i) { - seed ^= _wyr3(p, (size_t)i); - } - _wymix32(&seed, &see1); - _wymix32(&seed, &see1); - return seed ^ see1; + return seed ^ see1; } //----------------------------------------------------------------------------- // the default secret parameters static const uint64_t _wyp[4] = { - UINT64_C(0xa0761d6478bd642f), UINT64_C(0xe7037ed1a0b428db), - UINT64_C(0x8ebc6af09c88c6e3), UINT64_C(0x589965cc75374cc3) + UINT64_C(0xa0761d6478bd642f), UINT64_C(0xe7037ed1a0b428db), + UINT64_C(0x8ebc6af09c88c6e3), UINT64_C(0x589965cc75374cc3) }; //----------------------------------------------------------------------------- 
-template < bool bswap > -static void Wyhash32(const void * in, const size_t len, const seed_t seed, void * out) { - PUT_U32(_wyhash32(in, (uint64_t)len, (uint32_t)seed), (uint8_t *)out, 0); +template +static void Wyhash32( const void * in, const size_t len, const seed_t seed, void * out ) { + PUT_U32(_wyhash32(in, (uint64_t)len, (uint32_t)seed), (uint8_t *)out, 0); } -template < bool bswap, bool strict > -static void Wyhash64(const void * in, const size_t len, const seed_t seed, void * out) { - PUT_U64(_wyhash64(in, len, (uint64_t)seed, _wyp), (uint8_t *)out, 0); +template +static void Wyhash64( const void * in, const size_t len, const seed_t seed, void * out ) { + PUT_U64(_wyhash64(in, len, (uint64_t)seed, _wyp), (uint8_t *)out, 0); } //----------------------------------------------------------------------------- -static bool wyhash64_selftest(void) { - struct { - const uint64_t hash; - const char * key; - } selftests[] = { - { UINT64_C(0x42bc986dc5eec4d3), "" }, - { UINT64_C(0x84508dc903c31551), "a" }, - { UINT64_C(0x0bc54887cfc9ecb1), "abc" }, - { UINT64_C(0x6e2ff3298208a67c), "message digest" }, - { UINT64_C(0x9a64e42e897195b9), "abcdefghijklmnopqrstuvwxyz" }, - { UINT64_C(0x9199383239c32554), "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" }, - { UINT64_C(0x7c1ccf6bba30f5a5), "12345678901234567890123456789012345678901234567890123456789012345678901234567890" }, - }; +static bool wyhash64_selftest( void ) { + struct { + const uint64_t hash; + const char * key; + } selftests[] = { + { UINT64_C (0x42bc986dc5eec4d3), "" } , + { UINT64_C (0x84508dc903c31551), "a" } , + { UINT64_C (0x0bc54887cfc9ecb1), "abc" } , + { UINT64_C (0x6e2ff3298208a67c), "message digest" } , + { UINT64_C (0x9a64e42e897195b9), "abcdefghijklmnopqrstuvwxyz" }, + { UINT64_C (0x9199383239c32554), "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" }, + { + UINT64_C(0x7c1ccf6bba30f5a5), + 
"12345678901234567890123456789012345678901234567890123456789012345678901234567890" + }, + }; - for (int i = 0; i < sizeof(selftests)/sizeof(selftests[0]); i++) { - uint64_t h; - if (isLE()) { - Wyhash64(selftests[i].key, strlen(selftests[i].key), i, &h); - } else { - Wyhash64(selftests[i].key, strlen(selftests[i].key), i, &h); - // h is in little-endian format - h = COND_BSWAP(h, true); - } - if (h != selftests[i].hash) { - printf("Hash %016lx != expected %016lx for string \"%s\"\n", - h, selftests[i].hash, selftests[i].key); - return false; + for (int i = 0; i < sizeof(selftests) / sizeof(selftests[0]); i++) { + uint64_t h; + if (isLE()) { + Wyhash64(selftests[i].key, strlen(selftests[i].key), i, &h); + } else { + Wyhash64(selftests[i].key, strlen(selftests[i].key), i, &h); + // h is in little-endian format + h = COND_BSWAP(h, true); + } + if (h != selftests[i].hash) { + printf("Hash %016lx != expected %016lx for string \"%s\"\n", h, selftests[i].hash, selftests[i].key); + return false; + } } - } - return true; + return true; } - //----------------------------------------------------------------------------- REGISTER_FAMILY(wyhash, - $.src_url = "https://github.com/wangyi-fudan/wyhash", - $.src_status = HashFamilyInfo::SRC_ACTIVE -); + $.src_url = "https://github.com/wangyi-fudan/wyhash", + $.src_status = HashFamilyInfo::SRC_ACTIVE + ); REGISTER_HASH(wyhash_32, - $.desc = "wyhash v3, 32-bit native version", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 32, - $.verification_LE = 0x09DE8066, - $.verification_BE = 0x9D86BAC7, - $.hashfn_native = Wyhash32, - $.hashfn_bswap = Wyhash32, - $.seedfixfn = excludeBadseeds, - $.badseeds = { 0x429dacdd, 0xd637dbf3 } -); + $.desc = "wyhash v3, 32-bit native version", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 32, + $.verification_LE = 0x09DE8066, + 
$.verification_BE = 0x9D86BAC7, + $.hashfn_native = Wyhash32, + $.hashfn_bswap = Wyhash32, + $.seedfixfn = excludeBadseeds, + $.badseeds = { 0x429dacdd, 0xd637dbf3 } + ); REGISTER_HASH(wyhash, - $.desc = "wyhash v3, 64-bit non-strict version", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 64, - $.verification_LE = 0x67031D43, - $.verification_BE = 0x912E4607, - $.hashfn_native = Wyhash64, - $.hashfn_bswap = Wyhash64, - $.initfn = wyhash64_selftest, - $.seedfixfn = excludeBadseeds, - $.badseeds = { 0x14cc886e, 0x1bf4ed84, UINT64_C(0x14cc886e14cc886e) } // all seeds with those lower bits ? -); + $.desc = "wyhash v3, 64-bit non-strict version", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 64, + $.verification_LE = 0x67031D43, + $.verification_BE = 0x912E4607, + $.hashfn_native = Wyhash64, + $.hashfn_bswap = Wyhash64, + $.initfn = wyhash64_selftest, + $.seedfixfn = excludeBadseeds, + $.badseeds = { 0x14cc886e, 0x1bf4ed84, UINT64_C (0x14cc886e14cc886e) } // all seeds with those lower bits ? 
+ ); REGISTER_HASH(wyhash__strict, - $.desc = "wyhash v3, 64-bit strict version", - $.hash_flags = - 0, - $.impl_flags = - FLAG_IMPL_MULTIPLY_64_128 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, - $.bits = 64, - $.verification_LE = 0xA82DBAD7, - $.verification_BE = 0xDB7957D4, - $.hashfn_native = Wyhash64, - $.hashfn_bswap = Wyhash64 -); + $.desc = "wyhash v3, 64-bit strict version", + $.hash_flags = + 0, + $.impl_flags = + FLAG_IMPL_MULTIPLY_64_128 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_PUBLIC_DOMAIN, + $.bits = 64, + $.verification_LE = 0xA82DBAD7, + $.verification_BE = 0xDB7957D4, + $.hashfn_native = Wyhash64, + $.hashfn_bswap = Wyhash64 + ); diff --git a/hashes/x17.cpp b/hashes/x17.cpp index 68b3e702..82e687c7 100644 --- a/hashes/x17.cpp +++ b/hashes/x17.cpp @@ -28,37 +28,38 @@ #include "Hashlib.h" //------------------------------------------------------------ -static uint32_t x17_impl(const uint8_t * data, size_t len, uint32_t h) { - for(size_t i = 0; i < len; ++i) { +static uint32_t x17_impl( const uint8_t * data, size_t len, uint32_t h ) { + for (size_t i = 0; i < len; ++i) { h = 17 * h + (data[i] - ' '); } return h ^ (h >> 16); } //------------------------------------------------------------ -template < bool bswap > -static void x17(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void x17( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = x17_impl((const uint8_t *)in, len, (uint32_t)seed); + PUT_U32(h, (uint8_t *)out, 0); } //------------------------------------------------------------ REGISTER_FAMILY(x17, - $.src_url = "https://github.com/aappleby/smhasher/blob/master/src/Hashes.cpp", - $.src_status = HashFamilyInfo::SRC_FROZEN -); + $.src_url = "https://github.com/aappleby/smhasher/blob/master/src/Hashes.cpp", + $.src_status = HashFamilyInfo::SRC_FROZEN + ); REGISTER_HASH(x17, - $.desc = "x17", - $.hash_flags = - FLAG_HASH_SMALL_SEED, - $.impl_flags = - FLAG_IMPL_SLOW | 
- FLAG_IMPL_MULTIPLY | - FLAG_IMPL_LICENSE_MIT, - $.bits = 32, - $.verification_LE = 0x8128E14C, - $.verification_BE = 0x9AD0FE22, - $.hashfn_native = x17, - $.hashfn_bswap = x17 -); + $.desc = "x17", + $.hash_flags = + FLAG_HASH_SMALL_SEED, + $.impl_flags = + FLAG_IMPL_SLOW | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_LICENSE_MIT, + $.bits = 32, + $.verification_LE = 0x8128E14C, + $.verification_BE = 0x9AD0FE22, + $.hashfn_native = x17, + $.hashfn_bswap = x17 + ); diff --git a/hashes/xxhash.cpp b/hashes/xxhash.cpp index cc515ec7..6aa2496e 100644 --- a/hashes/xxhash.cpp +++ b/hashes/xxhash.cpp @@ -34,13 +34,13 @@ #include "Mathmult.h" -//#define FORCE_SCALAR +// #define FORCE_SCALAR //------------------------------------------------------------ #define XXH_VERSION_MAJOR 0 #define XXH_VERSION_MINOR 8 #define XXH_VERSION_RELEASE 1 -#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR * 100 * 100 + XXH_VERSION_MINOR * 100 + XXH_VERSION_RELEASE) // Used to prevent unwanted optimizations for var. // @@ -56,26 +56,26 @@ // XXH3_initCustomSecret_scalar(). 
#if defined(HAVE_X86_64_ASM) || defined(HAVE_ARM_ASM) || \ defined(HAVE_ARM64_ASM) || defined(HAVE_PPC_ASM) -#define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var)) + #define XXH_COMPILER_GUARD(var) __asm__ __volatile__ ("" : "+r" (var)) #else -#define XXH_COMPILER_GUARD(var) ((void)var) + #define XXH_COMPILER_GUARD(var) ((void)var) #endif //------------------------------------------------------------ // XXH32 family -- functions used in the classic 32-bit xxHash algorithm // #define instead of static const, to be used as initializers -#define XXH_PRIME32_1 0x9E3779B1 // 0b10011110001101110111100110110001 -#define XXH_PRIME32_2 0x85EBCA77 // 0b10000101111010111100101001110111 -#define XXH_PRIME32_3 0xC2B2AE3D // 0b11000010101100101010111000111101 -#define XXH_PRIME32_4 0x27D4EB2F // 0b00100111110101001110101100101111 -#define XXH_PRIME32_5 0x165667B1 // 0b00010110010101100110011110110001 +#define XXH_PRIME32_1 0x9E3779B1 // 0b10011110001101110111100110110001 +#define XXH_PRIME32_2 0x85EBCA77 // 0b10000101111010111100101001110111 +#define XXH_PRIME32_3 0xC2B2AE3D // 0b11000010101100101010111000111101 +#define XXH_PRIME32_4 0x27D4EB2F // 0b00100111110101001110101100101111 +#define XXH_PRIME32_5 0x165667B1 // 0b00010110010101100110011110110001 // Mixes all bits to finalize the hash. // The final mix ensures that all input bits have a chance to impact // any bit in the output digest, resulting in an unbiased // distribution. -static uint32_t XXH32_avalanche(uint32_t hash) { +static uint32_t XXH32_avalanche( uint32_t hash ) { hash ^= hash >> 15; hash *= XXH_PRIME32_2; hash ^= hash >> 13; @@ -88,17 +88,17 @@ static uint32_t XXH32_avalanche(uint32_t hash) { // There may be up to 15 bytes remaining to consume from the input. // This final stage will digest them to ensure that all input bytes // are present in the final mix. 
-template < bool bswap > -static uint32_t XXH32_finalize(uint32_t hash, const uint8_t * ptr, size_t len) { +template +static uint32_t XXH32_finalize( uint32_t hash, const uint8_t * ptr, size_t len ) { while (len >= 4) { hash += GET_U32(ptr, 0) * XXH_PRIME32_3; - ptr += 4; - hash = ROTL32(hash, 17) * XXH_PRIME32_4; - len -= 4; + ptr += 4; + hash = ROTL32(hash, 17) * XXH_PRIME32_4; + len -= 4; } while (len > 0) { hash += (*ptr++) * XXH_PRIME32_5; - hash = ROTL32(hash, 11) * XXH_PRIME32_1; + hash = ROTL32(hash, 11) * XXH_PRIME32_1; --len; } return XXH32_avalanche(hash); @@ -138,7 +138,7 @@ static uint32_t XXH32_finalize(uint32_t hash, const uint8_t * ptr, size_t len) { // This is also enabled on AArch64, as Clang autovectorizes it incorrectly // and it is pointless writing a NEON implementation that is basically the // same speed as scalar for XXH32. -static uint32_t XXH32_round(uint32_t acc, uint32_t input) { +static uint32_t XXH32_round( uint32_t acc, uint32_t input ) { acc += input * XXH_PRIME32_2; acc = ROTL32(acc, 13); acc *= XXH_PRIME32_1; @@ -148,12 +148,12 @@ static uint32_t XXH32_round(uint32_t acc, uint32_t input) { return acc; } -template < bool bswap > -static uint32_t XXH32_impl(const uint8_t * input, size_t len, uint32_t seed) { +template +static uint32_t XXH32_impl( const uint8_t * input, size_t len, uint32_t seed ) { uint32_t h32; - if (len>=16) { - const uint8_t * const bEnd = input + len; + if (len >= 16) { + const uint8_t * const bEnd = input + len; const uint8_t * const limit = bEnd - 15; uint32_t v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2; uint32_t v2 = seed + XXH_PRIME32_2; @@ -161,21 +161,21 @@ static uint32_t XXH32_impl(const uint8_t * input, size_t len, uint32_t seed) { uint32_t v4 = seed - XXH_PRIME32_1; do { - v1 = XXH32_round(v1, GET_U32(input, 0)); - v2 = XXH32_round(v2, GET_U32(input, 4)); - v3 = XXH32_round(v3, GET_U32(input, 8)); - v4 = XXH32_round(v4, GET_U32(input, 12)); + v1 = XXH32_round(v1, GET_U32(input, 0)); + v2 = XXH32_round(v2, 
GET_U32(input, 4)); + v3 = XXH32_round(v3, GET_U32(input, 8)); + v4 = XXH32_round(v4, GET_U32(input, 12)); input += 16; } while (input < limit); h32 = ROTL32(v1, 1) + ROTL32(v2, 7) + ROTL32(v3, 12) + ROTL32(v4, 18); } else { - h32 = seed + XXH_PRIME32_5; + h32 = seed + XXH_PRIME32_5; } h32 += (uint32_t)len; - return XXH32_finalize(h32, input, len&15); + return XXH32_finalize(h32, input, len & 15); } //------------------------------------------------------------ @@ -193,21 +193,21 @@ static uint32_t XXH32_impl(const uint8_t * input, size_t len, uint32_t seed) { // 0b0010011111010100111010110010111100010110010101100110011111000101 #define XXH_PRIME64_5 UINT64_C(0x27D4EB2F165667C5) -static uint64_t XXH64_round(uint64_t acc, uint64_t input) { +static uint64_t XXH64_round( uint64_t acc, uint64_t input ) { acc += input * XXH_PRIME64_2; acc = ROTL64(acc, 31); acc *= XXH_PRIME64_1; return acc; } -static uint64_t XXH64_mergeRound(uint64_t acc, uint64_t val) { +static uint64_t XXH64_mergeRound( uint64_t acc, uint64_t val ) { val = XXH64_round(0, val); acc ^= val; acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4; return acc; } -static uint64_t XXH64_avalanche(uint64_t hash) { +static uint64_t XXH64_avalanche( uint64_t hash ) { hash ^= hash >> 33; hash *= XXH_PRIME64_2; hash ^= hash >> 29; @@ -220,35 +220,35 @@ static uint64_t XXH64_avalanche(uint64_t hash) { // There may be up to 31 bytes remaining to consume from the input. // This final stage will digest them to ensure that all input bytes // are present in the final mix. 
-template < bool bswap > -static uint64_t XXH64_finalize(uint64_t hash, const uint8_t * ptr, size_t len) { +template +static uint64_t XXH64_finalize( uint64_t hash, const uint8_t * ptr, size_t len ) { while (len >= 8) { uint64_t const k1 = XXH64_round(0, GET_U64(ptr, 0)); - ptr += 8; + ptr += 8; hash ^= k1; - hash = ROTL64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4; - len -= 8; + hash = ROTL64(hash, 27) * XXH_PRIME64_1 + XXH_PRIME64_4; + len -= 8; } if (len >= 4) { hash ^= (uint64_t)(GET_U32(ptr, 0)) * XXH_PRIME64_1; - ptr += 4; - hash = ROTL64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; - len -= 4; + ptr += 4; + hash = ROTL64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3; + len -= 4; } while (len > 0) { hash ^= (*ptr++) * XXH_PRIME64_5; - hash = ROTL64(hash, 11) * XXH_PRIME64_1; + hash = ROTL64(hash, 11) * XXH_PRIME64_1; --len; } - return XXH64_avalanche(hash); + return XXH64_avalanche(hash); } -template < bool bswap > -static uint64_t XXH64_impl(const uint8_t * input, size_t len, uint64_t seed) { +template +static uint64_t XXH64_impl( const uint8_t * input, size_t len, uint64_t seed ) { uint64_t h64; - if (len>=32) { - const uint8_t * const bEnd = input + len; + if (len >= 32) { + const uint8_t * const bEnd = input + len; const uint8_t * const limit = bEnd - 31; uint64_t v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2; uint64_t v2 = seed + XXH_PRIME64_2; @@ -256,12 +256,12 @@ static uint64_t XXH64_impl(const uint8_t * input, size_t len, uint64_t seed) { uint64_t v4 = seed - XXH_PRIME64_1; do { - v1 = XXH64_round(v1, GET_U64(input, 0)); - v2 = XXH64_round(v2, GET_U64(input, 8)); - v3 = XXH64_round(v3, GET_U64(input, 16)); - v4 = XXH64_round(v4, GET_U64(input, 24)); + v1 = XXH64_round(v1, GET_U64(input, 0)); + v2 = XXH64_round(v2, GET_U64(input, 8)); + v3 = XXH64_round(v3, GET_U64(input, 16)); + v4 = XXH64_round(v4, GET_U64(input, 24)); input += 32; - } while (input(h64, input, len&31); + return XXH64_finalize(h64, input, len & 31); } 
//------------------------------------------------------------ @@ -412,59 +412,62 @@ alignas(64) static const uint8_t XXH3_kSecret[XXH3_SECRET_DEFAULT_SIZE] = { * -O2, but the other one we can't control without "failed to inline always * inline function due to target mismatch" warnings. */ -# if defined(__GNUC__) && !defined(__clang__) && /* GCC, not Clang */ \ + #if defined(__GNUC__) && !defined(__clang__) && /* GCC, not Clang */ \ defined(__OPTIMIZE__) -# define XXH3_POP_PRAGMA -# pragma GCC push_options -# pragma GCC optimize("-O2") -# endif + #define XXH3_POP_PRAGMA + #pragma GCC push_options + #pragma GCC optimize("-O2") + #endif #endif //------------------------------------------------------------ typedef struct { - uint64_t low64; // value & 0xFFFFFFFFFFFFFFFF - uint64_t high64; // value >> 64 + uint64_t low64; // value & 0xFFFFFFFFFFFFFFFF + uint64_t high64; // value >> 64 } XXH128_hash_t; -static inline uint64_t XXH_mult32to64(uint32_t lhs, uint32_t rhs) { +static inline uint64_t XXH_mult32to64( uint32_t lhs, uint32_t rhs ) { uint64_t r64; + mult32_64(r64, lhs, rhs); return r64; } -static inline XXH128_hash_t XXH_mult64to128(uint64_t lhs, uint64_t rhs) { +static inline XXH128_hash_t XXH_mult64to128( uint64_t lhs, uint64_t rhs ) { XXH128_hash_t r128; + mult64_128(r128.low64, r128.high64, lhs, rhs); return r128; } -static uint64_t XXH3_mul128_fold64(uint64_t lhs, uint64_t rhs) { +static uint64_t XXH3_mul128_fold64( uint64_t lhs, uint64_t rhs ) { XXH128_hash_t product = XXH_mult64to128(lhs, rhs); + return product.low64 ^ product.high64; } // Seems to produce slightly better code on GCC for some reason. 
-static FORCE_INLINE uint64_t XXH_xorshift64(uint64_t v64, const int shift) { - //static_assert(0 <= shift && shift < 64, "valid shift value"); +static FORCE_INLINE uint64_t XXH_xorshift64( uint64_t v64, const int shift ) { + // static_assert(0 <= shift && shift < 64, "valid shift value"); return v64 ^ (v64 >> shift); } // This is a fast avalanche stage, suitable when input bits are // already partially mixed. -static uint64_t XXH3_avalanche(uint64_t h64) { - h64 = XXH_xorshift64(h64, 37); +static uint64_t XXH3_avalanche( uint64_t h64 ) { + h64 = XXH_xorshift64(h64, 37); h64 *= UINT64_C(0x165667919E3779F9); - h64 = XXH_xorshift64(h64, 32); + h64 = XXH_xorshift64(h64, 32); return h64; } // This is a stronger avalanche, inspired by Pelle Evensen's rrmxmx. // preferable when input has not been previously mixed. -static uint64_t XXH3_rrmxmx(uint64_t h64, uint64_t len) { +static uint64_t XXH3_rrmxmx( uint64_t h64, uint64_t len ) { /* this mix is inspired by Pelle Evensen's rrmxmx */ h64 ^= ROTL64(h64, 49) ^ ROTL64(h64, 24); h64 *= UINT64_C(0x9FB21C651E98DF25); - h64 ^= (h64 >> 35) + len ; + h64 ^= (h64 >> 35) + len; h64 *= UINT64_C(0x9FB21C651E98DF25); return XXH_xorshift64(h64, 28); } @@ -502,50 +505,56 @@ static uint64_t XXH3_rrmxmx(uint64_t h64, uint64_t len) { // // This adds an extra layer of strength for custom secrets. 
-template < bool bswap > -static FORCE_INLINE uint64_t XXH3_len_1to3_64b(const uint8_t * input, size_t len, const uint8_t * secret, uint64_t seed) { +template +static FORCE_INLINE uint64_t XXH3_len_1to3_64b( const uint8_t * input, + size_t len, const uint8_t * secret, uint64_t seed ) { // len = 1: combined = { input[0], 0x01, input[0], input[0] } // len = 2: combined = { input[1], 0x02, input[0], input[1] } // len = 3: combined = { input[2], 0x03, input[0], input[1] } - uint8_t const c1 = input[0]; - uint8_t const c2 = input[len >> 1]; - uint8_t const c3 = input[len - 1]; - uint32_t const combined = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24) | - ((uint32_t)c3 << 0) | ((uint32_t)len << 8); - uint64_t const bitflip = (GET_U32(secret,0) ^ GET_U32(secret,4)) + seed; - uint64_t const keyed = (uint64_t)combined ^ bitflip; + uint8_t const c1 = input[0]; + uint8_t const c2 = input[len >> 1]; + uint8_t const c3 = input[len - 1]; + uint32_t const combined = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24) | + ((uint32_t)c3 << 0) | ((uint32_t)len << 8); + uint64_t const bitflip = (GET_U32(secret, 0) ^ GET_U32(secret, 4)) + seed; + uint64_t const keyed = (uint64_t)combined ^ bitflip; + return XXH64_avalanche(keyed); } -template < bool bswap > -static FORCE_INLINE uint64_t XXH3_len_4to8_64b(const uint8_t * input, size_t len, const uint8_t * secret, uint64_t seed) { +template +static FORCE_INLINE uint64_t XXH3_len_4to8_64b( const uint8_t * input, + size_t len, const uint8_t * secret, uint64_t seed ) { seed ^= (uint64_t)BSWAP((uint32_t)seed) << 32; - uint32_t const input1 = GET_U32(input, 0); - uint32_t const input2 = GET_U32(input, len - 4); + uint32_t const input1 = GET_U32(input, 0 ); + uint32_t const input2 = GET_U32(input, len - 4); uint64_t const input64 = input2 + (((uint64_t)input1) << 32); - uint64_t const bitflip = (GET_U64(secret, 8) ^ GET_U64(secret,16)) - seed; - uint64_t const keyed = input64 ^ bitflip; + uint64_t const bitflip = (GET_U64(secret, 8) ^ GET_U64(secret, 
16)) - seed; + uint64_t const keyed = input64 ^ bitflip; return XXH3_rrmxmx(keyed, len); } -template < bool bswap > -static FORCE_INLINE uint64_t XXH3_len_9to16_64b(const uint8_t * input, size_t len, const uint8_t * secret, uint64_t seed) { - uint64_t const bitflip1 = (GET_U64(secret,24) ^ GET_U64(secret,32)) + seed; - uint64_t const bitflip2 = (GET_U64(secret,40) ^ GET_U64(secret,48)) - seed; - uint64_t const input_lo = GET_U64(input, 0) ^ bitflip1; +template +static FORCE_INLINE uint64_t XXH3_len_9to16_64b( const uint8_t * input, + size_t len, const uint8_t * secret, uint64_t seed ) { + uint64_t const bitflip1 = (GET_U64(secret, 24) ^ GET_U64(secret, 32)) + seed; + uint64_t const bitflip2 = (GET_U64(secret, 40) ^ GET_U64(secret, 48)) - seed; + uint64_t const input_lo = GET_U64(input, 0 ) ^ bitflip1; uint64_t const input_hi = GET_U64(input, len - 8) ^ bitflip2; - uint64_t const acc = len + input_hi + BSWAP(input_lo) + - XXH3_mul128_fold64(input_lo, input_hi); + uint64_t const acc = len + input_hi + BSWAP(input_lo) + + XXH3_mul128_fold64(input_lo, input_hi); + return XXH3_avalanche(acc); } -template < bool bswap > -static FORCE_INLINE uint64_t XXH3_len_0to16_64b(const uint8_t * input, size_t len, const uint8_t * secret, uint64_t seed) { - if (likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed); - if (likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed); - if (len) return XXH3_len_1to3_64b(input, len, secret, seed); - return XXH64_avalanche(seed ^ GET_U64(secret,56) ^ - GET_U64(secret,64)); +template +static FORCE_INLINE uint64_t XXH3_len_0to16_64b( const uint8_t * input, + size_t len, const uint8_t * secret, uint64_t seed ) { + if (likely(len > 8)) { return XXH3_len_9to16_64b(input, len, secret, seed); } + if (likely(len >= 4)) { return XXH3_len_4to8_64b(input, len, secret, seed); } + if (len) { return XXH3_len_1to3_64b(input, len, secret, seed); } + return XXH64_avalanche(seed ^ GET_U64(secret, 56) ^ + GET_U64(secret, 64)); } 
//------------------------------------------------------------ @@ -592,41 +601,38 @@ static FORCE_INLINE uint64_t XXH3_len_0to16_64b(const uint8_t * input, size_t le #define XXH3_MIDSIZE_MAX 240 -template < bool bswap > -static FORCE_INLINE uint64_t XXH3_mix16B(const uint8_t * RESTRICT input, - const uint8_t * RESTRICT secret, uint64_t seed64) { -#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ - && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ +template +static FORCE_INLINE uint64_t XXH3_mix16B( const uint8_t * RESTRICT input, + const uint8_t * RESTRICT secret, uint64_t seed64 ) { +#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__i386__) && defined(__SSE2__) /* x86 + SSE2 */ XXH_COMPILER_GUARD(seed64); #endif uint64_t const input_lo = GET_U64(input, 0); uint64_t const input_hi = GET_U64(input, 8); - return XXH3_mul128_fold64( - input_lo ^ (GET_U64(secret, 0) + seed64), + return XXH3_mul128_fold64(input_lo ^ (GET_U64(secret, 0) + seed64), input_hi ^ (GET_U64(secret, 8) - seed64)); } -template < bool bswap > -static FORCE_INLINE uint64_t XXH3_len_17to128_64b( - const uint8_t * RESTRICT input, size_t len, - const uint8_t * RESTRICT secret, size_t secretSize, - uint64_t seed) { +template +static FORCE_INLINE uint64_t XXH3_len_17to128_64b( const uint8_t * RESTRICT input, size_t len, + const uint8_t * RESTRICT secret, size_t secretSize, uint64_t seed ) { uint64_t acc = len * XXH_PRIME64_1; if (len > 32) { if (len > 64) { if (len > 96) { - acc += XXH3_mix16B(input+48, secret+96, seed); - acc += XXH3_mix16B(input+len-64, secret+112, seed); + acc += XXH3_mix16B(input + 48 , secret + 96, seed); + acc += XXH3_mix16B(input + len - 64, secret + 112, seed); } - acc += XXH3_mix16B(input+32, secret+64, seed); - acc += XXH3_mix16B(input+len-48, secret+80, seed); + acc += XXH3_mix16B(input + 32 , secret + 64, seed); + acc += XXH3_mix16B(input + len - 48, secret + 80, seed); } - acc += XXH3_mix16B(input+16, secret+32, 
seed); - acc += XXH3_mix16B(input+len-32, secret+48, seed); + acc += XXH3_mix16B(input + 16 , secret + 32, seed); + acc += XXH3_mix16B(input + len - 32, secret + 48, seed); } - acc += XXH3_mix16B(input+0, secret+0, seed); - acc += XXH3_mix16B(input+len-16, secret+16, seed); + acc += XXH3_mix16B(input + 0 , secret + 0, seed); + acc += XXH3_mix16B(input + len - 16, secret + 16, seed); return XXH3_avalanche(acc); } @@ -649,27 +655,25 @@ static FORCE_INLINE uint64_t XXH3_len_17to128_64b( // This loop is the easiest to fix, as unlike XXH32, this pragma // _actually works_ because it is a loop vectorization instead of an // SLP vectorization. -template < bool bswap > -static NEVER_INLINE uint64_t XXH3_len_129to240_64b( - const uint8_t * RESTRICT input, size_t len, - const uint8_t * RESTRICT secret, size_t secretSize, - uint64_t seed) { - #define XXH3_MIDSIZE_STARTOFFSET 3 - #define XXH3_MIDSIZE_LASTOFFSET 17 +template +static NEVER_INLINE uint64_t XXH3_len_129to240_64b( const uint8_t * RESTRICT input, size_t len, + const uint8_t * RESTRICT secret, size_t secretSize, uint64_t seed ) { +#define XXH3_MIDSIZE_STARTOFFSET 3 +#define XXH3_MIDSIZE_LASTOFFSET 17 - uint64_t acc = len * XXH_PRIME64_1; + uint64_t acc = len * XXH_PRIME64_1; int const nbRounds = (int)len / 16; for (int i = 0; i < 8; i++) { - acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed); + acc += XXH3_mix16B(input + (16 * i), secret + (16 * i), seed); } acc = XXH3_avalanche(acc); #if defined(__clang__) && (defined(__ARM_NEON) || defined(__ARM_NEON__)) -# pragma clang loop vectorize(disable) + #pragma clang loop vectorize(disable) #endif - for (int i = 8 ; i < nbRounds; i++) { - acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed); + for (int i = 8; i < nbRounds; i++) { + acc += XXH3_mix16B(input + (16 * i), secret + (16 * (i - 8)) + XXH3_MIDSIZE_STARTOFFSET, seed); } /* last bytes */ acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, 
seed); @@ -695,42 +699,43 @@ static NEVER_INLINE uint64_t XXH3_len_129to240_64b( // XXH64). // A doubled version of 1to3_64b with different constants. -template < bool bswap > -static FORCE_INLINE XXH128_hash_t XXH3_len_1to3_128b(const uint8_t* input, - size_t len, const uint8_t* secret, uint64_t seed) { +template +static FORCE_INLINE XXH128_hash_t XXH3_len_1to3_128b( const uint8_t * input, + size_t len, const uint8_t * secret, uint64_t seed ) { /* * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } */ - uint8_t const c1 = input[0]; - uint8_t const c2 = input[len >> 1]; - uint8_t const c3 = input[len - 1]; - uint32_t const combinedl = ((uint32_t)c1 <<16) | ((uint32_t)c2 << 24) - | ((uint32_t)c3 << 0) | ((uint32_t)len << 8); + uint8_t const c1 = input[0]; + uint8_t const c2 = input[len >> 1]; + uint8_t const c3 = input[len - 1]; + uint32_t const combinedl = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24) | + ((uint32_t)c3 << 0) | ((uint32_t)len << 8); uint32_t const combinedh = ROTL32(BSWAP(combinedl), 13); - uint64_t const bitflipl = (GET_U32(secret,0) ^ GET_U32(secret, 4)) + seed; - uint64_t const bitfliph = (GET_U32(secret,8) ^ GET_U32(secret,12)) - seed; - uint64_t const keyed_lo = (uint64_t)combinedl ^ bitflipl; - uint64_t const keyed_hi = (uint64_t)combinedh ^ bitfliph; - XXH128_hash_t h128 = { XXH64_avalanche(keyed_lo), XXH64_avalanche(keyed_hi) }; + uint64_t const bitflipl = (GET_U32(secret, 0) ^ GET_U32(secret, 4)) + seed; + uint64_t const bitfliph = (GET_U32(secret, 8) ^ GET_U32(secret, 12)) - seed; + uint64_t const keyed_lo = (uint64_t)combinedl ^ bitflipl; + uint64_t const keyed_hi = (uint64_t)combinedh ^ bitfliph; + XXH128_hash_t h128 = { XXH64_avalanche(keyed_lo), XXH64_avalanche(keyed_hi) }; + return h128; } -template < bool bswap > -static FORCE_INLINE XXH128_hash_t XXH3_len_4to8_128b(const uint8_t* input, - size_t len, 
const uint8_t* secret, uint64_t seed) { +template +static FORCE_INLINE XXH128_hash_t XXH3_len_4to8_128b( const uint8_t * input, + size_t len, const uint8_t * secret, uint64_t seed ) { seed ^= (uint64_t)BSWAP((uint32_t)seed) << 32; - uint32_t const input_lo = GET_U32(input, 0); + uint32_t const input_lo = GET_U32(input, 0 ); uint32_t const input_hi = GET_U32(input, len - 4); uint64_t const input_64 = input_lo + ((uint64_t)input_hi << 32); - uint64_t const bitflip = (GET_U64(secret,16) ^ GET_U64(secret,24)) + seed; - uint64_t const keyed = input_64 ^ bitflip; + uint64_t const bitflip = (GET_U64(secret, 16) ^ GET_U64(secret, 24)) + seed; + uint64_t const keyed = input_64 ^ bitflip; /* Shift len to the left to ensure it is even, this avoids even multiplies. */ XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2)); - m128.high64 += (m128.low64 << 1); + m128.high64 += (m128.low64 << 1); m128.low64 ^= (m128.high64 >> 3); m128.low64 = XXH_xorshift64(m128.low64, 35); @@ -740,14 +745,15 @@ static FORCE_INLINE XXH128_hash_t XXH3_len_4to8_128b(const uint8_t* input, return m128; } -template < bool bswap > -static FORCE_INLINE XXH128_hash_t XXH3_len_9to16_128b(const uint8_t* input, - size_t len, const uint8_t* secret, uint64_t seed) { - uint64_t const bitflipl = (GET_U64(secret,32) ^ GET_U64(secret,40)) - seed; - uint64_t const bitfliph = (GET_U64(secret,48) ^ GET_U64(secret,56)) + seed; - uint64_t const input_lo = GET_U64(input, 0); +template +static FORCE_INLINE XXH128_hash_t XXH3_len_9to16_128b( const uint8_t * input, + size_t len, const uint8_t * secret, uint64_t seed ) { + uint64_t const bitflipl = (GET_U64(secret, 32) ^ GET_U64(secret, 40)) - seed; + uint64_t const bitfliph = (GET_U64(secret, 48) ^ GET_U64(secret, 56)) + seed; + uint64_t const input_lo = GET_U64(input, 0 ); uint64_t input_hi = GET_U64(input, len - 8); - XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1); + XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ 
input_hi ^ bitflipl, XXH_PRIME64_1); + /* * Put len in the middle of m128 to ensure that the length gets mixed to * both the low and high bits in the 128x64 multiply below. @@ -798,28 +804,28 @@ static FORCE_INLINE XXH128_hash_t XXH3_len_9to16_128b(const uint8_t* input, m128.high64 += input_hi + XXH_mult32to64((uint32_t)input_hi, XXH_PRIME32_2 - 1); #endif /* m128 ^= XXH_swap64(m128 >> 64); */ - m128.low64 ^= BSWAP(m128.high64); + m128.low64 ^= BSWAP(m128.high64); /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */ XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2); h128.high64 += m128.high64 * XXH_PRIME64_2; - h128.low64 = XXH3_avalanche(h128.low64); + h128.low64 = XXH3_avalanche(h128.low64 ); h128.high64 = XXH3_avalanche(h128.high64); return h128; } // Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN -template < bool bswap > -static FORCE_INLINE XXH128_hash_t XXH3_len_0to16_128b(const uint8_t* input, - size_t len, const uint8_t* secret, uint64_t seed) { - if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed); - if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed); - if (len) return XXH3_len_1to3_128b(input, len, secret, seed); - - uint64_t const bitflipl = GET_U64(secret,64) ^ GET_U64(secret,72); - uint64_t const bitfliph = GET_U64(secret,80) ^ GET_U64(secret,88); - XXH128_hash_t h128 = { XXH64_avalanche(seed ^ bitflipl), XXH64_avalanche( seed ^ bitfliph) }; +template +static FORCE_INLINE XXH128_hash_t XXH3_len_0to16_128b( const uint8_t * input, + size_t len, const uint8_t * secret, uint64_t seed ) { + if (len > 8) { return XXH3_len_9to16_128b(input, len, secret, seed); } + if (len >= 4) { return XXH3_len_4to8_128b(input, len, secret, seed); } + if (len) { return XXH3_len_1to3_128b(input, len, secret, seed); } + + uint64_t const bitflipl = GET_U64(secret, 64) ^ GET_U64(secret, 72); + uint64_t const bitfliph = GET_U64(secret, 80) ^ GET_U64(secret, 88); + XXH128_hash_t h128 = { XXH64_avalanche(seed ^ bitflipl), 
XXH64_avalanche(seed ^ bitfliph) }; return h128; } @@ -827,83 +833,70 @@ static FORCE_INLINE XXH128_hash_t XXH3_len_0to16_128b(const uint8_t* input, // XXH3-128 mid-range keys // A bit slower than XXH3_mix16B, but handles multiply by zero better. -template < bool bswap > -static FORCE_INLINE XXH128_hash_t XXH128_mix32B(XXH128_hash_t acc, - const uint8_t* input_1, const uint8_t* input_2, - const uint8_t* secret, uint64_t seed) { - acc.low64 += XXH3_mix16B(input_1, secret+0, seed); +template +static FORCE_INLINE XXH128_hash_t XXH128_mix32B( XXH128_hash_t acc, const uint8_t * input_1, + const uint8_t * input_2, const uint8_t * secret, uint64_t seed ) { + acc.low64 += XXH3_mix16B(input_1, secret + 0, seed); acc.low64 ^= GET_U64(input_2, 0) + GET_U64(input_2, 8); - acc.high64 += XXH3_mix16B(input_2, secret+16, seed); + acc.high64 += XXH3_mix16B(input_2, secret + 16, seed); acc.high64 ^= GET_U64(input_1, 0) + GET_U64(input_1, 8); return acc; } -template < bool bswap > -static FORCE_INLINE XXH128_hash_t XXH3_len_17to128_128b( - const uint8_t* RESTRICT input, size_t len, - const uint8_t* RESTRICT secret, size_t secretSize, uint64_t seed) { +template +static FORCE_INLINE XXH128_hash_t XXH3_len_17to128_128b( const uint8_t * RESTRICT input, size_t len, + const uint8_t * RESTRICT secret, size_t secretSize, uint64_t seed ) { XXH128_hash_t acc = { len * XXH_PRIME64_1, acc.high64 = 0 }; if (len > 32) { if (len > 64) { if (len > 96) { - acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed); + acc = XXH128_mix32B(acc, input + 48, input + len - 64, secret + 96, seed); } - acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed); + acc = XXH128_mix32B(acc, input + 32, input + len - 48, secret + 64, seed); } - acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed); + acc = XXH128_mix32B(acc, input + 16, input + len - 32, secret + 32, seed); } - acc = XXH128_mix32B(acc, input, input+len-16, secret, seed); + acc = XXH128_mix32B(acc, input, input + len - 
16, secret, seed); XXH128_hash_t h128; h128.low64 = acc.low64 + acc.high64; - h128.high64 = (acc.low64 * XXH_PRIME64_1) + - (acc.high64 * XXH_PRIME64_4) + - ((len - seed) * XXH_PRIME64_2) ; + h128.high64 = (acc.low64 * XXH_PRIME64_1) + + (acc.high64 * XXH_PRIME64_4) + + ( (len - seed ) * XXH_PRIME64_2); h128.low64 = XXH3_avalanche(h128.low64); h128.high64 = (uint64_t)0 - XXH3_avalanche(h128.high64); return h128; } -template < bool bswap > -static NEVER_INLINE XXH128_hash_t XXH3_len_129to240_128b( - const uint8_t* RESTRICT input, size_t len, - const uint8_t* RESTRICT secret, size_t secretSize, uint64_t seed) { +template +static NEVER_INLINE XXH128_hash_t XXH3_len_129to240_128b( const uint8_t * RESTRICT input, size_t len, + const uint8_t * RESTRICT secret, size_t secretSize, uint64_t seed ) { XXH128_hash_t acc; - int const nbRounds = (int)len / 32; + int const nbRounds = (int)len / 32; - acc.low64 = len * XXH_PRIME64_1; + acc.low64 = len * XXH_PRIME64_1; acc.high64 = 0; for (int i = 0; i < 4; i++) { - acc = XXH128_mix32B(acc, - input + (32 * i), - input + (32 * i) + 16, - secret + (32 * i), - seed); + acc = XXH128_mix32B(acc, input + (32 * i), input + (32 * i) + 16, secret + (32 * i), seed); } - acc.low64 = XXH3_avalanche(acc.low64); + acc.low64 = XXH3_avalanche(acc.low64 ); acc.high64 = XXH3_avalanche(acc.high64); for (int i = 4; i < nbRounds; i++) { - acc = XXH128_mix32B(acc, - input + (32 * i), - input + (32 * i) + 16, - secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)), - seed); + acc = XXH128_mix32B(acc, input + (32 * i), input + (32 * i) + 16, + secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)), seed); } /* last bytes */ - acc = XXH128_mix32B(acc, - input + len - 16, - input + len - 32, - secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, - UINT64_C(0) - seed); + acc = XXH128_mix32B(acc, input + len - 16, input + len - 32, + secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, UINT64_C(0) - seed); XXH128_hash_t h128; h128.low64 = acc.low64 
+ acc.high64; - h128.high64 = (acc.low64 * XXH_PRIME64_1) + - (acc.high64 * XXH_PRIME64_4) + - ((len - seed) * XXH_PRIME64_2) ; + h128.high64 = (acc.low64 * XXH_PRIME64_1) + + (acc.high64 * XXH_PRIME64_4) + + ( (len - seed ) * XXH_PRIME64_2); h128.low64 = XXH3_avalanche(h128.low64); h128.high64 = (uint64_t)0 - XXH3_avalanche(h128.high64); return h128; @@ -933,26 +926,27 @@ static NEVER_INLINE XXH128_hash_t XXH3_len_129to240_128b( // // This doesn't matter on 64-bit hashes since they all get merged // together in the end, so we skip the extra step. -template < bool bswap > -static FORCE_INLINE void XXH3_scalarRound(void * RESTRICT acc, - void const * RESTRICT input, void const * RESTRICT secret, size_t lane) { - uint64_t * xacc = (uint64_t*) acc; - uint8_t const * xinput = (uint8_t const*) input; - uint8_t const * xsecret = (uint8_t const*) secret; - uint64_t const data_val = GET_U64(xinput, lane * 8); - uint64_t const data_key = data_val ^ GET_U64(xsecret, lane * 8); +template +static FORCE_INLINE void XXH3_scalarRound( void * RESTRICT acc, void const * RESTRICT input, + void const * RESTRICT secret, size_t lane ) { + uint64_t * xacc = (uint64_t * )acc; + uint8_t const * xinput = (uint8_t const *)input; + uint8_t const * xsecret = (uint8_t const *)secret; + uint64_t const data_val = GET_U64 (xinput, lane * 8); + uint64_t const data_key = data_val ^ GET_U64(xsecret, lane * 8); + xacc[lane ^ 1] += data_val; /* swap adjacent lanes */ - xacc[lane] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32); + xacc[lane] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32); } -template < bool bswap > -static FORCE_INLINE void XXH3_accumulate_512_scalar(void * RESTRICT acc, - const void * RESTRICT input, const void * RESTRICT secret) { +template +static FORCE_INLINE void XXH3_accumulate_512_scalar( void * RESTRICT acc, + const void * RESTRICT input, const void * RESTRICT secret ) { /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. 
*/ -#if defined(__GNUC__) && !defined(__clang__) \ - && (defined(__arm__) || defined(__thumb2__)) \ +#if defined(__GNUC__) && !defined(__clang__) \ + && (defined(__arm__) || defined(__thumb2__)) \ && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ -# pragma GCC unroll 8 + #pragma GCC unroll 8 #endif for (size_t i = 0; i < XXH_ACC_NB; i++) { XXH3_scalarRound(acc, input, secret, i); @@ -973,22 +967,21 @@ static FORCE_INLINE void XXH3_accumulate_512_scalar(void * RESTRICT acc, // Since our algorithm uses a pseudorandom secret to add some variance // into the mix, we don't need to (or want to) mix as often or as much // as HighwayHash does. -template < bool bswap > -static FORCE_INLINE void XXH3_scalarScrambleRound(void * RESTRICT acc, - void const* RESTRICT secret, size_t lane) { - uint64_t* const xacc = (uint64_t*) acc; /* presumed aligned */ - const uint8_t* const xsecret = (const uint8_t*) secret; /* no alignment restriction */ - uint64_t const key64 = GET_U64(xsecret, lane * 8); +template +static FORCE_INLINE void XXH3_scalarScrambleRound( void * RESTRICT acc, void const * RESTRICT secret, size_t lane ) { + uint64_t * const xacc = (uint64_t * )acc; /* presumed aligned */ + const uint8_t * const xsecret = (const uint8_t *)secret; /* no alignment restriction */ + uint64_t const key64 = GET_U64(xsecret, lane * 8); uint64_t acc64 = xacc[lane]; - acc64 = XXH_xorshift64(acc64, 47); - acc64 ^= key64; - acc64 *= XXH_PRIME32_1; + + acc64 = XXH_xorshift64(acc64, 47); + acc64 ^= key64; + acc64 *= XXH_PRIME32_1; xacc[lane] = acc64; } -template < bool bswap > -static FORCE_INLINE void XXH3_scrambleAcc_scalar(void * RESTRICT acc, - const void * RESTRICT secret) { +template +static FORCE_INLINE void XXH3_scrambleAcc_scalar( void * RESTRICT acc, const void * RESTRICT secret ) { for (size_t i = 0; i < XXH_ACC_NB; i++) { XXH3_scalarScrambleRound(acc, secret, i); } @@ -1024,15 +1017,15 @@ static FORCE_INLINE void XXH3_scrambleAcc_scalar(void * RESTRICT 
acc, // XXH3_64bits_withSeed, len == 256, Snapdragon 835 // without hack: 2654.4 MB/s // with hack: 3202.9 MB/s -template < bool bswap > -static FORCE_INLINE void XXH3_initCustomSecret_scalar(void * RESTRICT customSecret, - uint64_t seed64) { +template +static FORCE_INLINE void XXH3_initCustomSecret_scalar( void * RESTRICT customSecret, uint64_t seed64 ) { /* * We need a separate pointer for the GUARD hack below, * which requires a non-const pointer. * Any decent compiler will optimize this out otherwise. */ - const uint8_t* kSecretPtr = XXH3_kSecret; + const uint8_t * kSecretPtr = XXH3_kSecret; + #if defined(__clang__) && defined(__aarch64__) XXH_COMPILER_GUARD(kSecretPtr); #endif @@ -1045,10 +1038,10 @@ static FORCE_INLINE void XXH3_initCustomSecret_scalar(void * RESTRICT customSecr * loads together for free. Putting the loads together before the stores * properly generates LDP. */ - uint64_t lo = GET_U64(kSecretPtr, 16*i) + seed64; - uint64_t hi = GET_U64(kSecretPtr, 16*i + 8) - seed64; - PUT_U64(lo, (uint8_t*)customSecret, 16*i ); - PUT_U64(hi, (uint8_t*)customSecret, 16*i + 8); + uint64_t lo = GET_U64(kSecretPtr, 16 * i ) + seed64; + uint64_t hi = GET_U64(kSecretPtr, 16 * i + 8) - seed64; + PUT_U64(lo, (uint8_t *)customSecret, 16 * i ); + PUT_U64(hi, (uint8_t *)customSecret, 16 * i + 8); } } @@ -1064,65 +1057,64 @@ static FORCE_INLINE void XXH3_initCustomSecret_scalar(void * RESTRICT customSecr #define XXH_VSX 5 #if defined(__has_builtin) -# define XXH_HAS_BUILTIN(x) __has_builtin(x) + #define XXH_HAS_BUILTIN(x) __has_builtin(x) #else -# define XXH_HAS_BUILTIN(x) 0 + #define XXH_HAS_BUILTIN(x) 0 #endif -#if !defined(FORCE_SCALAR) && defined(HAVE_PPC_VSX) && \ - !defined(HAVE_PPC_ASM) && !defined(__s390x__) && \ +#if !defined(FORCE_SCALAR) && defined(HAVE_PPC_VSX) && \ + !defined(HAVE_PPC_ASM) && !defined(__s390x__) && \ !(defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)) -#warning "PPC mulo/mule compiler support not found; falling back to 
scalar code" -#define FORCE_SCALAR + #warning "PPC mulo/mule compiler support not found; falling back to scalar code" + #define FORCE_SCALAR #endif #if defined(FORCE_SCALAR) -#define XXH_VECTOR XXH_SCALAR -#define XXH_ACC_ALIGN 8 -#define XXH_SEC_ALIGN 8 + #define XXH_VECTOR XXH_SCALAR + #define XXH_ACC_ALIGN 8 + #define XXH_SEC_ALIGN 8 #elif defined(HAVE_ARM_NEON) -#define XXH_VECTOR XXH_NEON -#define XXH_ACC_ALIGN 16 -#define XXH_SEC_ALIGN 8 -#include "Intrinsics.h" -#include "xxhash/xxh3-arm.h" + #define XXH_VECTOR XXH_NEON + #define XXH_ACC_ALIGN 16 + #define XXH_SEC_ALIGN 8 + #include "Intrinsics.h" + #include "xxhash/xxh3-arm.h" #elif defined(HAVE_PPC_VSX) -#define XXH_VECTOR XXH_VSX -#define XXH_ACC_ALIGN 16 -#define XXH_SEC_ALIGN 8 -#include "Intrinsics.h" -#include "xxhash/xxh3-ppc.h" + #define XXH_VECTOR XXH_VSX + #define XXH_ACC_ALIGN 16 + #define XXH_SEC_ALIGN 8 + #include "Intrinsics.h" + #include "xxhash/xxh3-ppc.h" #elif defined(HAVE_AVX512_F) -#define XXH_VECTOR XXH_AVX512 -#define XXH_ACC_ALIGN 64 -#define XXH_SEC_ALIGN 64 -#include "Intrinsics.h" -#include "xxhash/xxh3-avx512.h" + #define XXH_VECTOR XXH_AVX512 + #define XXH_ACC_ALIGN 64 + #define XXH_SEC_ALIGN 64 + #include "Intrinsics.h" + #include "xxhash/xxh3-avx512.h" #elif defined(HAVE_AVX2) -#define XXH_VECTOR XXH_AVX2 -#define XXH_ACC_ALIGN 32 -#define XXH_SEC_ALIGN 32 -#include "Intrinsics.h" -#include "xxhash/xxh3-avx2.h" + #define XXH_VECTOR XXH_AVX2 + #define XXH_ACC_ALIGN 32 + #define XXH_SEC_ALIGN 32 + #include "Intrinsics.h" + #include "xxhash/xxh3-avx2.h" #elif defined(HAVE_SSE_2) -#define XXH_VECTOR XXH_SSE2 -#define XXH_ACC_ALIGN 16 -#define XXH_SEC_ALIGN 16 -#include "Intrinsics.h" -#include "xxhash/xxh3-sse2.h" + #define XXH_VECTOR XXH_SSE2 + #define XXH_ACC_ALIGN 16 + #define XXH_SEC_ALIGN 16 + #include "Intrinsics.h" + #include "xxhash/xxh3-sse2.h" #else -#define XXH_VECTOR XXH_SCALAR -#define XXH_ACC_ALIGN 8 -#define XXH_SEC_ALIGN 8 + #define XXH_VECTOR XXH_SCALAR + #define 
XXH_ACC_ALIGN 8 + #define XXH_SEC_ALIGN 8 #endif //------------------------------------------------------------ // XXH3 and XXH3-128 long keys // "Dispatcher" code -template < bool bswap > -static void XXH3_accumulate_512(void * RESTRICT acc, const void * RESTRICT input, - const void * RESTRICT secret) { +template +static void XXH3_accumulate_512( void * RESTRICT acc, const void * RESTRICT input, const void * RESTRICT secret ) { #if (XXH_VECTOR == XXH_AVX512) XXH3_accumulate_512_avx512(acc, input, secret); #elif (XXH_VECTOR == XXH_AVX2) @@ -1138,8 +1130,8 @@ static void XXH3_accumulate_512(void * RESTRICT acc, const void * RESTRICT input #endif } -template < bool bswap > -static void XXH3_scrambleAcc(void * RESTRICT acc, const void * RESTRICT secret) { +template +static void XXH3_scrambleAcc( void * RESTRICT acc, const void * RESTRICT secret ) { #if (XXH_VECTOR == XXH_AVX512) XXH3_scrambleAcc_avx512(acc, secret); #elif (XXH_VECTOR == XXH_AVX2) @@ -1155,8 +1147,8 @@ static void XXH3_scrambleAcc(void * RESTRICT acc, const void * RESTRICT secret) #endif } -template < bool bswap > -static void XXH3_initCustomSecret(void * RESTRICT customSecret, uint64_t seed64) { +template +static void XXH3_initCustomSecret( void * RESTRICT customSecret, uint64_t seed64 ) { #if (XXH_VECTOR == XXH_AVX512) XXH3_initCustomSecret_avx512(customSecret, seed64); #elif (XXH_VECTOR == XXH_AVX2) @@ -1176,53 +1168,48 @@ static void XXH3_initCustomSecret(void * RESTRICT customSecret, uint64_t seed64) // XXH3 and XXH3-128 long keys #if defined(__clang__) -# define XXH_PREFETCH_DIST 320 + #define XXH_PREFETCH_DIST 320 #elif (XXH_VECTOR == XXH_AVX512) -# define XXH_PREFETCH_DIST 512 + #define XXH_PREFETCH_DIST 512 #else -# define XXH_PREFETCH_DIST 384 + #define XXH_PREFETCH_DIST 384 #endif /* __clang__ */ -template < bool bswap > -static FORCE_INLINE void XXH3_accumulate(uint64_t * RESTRICT acc, - const uint8_t* RESTRICT input, - const uint8_t* RESTRICT secret, - size_t nbStripes) { - for (size_t n = 
0; n < nbStripes; n++ ) { - const uint8_t* const in = input + n*XXH_STRIPE_LEN; +template +static FORCE_INLINE void XXH3_accumulate( uint64_t * RESTRICT acc, const uint8_t * RESTRICT input, + const uint8_t * RESTRICT secret, size_t nbStripes ) { + for (size_t n = 0; n < nbStripes; n++) { + const uint8_t * const in = input + n * XXH_STRIPE_LEN; prefetch(in + XXH_PREFETCH_DIST); - XXH3_accumulate_512(acc, in, secret + n*XXH_SECRET_CONSUME_RATE); + XXH3_accumulate_512(acc, in, secret + n * XXH_SECRET_CONSUME_RATE); } } -template < bool bswap > -static FORCE_INLINE void XXH3_hashLong_internal_loop(uint64_t* RESTRICT acc, - const uint8_t* RESTRICT input, size_t len, - const uint8_t* RESTRICT secret, size_t secretSize) { +template +static FORCE_INLINE void XXH3_hashLong_internal_loop( uint64_t * RESTRICT acc, const uint8_t * RESTRICT input, + size_t len, const uint8_t * RESTRICT secret, size_t secretSize ) { size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; - size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock; - size_t const nb_blocks = (len - 1) / block_len; + size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock; + size_t const nb_blocks = (len - 1 ) / block_len; for (size_t n = 0; n < nb_blocks; n++) { - XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock); + XXH3_accumulate(acc, input + n * block_len, secret, nbStripesPerBlock); XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN); } /* last partial block */ size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN; - XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes); + XXH3_accumulate(acc, input + nb_blocks * block_len, secret, nbStripes); /* last stripe */ - const uint8_t* const p = input + len - XXH_STRIPE_LEN; -#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */ + const uint8_t * const p = input + len - XXH_STRIPE_LEN; +#define 
XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */ XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START); } -template < bool bswap > -static FORCE_INLINE uint64_t XXH3_mix2Accs(const uint64_t* RESTRICT acc, const uint8_t* RESTRICT secret) { - return XXH3_mul128_fold64( - acc[0] ^ GET_U64(secret, 0), - acc[1] ^ GET_U64(secret, 8)); +template +static FORCE_INLINE uint64_t XXH3_mix2Accs( const uint64_t * RESTRICT acc, const uint8_t * RESTRICT secret ) { + return XXH3_mul128_fold64(acc[0] ^ GET_U64(secret, 0), acc[1] ^ GET_U64(secret, 8)); } // UGLY HACK: @@ -1231,12 +1218,12 @@ static FORCE_INLINE uint64_t XXH3_mix2Accs(const uint64_t* RESTRICT acc, const u // XXH3_64bits, len == 256, Snapdragon 835: // without hack: 2063.7 MB/s // with hack: 2560.7 MB/s -template < bool bswap > -static uint64_t XXH3_mergeAccs(const uint64_t* RESTRICT acc, - const uint8_t* RESTRICT secret, uint64_t start) { +template +static uint64_t XXH3_mergeAccs( const uint64_t * RESTRICT acc, const uint8_t * RESTRICT secret, uint64_t start ) { uint64_t result64 = start; + for (size_t i = 0; i < 4; i++) { - result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i); + result64 += XXH3_mix2Accs(acc + 2 * i, secret + 16 * i); #if defined(__clang__) /* Clang */ \ && (defined(__arm__) || defined(__thumb__)) /* ARMv7 */ \ && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ @@ -1250,43 +1237,36 @@ static uint64_t XXH3_mergeAccs(const uint64_t* RESTRICT acc, // It's important for performance that XXH3_hashLong is not inlined. Not sure // why (uop cache maybe?), but the difference is large and easily measurable. 
-template < bool bswap > -static NEVER_INLINE uint64_t XXH3_hashLong_64b_internal( - const void* RESTRICT input, size_t len, - const void* RESTRICT secret, size_t secretSize) { +template +static NEVER_INLINE uint64_t XXH3_hashLong_64b_internal( const void * RESTRICT input, + size_t len, const void * RESTRICT secret, size_t secretSize ) { alignas(XXH_ACC_ALIGN) uint64_t acc[XXH_ACC_NB] = { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1, }; - XXH3_hashLong_internal_loop(acc, (const uint8_t*)input, len, - (const uint8_t*)secret, secretSize); + XXH3_hashLong_internal_loop(acc, (const uint8_t *)input, len, (const uint8_t *)secret, secretSize); return XXH3_mergeAccs(acc, (const uint8_t *)secret + XXH_SECRET_MERGEACCS_START, (uint64_t)len * XXH_PRIME64_1); } -template < bool bswap > -static NEVER_INLINE XXH128_hash_t XXH3_hashLong_128b_internal( - const void* RESTRICT input, size_t len, - const void* RESTRICT secret, size_t secretSize) { +template +static NEVER_INLINE XXH128_hash_t XXH3_hashLong_128b_internal( const void * RESTRICT input, + size_t len, const void * RESTRICT secret, size_t secretSize ) { alignas(XXH_ACC_ALIGN) uint64_t acc[XXH_ACC_NB] = { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1, }; - XXH3_hashLong_internal_loop(acc, (const uint8_t*)input, len, - (const uint8_t*)secret, secretSize); + XXH3_hashLong_internal_loop(acc, (const uint8_t *)input, len, (const uint8_t *)secret, secretSize); // converge into final hash const XXH128_hash_t h128 = { - /* .low64 = */ XXH3_mergeAccs(acc, - (const uint8_t *)secret + XXH_SECRET_MERGEACCS_START, + /* .low64 = */ XXH3_mergeAccs (acc, (const uint8_t *)secret + XXH_SECRET_MERGEACCS_START, (uint64_t)len * XXH_PRIME64_1), - /* .high64 = */ XXH3_mergeAccs(acc, - (const uint8_t *)secret + secretSize - - sizeof(acc) - XXH_SECRET_MERGEACCS_START, - ~((uint64_t)len * XXH_PRIME64_2)), 
+ /* .high64 = */ XXH3_mergeAccs(acc, (const uint8_t *)secret + secretSize - + sizeof(acc) - XXH_SECRET_MERGEACCS_START, ~((uint64_t)len * XXH_PRIME64_2)), }; return h128; } @@ -1294,46 +1274,48 @@ static NEVER_INLINE XXH128_hash_t XXH3_hashLong_128b_internal( //------------------------------------------------------------ // XXH3 and XXH3-128 top-level functions -template < bool bswap > -static uint64_t XXH3_64bits_withSeed(const void * input, size_t len, uint64_t seed) { +template +static uint64_t XXH3_64bits_withSeed( const void * input, size_t len, uint64_t seed ) { const uint8_t * RESTRICT secret = (const uint8_t *)XXH3_kSecret; size_t secretLen = sizeof(XXH3_kSecret); - if (len <= 16) - return XXH3_len_0to16_64b((const uint8_t*)input, len, - secret, seed); - if (len <= 128) - return XXH3_len_17to128_64b((const uint8_t*)input, len, - secret, secretLen, seed); - if (len <= XXH3_MIDSIZE_MAX) - return XXH3_len_129to240_64b((const uint8_t*)input, len, - secret, secretLen, seed); - - if (seed == 0) + if (len <= 16) { + return XXH3_len_0to16_64b((const uint8_t *)input, len, secret, seed); + } + if (len <= 128) { + return XXH3_len_17to128_64b((const uint8_t *)input, len, secret, secretLen, seed); + } + if (len <= XXH3_MIDSIZE_MAX) { + return XXH3_len_129to240_64b((const uint8_t *)input, len, secret, secretLen, seed); + } + + if (seed == 0) { return XXH3_hashLong_64b_internal(input, len, secret, secretLen); + } alignas(XXH_SEC_ALIGN) uint8_t secretbuf[XXH3_SECRET_DEFAULT_SIZE]; XXH3_initCustomSecret(secretbuf, seed); return XXH3_hashLong_64b_internal(input, len, secretbuf, sizeof(secretbuf)); } -template < bool bswap > -static XXH128_hash_t XXH3_128bits_withSeed(const void * input, size_t len, uint64_t seed) { +template +static XXH128_hash_t XXH3_128bits_withSeed( const void * input, size_t len, uint64_t seed ) { const uint8_t * RESTRICT secret = (const uint8_t *)XXH3_kSecret; size_t secretLen = sizeof(XXH3_kSecret); - if (len <= 16) - return XXH3_len_0to16_128b((const 
uint8_t*)input, len, - secret, seed); - if (len <= 128) - return XXH3_len_17to128_128b((const uint8_t*)input, len, - secret, secretLen, seed); - if (len <= XXH3_MIDSIZE_MAX) - return XXH3_len_129to240_128b((const uint8_t*)input, len, - secret, secretLen, seed); - - if (seed == 0) + if (len <= 16) { + return XXH3_len_0to16_128b((const uint8_t *)input, len, secret, seed); + } + if (len <= 128) { + return XXH3_len_17to128_128b((const uint8_t *)input, len, secret, secretLen, seed); + } + if (len <= XXH3_MIDSIZE_MAX) { + return XXH3_len_129to240_128b((const uint8_t *)input, len, secret, secretLen, seed); + } + + if (seed == 0) { return XXH3_hashLong_128b_internal(input, len, secret, secretLen); + } alignas(XXH_SEC_ALIGN) uint8_t secretbuf[XXH3_SECRET_DEFAULT_SIZE]; XXH3_initCustomSecret(secretbuf, seed); @@ -1341,13 +1323,14 @@ static XXH128_hash_t XXH3_128bits_withSeed(const void * input, size_t len, uint6 } #if defined(XXH3_POP_PRAGMA) -# pragma GCC pop_options + #pragma GCC pop_options #endif //------------------------------------------------------------ -template < bool bswap > -static void XXH32(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void XXH32( const void * in, const size_t len, const seed_t seed, void * out ) { uint32_t h = XXH32_impl((const uint8_t *)in, len, (uint32_t)seed); + #if 0 // Output in "canonical" format if (isLE()) { @@ -1360,9 +1343,10 @@ static void XXH32(const void * in, const size_t len, const seed_t seed, void * o #endif } -template < bool bswap > -static void XXH64(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void XXH64( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h = XXH64_impl((const uint8_t *)in, len, (uint64_t)seed); + #if 0 // Output in "canonical" format if (isLE()) { @@ -1376,9 +1360,10 @@ static void XXH64(const void * in, const size_t len, const seed_t seed, void * o } 
//------------------------------------------------------------ -template < bool bswap > -static void XXH3_64(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void XXH3_64( const void * in, const size_t len, const seed_t seed, void * out ) { uint64_t h = XXH3_64bits_withSeed(in, len, seed); + // Output in "canonical" BE format if (isLE()) { PUT_U64(h, (uint8_t *)out, 0); @@ -1387,88 +1372,89 @@ static void XXH3_64(const void * in, const size_t len, const seed_t seed, void * } } -template < bool bswap > -static void XXH3_128(const void * in, const size_t len, const seed_t seed, void * out) { +template +static void XXH3_128( const void * in, const size_t len, const seed_t seed, void * out ) { XXH128_hash_t h = XXH3_128bits_withSeed(in, len, seed); + // Output in "canonical" BE format if (isLE()) { PUT_U64(h.high64, (uint8_t *)out, 0); - PUT_U64(h.low64, (uint8_t *)out, 8); + PUT_U64(h.low64 , (uint8_t *)out, 8); } else { PUT_U64(h.high64, (uint8_t *)out, 0); - PUT_U64(h.low64, (uint8_t *)out, 8); + PUT_U64(h.low64 , (uint8_t *)out, 8); } } //------------------------------------------------------------ REGISTER_FAMILY(xxhash, - $.src_url = "https://github.com/Cyan4973/xxHash", - $.src_status = HashFamilyInfo::SRC_ACTIVE -); + $.src_url = "https://github.com/Cyan4973/xxHash", + $.src_status = HashFamilyInfo::SRC_ACTIVE + ); REGISTER_HASH(XXH_32, - $.desc = "xxHash, 32-bit version", - $.hash_flags = - FLAG_HASH_SMALL_SEED | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_BSD, - $.bits = 32, - $.verification_LE = 0xBA88B743, - $.verification_BE = 0x2BC79298, - $.hashfn_native = XXH32, - $.hashfn_bswap = XXH32 -); + $.desc = "xxHash, 32-bit version", + $.hash_flags = + FLAG_HASH_SMALL_SEED | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_BSD, + $.bits = 
32, + $.verification_LE = 0xBA88B743, + $.verification_BE = 0x2BC79298, + $.hashfn_native = XXH32, + $.hashfn_bswap = XXH32 + ); REGISTER_HASH(XXH_64, - $.desc = "xxHash, 64-bit version", - $.hash_flags = - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_MULTIPLY_64_64 | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_BSD, - $.bits = 64, - $.verification_LE = 0x024B7CF4, - $.verification_BE = 0xB96ABE81, - $.hashfn_native = XXH64, - $.hashfn_bswap = XXH64 -); + $.desc = "xxHash, 64-bit version", + $.hash_flags = + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_MULTIPLY_64_64 | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_BSD, + $.bits = 64, + $.verification_LE = 0x024B7CF4, + $.verification_BE = 0xB96ABE81, + $.hashfn_native = XXH64, + $.hashfn_bswap = XXH64 + ); REGISTER_HASH(XXH3_64, - $.desc = "xxh3, 64-bit version", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_BSD, - $.bits = 64, - $.verification_LE = 0x1AAEE62C, - $.verification_BE = 0xF8DBB4DD, - $.hashfn_native = XXH3_64, - $.hashfn_bswap = XXH3_64 -); + $.desc = "xxh3, 64-bit version", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_BSD, + $.bits = 64, + $.verification_LE = 0x1AAEE62C, + $.verification_BE = 0xF8DBB4DD, + $.hashfn_native = XXH3_64, + $.hashfn_bswap = XXH3_64 + ); REGISTER_HASH(XXH3_128, - $.desc = "xxh3, 128-bit version", - $.hash_flags = - FLAG_HASH_LOOKUP_TABLE | - FLAG_HASH_ENDIAN_INDEPENDENT, - $.impl_flags = - FLAG_IMPL_CANONICAL_LE | - FLAG_IMPL_MULTIPLY | - FLAG_IMPL_ROTATE | - FLAG_IMPL_LICENSE_BSD, - $.bits = 128, - $.verification_LE = 0x288DAA94, - $.verification_BE = 0x6C82FA25, - $.hashfn_native = XXH3_128, - $.hashfn_bswap = XXH3_128 -); + 
$.desc = "xxh3, 128-bit version", + $.hash_flags = + FLAG_HASH_LOOKUP_TABLE | + FLAG_HASH_ENDIAN_INDEPENDENT, + $.impl_flags = + FLAG_IMPL_CANONICAL_LE | + FLAG_IMPL_MULTIPLY | + FLAG_IMPL_ROTATE | + FLAG_IMPL_LICENSE_BSD, + $.bits = 128, + $.verification_LE = 0x288DAA94, + $.verification_BE = 0x6C82FA25, + $.hashfn_native = XXH3_128, + $.hashfn_bswap = XXH3_128 + ); diff --git a/hashes/xxhash/xxh3-arm.h b/hashes/xxhash/xxh3-arm.h index 5be01687..fbd4182f 100644 --- a/hashes/xxhash/xxh3-arm.h +++ b/hashes/xxhash/xxh3-arm.h @@ -111,22 +111,22 @@ /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */ /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ -#if (defined(__GNUC__) || defined(__clang__)) && \ +#if (defined(__GNUC__) || defined(__clang__)) && \ (defined(__arm__) || defined(__thumb__) || defined(_M_ARM)) -#define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ - do { \ - /* Undocumented GCC/Clang operand modifier: */ \ - /* %e0 = lower D half, %f0 = upper D half */ \ - __asm__("vzip.32 %e0, %f0" : "+w" (in)); \ - (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in)); \ - (outHi) = vget_high_u32(vreinterpretq_u32_u64(in)); \ - } while (0) + #define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ + do { \ + /* Undocumented GCC/Clang operand modifier: */ \ + /* %e0 = lower D half, %f0 = upper D half */ \ + __asm__ ("vzip.32 %e0, %f0" : "+w" (in)); \ + (outLo) = vget_low_u32(vreinterpretq_u32_u64(in)); \ + (outHi) = vget_high_u32(vreinterpretq_u32_u64(in)); \ + } while (0) #else -#define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ - do { \ - (outLo) = vmovn_u64 (in); \ - (outHi) = vshrn_n_u64 ((in), 32); \ - } while (0) + #define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \ + do { \ + (outLo) = vmovn_u64(in); \ + (outHi) = vshrn_n_u64((in), 32); \ + } while (0) #endif /* @@ -142,14 +142,18 @@ * unaligned load. 
*/ #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) + /* silence -Wcast-align */ -static FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) { - return *(uint64x2_t const*)ptr; +static FORCE_INLINE uint64x2_t XXH_vld1q_u64( void const * ptr ) { + return *(uint64x2_t const *)ptr; } + #else -static FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) { - return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr)); + +static FORCE_INLINE uint64x2_t XXH_vld1q_u64( void const * ptr ) { + return vreinterpretq_u64_u8(vld1q_u8((uint8_t const *)ptr)); } + #endif // Controls the NEON to scalar ratio for XXH3 @@ -187,9 +191,9 @@ static FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) { // // XXH_ACC_NB is #defined already, back in the main file. #if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) -#define XXH3_NEON_LANES 6 + #define XXH3_NEON_LANES 6 #else -#define XXH3_NEON_LANES XXH_ACC_NB + #define XXH3_NEON_LANES XXH_ACC_NB #endif /* @@ -201,27 +205,27 @@ static FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) { * * See XXH3_NEON_LANES for configuring this and details about this optimization. */ -template < bool bswap > -static FORCE_INLINE void XXH3_accumulate_512_neon(void * RESTRICT acc, - const void * RESTRICT input, const void * RESTRICT secret) { - uint64x2_t* const xacc = (uint64x2_t *) acc; +template +static FORCE_INLINE void XXH3_accumulate_512_neon( void * RESTRICT acc, const void * RESTRICT input, + const void * RESTRICT secret ) { + uint64x2_t * const xacc = (uint64x2_t * )acc; /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. 
*/ - uint8_t const* const xinput = (const uint8_t *) input; - uint8_t const* const xsecret = (const uint8_t *) secret; + uint8_t const * const xinput = (const uint8_t *)input; + uint8_t const * const xsecret = (const uint8_t *)secret; /* AArch64 uses both scalar and neon at the same time */ for (size_t i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) { XXH3_scalarRound(acc, input, secret, i); } for (size_t i = 0; i < XXH3_NEON_LANES / 2; i++) { - uint64x2_t acc_vec = xacc[i]; + uint64x2_t acc_vec = xacc[i]; /* data_vec = xinput[i]; */ uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16)); /* key_vec = xsecret[i]; */ uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); if (bswap) { - data_vec = Vbswap64_u64(data_vec); - key_vec = Vbswap64_u64(key_vec); + data_vec = Vbswap64_u64(data_vec); + key_vec = Vbswap64_u64(key_vec ); } uint64x2_t data_key; uint32x2_t data_key_lo, data_key_hi; @@ -229,24 +233,25 @@ static FORCE_INLINE void XXH3_accumulate_512_neon(void * RESTRICT acc, uint64x2_t acc_vec_2 = vextq_u64(data_vec, data_vec, 1); /* data_key = data_vec ^ key_vec; */ data_key = veorq_u64(data_vec, key_vec); - /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF); + /* + * data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF); * data_key_hi = (uint32x2_t) (data_key >> 32); - * data_key = UNDEFINED; */ + * data_key = UNDEFINED; + */ XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi); /* acc_vec_2 += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */ - acc_vec_2 = vmlal_u32 (acc_vec_2, data_key_lo, data_key_hi); + acc_vec_2 = vmlal_u32(acc_vec_2, data_key_lo, data_key_hi); /* xacc[i] += acc_vec_2; */ - acc_vec = vaddq_u64 (acc_vec, acc_vec_2); - xacc[i] = acc_vec; + acc_vec = vaddq_u64(acc_vec, acc_vec_2); + xacc[i] = acc_vec; } } -template < bool bswap > -static FORCE_INLINE void XXH3_scrambleAcc_neon(void * RESTRICT acc, - const void * RESTRICT secret) { - uint64x2_t* xacc = (uint64x2_t*) acc; - uint8_t const* xsecret = (uint8_t const*) secret; - uint32x2_t prime = 
vdup_n_u32 (XXH_PRIME32_1); +template +static FORCE_INLINE void XXH3_scrambleAcc_neon( void * RESTRICT acc, const void * RESTRICT secret ) { + uint64x2_t * xacc = (uint64x2_t * )acc; + uint8_t const * xsecret = (uint8_t const *)secret; + uint32x2_t prime = vdup_n_u32(XXH_PRIME32_1); /* AArch64 uses both scalar and neon at the same time */ for (size_t i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) { @@ -255,21 +260,23 @@ static FORCE_INLINE void XXH3_scrambleAcc_neon(void * RESTRICT acc, for (size_t i = 0; i < XXH3_NEON_LANES / 2; i++) { /* xacc[i] ^= (xacc[i] >> 47); */ uint64x2_t acc_vec = xacc[i]; - uint64x2_t shifted = vshrq_n_u64 (acc_vec, 47); - uint64x2_t data_vec = veorq_u64 (acc_vec, shifted); + uint64x2_t shifted = vshrq_n_u64(acc_vec, 47); + uint64x2_t data_vec = veorq_u64(acc_vec, shifted); /* xacc[i] ^= xsecret[i]; */ - uint64x2_t key_vec = XXH_vld1q_u64 (xsecret + (i * 16)); + uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); if (bswap) { key_vec = vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(key_vec))); } - uint64x2_t data_key = veorq_u64 (data_vec, key_vec); + uint64x2_t data_key = veorq_u64(data_vec, key_vec); /* xacc[i] *= XXH_PRIME32_1 */ uint32x2_t data_key_lo, data_key_hi; - /* data_key_lo = (uint32x2_t) (xacc[i] & 0xFFFFFFFF); + /* + * data_key_lo = (uint32x2_t) (xacc[i] & 0xFFFFFFFF); * data_key_hi = (uint32x2_t) (xacc[i] >> 32); - * xacc[i] = UNDEFINED; */ + * xacc[i] = UNDEFINED; + */ XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi); { /* @@ -290,7 +297,7 @@ static FORCE_INLINE void XXH3_scrambleAcc_neon(void * RESTRICT acc, * this bug completely. 
* See https://bugs.llvm.org/show_bug.cgi?id=39967 */ - uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime); + uint64x2_t prod_hi = vmull_u32(data_key_hi, prime); /* xacc[i] = prod_hi << 32; */ prod_hi = vshlq_n_u64(prod_hi, 32); /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */ diff --git a/hashes/xxhash/xxh3-avx2.h b/hashes/xxhash/xxh3-avx2.h index 1a09b450..5d0ebd37 100644 --- a/hashes/xxhash/xxh3-avx2.h +++ b/hashes/xxhash/xxh3-avx2.h @@ -30,65 +30,69 @@ * - xxHash homepage: https://www.xxhash.com * - xxHash source repository: https://github.com/Cyan4973/xxHash */ -template < bool bswap > -static FORCE_INLINE void XXH3_accumulate_512_avx2( - void* RESTRICT acc, const void* RESTRICT input, - const void* RESTRICT secret) { - __m256i* const xacc = (__m256i *) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ - const __m256i* const xinput = (const __m256i *) input; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ - const __m256i* const xsecret = (const __m256i *) secret; +template +static FORCE_INLINE void XXH3_accumulate_512_avx2( void * RESTRICT acc, const void * RESTRICT input, + const void * RESTRICT secret ) { + __m256i * const xacc = (__m256i * )acc; + /* + * Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. + */ + const __m256i * const xinput = (const __m256i *)input; + /* + * Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. + */ + const __m256i * const xsecret = (const __m256i *)secret; - for (size_t i = 0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { + for (size_t i = 0; i < XXH_STRIPE_LEN / sizeof(__m256i); i++) { /* data_vec = xinput[i]; */ - __m256i const data_vec = bswap ? 
- mm256_bswap64(_mm256_loadu_si256(xinput+i)) : - _mm256_loadu_si256(xinput+i); + __m256i const data_vec = bswap ? + mm256_bswap64(_mm256_loadu_si256(xinput + i)) : + _mm256_loadu_si256(xinput + i); /* key_vec = xsecret[i]; */ - __m256i const key_vec = bswap ? - mm256_bswap64(_mm256_loadu_si256(xsecret+i)) : - _mm256_loadu_si256(xsecret+i); + __m256i const key_vec = bswap ? + mm256_bswap64(_mm256_loadu_si256(xsecret + i)) : + _mm256_loadu_si256(xsecret + i); /* data_key = data_vec ^ key_vec; */ - __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); + __m256i const data_key = _mm256_xor_si256(data_vec, key_vec); /* data_key_lo = data_key >> 32; */ - __m256i const data_key_lo = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + __m256i const data_key_lo = _mm256_shuffle_epi32(data_key, _MM_SHUFFLE(0, 3, 0, 1)); /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ - __m256i const product = _mm256_mul_epu32 (data_key, data_key_lo); + __m256i const product = _mm256_mul_epu32(data_key, data_key_lo); /* xacc[i] += swap(data_vec); */ - __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2)); - __m256i const sum = _mm256_add_epi64(xacc[i], data_swap); + __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2)); + __m256i const sum = _mm256_add_epi64(xacc[i], data_swap); /* xacc[i] += product; */ xacc[i] = _mm256_add_epi64(product, sum); } } -template < bool bswap > -static FORCE_INLINE void XXH3_scrambleAcc_avx2(void * RESTRICT acc, - const void * RESTRICT secret) { - __m256i* const xacc = (__m256i*) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. 
*/ - const __m256i* const xsecret = (const __m256i *) secret; - const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1); +template +static FORCE_INLINE void XXH3_scrambleAcc_avx2( void * RESTRICT acc, const void * RESTRICT secret ) { + __m256i * const xacc = (__m256i * )acc; + /* + * Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. + */ + const __m256i * const xsecret = (const __m256i *)secret; + const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1); - for (size_t i = 0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) { + for (size_t i = 0; i < XXH_STRIPE_LEN / sizeof(__m256i); i++) { /* xacc[i] ^= (xacc[i] >> 47) */ - __m256i const acc_vec = xacc[i]; - __m256i const shifted = _mm256_srli_epi64 (acc_vec, 47); - __m256i const data_vec = _mm256_xor_si256 (acc_vec, shifted); + __m256i const acc_vec = xacc[i]; + __m256i const shifted = _mm256_srli_epi64(acc_vec, 47); + __m256i const data_vec = _mm256_xor_si256(acc_vec , shifted); /* xacc[i] ^= xsecret; */ - __m256i const key_vec = bswap ? - mm256_bswap64(_mm256_loadu_si256(xsecret+i)) : - _mm256_loadu_si256(xsecret+i); - __m256i const data_key = _mm256_xor_si256 (data_vec, key_vec); + __m256i const key_vec = bswap ? 
+ mm256_bswap64(_mm256_loadu_si256(xsecret + i)) : + _mm256_loadu_si256(xsecret + i); + __m256i const data_key = _mm256_xor_si256(data_vec, key_vec); /* xacc[i] *= XXH_PRIME32_1; */ - __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); - __m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32); - __m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32); + __m256i const data_key_hi = _mm256_shuffle_epi32(data_key, _MM_SHUFFLE(0, 3, 0, 1)); + __m256i const prod_lo = _mm256_mul_epu32(data_key , prime32); + __m256i const prod_hi = _mm256_mul_epu32(data_key_hi, prime32); xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)); } } @@ -98,14 +102,14 @@ static FORCE_INLINE void XXH3_scrambleAcc_avx2(void * RESTRICT acc, * - not extract the secret from sse registers in the internal loop * - use less common registers, and avoid pushing these reg into stack */ -template < bool bswap > -static FORCE_INLINE void XXH3_initCustomSecret_avx2(void * RESTRICT customSecret, - uint64_t seed64) { - _mm_prefetch((const char*)customSecret, _MM_HINT_T0); - __m256i const seed = _mm256_set_epi64x((int64_t)(UINT64_C(0) - seed64), (int64_t)seed64, (int64_t)(UINT64_C(0) - seed64), (int64_t)seed64); +template +static FORCE_INLINE void XXH3_initCustomSecret_avx2( void * RESTRICT customSecret, uint64_t seed64 ) { + _mm_prefetch((const char *)customSecret, _MM_HINT_T0); + __m256i const seed = _mm256_set_epi64x((int64_t)(UINT64_C(0) - seed64), (int64_t)seed64, + (int64_t)(UINT64_C(0) - seed64), (int64_t)seed64); - const __m256i* const src = (const __m256i*) ((const void*) XXH3_kSecret); - __m256i* dest = ( __m256i*) customSecret; + const __m256i * const src = (const __m256i *)((const void *)XXH3_kSecret); + __m256i * dest = (__m256i * )customSecret; #if defined(__GNUC__) || defined(__clang__) XXH_COMPILER_GUARD(dest); @@ -113,18 +117,18 @@ static FORCE_INLINE void XXH3_initCustomSecret_avx2(void * RESTRICT customSecret /* GCC -O2 need unroll 
loop manually */ if (bswap) { - dest[0] = mm256_bswap64(_mm256_add_epi64(mm256_bswap64(_mm256_stream_load_si256(src+0)), seed)); - dest[1] = mm256_bswap64(_mm256_add_epi64(mm256_bswap64(_mm256_stream_load_si256(src+1)), seed)); - dest[2] = mm256_bswap64(_mm256_add_epi64(mm256_bswap64(_mm256_stream_load_si256(src+2)), seed)); - dest[3] = mm256_bswap64(_mm256_add_epi64(mm256_bswap64(_mm256_stream_load_si256(src+3)), seed)); - dest[4] = mm256_bswap64(_mm256_add_epi64(mm256_bswap64(_mm256_stream_load_si256(src+4)), seed)); - dest[5] = mm256_bswap64(_mm256_add_epi64(mm256_bswap64(_mm256_stream_load_si256(src+5)), seed)); + dest[0] = mm256_bswap64(_mm256_add_epi64(mm256_bswap64(_mm256_stream_load_si256(src + 0)), seed)); + dest[1] = mm256_bswap64(_mm256_add_epi64(mm256_bswap64(_mm256_stream_load_si256(src + 1)), seed)); + dest[2] = mm256_bswap64(_mm256_add_epi64(mm256_bswap64(_mm256_stream_load_si256(src + 2)), seed)); + dest[3] = mm256_bswap64(_mm256_add_epi64(mm256_bswap64(_mm256_stream_load_si256(src + 3)), seed)); + dest[4] = mm256_bswap64(_mm256_add_epi64(mm256_bswap64(_mm256_stream_load_si256(src + 4)), seed)); + dest[5] = mm256_bswap64(_mm256_add_epi64(mm256_bswap64(_mm256_stream_load_si256(src + 5)), seed)); } else { - dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed); - dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed); - dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed); - dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed); - dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed); - dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed); + dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src + 0), seed); + dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src + 1), seed); + dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src + 2), seed); + dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src + 3), seed); + dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src + 
4), seed); + dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src + 5), seed); } } diff --git a/hashes/xxhash/xxh3-avx512.h b/hashes/xxhash/xxh3-avx512.h index ceab0035..90a98663 100644 --- a/hashes/xxhash/xxh3-avx512.h +++ b/hashes/xxhash/xxh3-avx512.h @@ -30,53 +30,52 @@ * - xxHash homepage: https://www.xxhash.com * - xxHash source repository: https://github.com/Cyan4973/xxHash */ -template < bool bswap > -static FORCE_INLINE void XXH3_accumulate_512_avx512( - void * RESTRICT acc, const void * RESTRICT input, - const void * RESTRICT secret) { - - __m512i * const xacc = (__m512i *) acc; +template +static FORCE_INLINE void XXH3_accumulate_512_avx512( void * RESTRICT acc, + const void * RESTRICT input, const void * RESTRICT secret ) { + __m512i * const xacc = (__m512i *)acc; /* data_vec = input[0]; */ - __m512i const data_vec = bswap ? - mm512_bswap64(_mm512_loadu_si512 (input)) : - _mm512_loadu_si512 (input); + __m512i const data_vec = bswap ? + mm512_bswap64(_mm512_loadu_si512(input)) : + _mm512_loadu_si512(input); /* key_vec = secret[0]; */ - __m512i const key_vec = bswap ? - mm512_bswap64(_mm512_loadu_si512 (secret)) : - _mm512_loadu_si512 (secret); + __m512i const key_vec = bswap ? 
+ mm512_bswap64(_mm512_loadu_si512(secret)) : + _mm512_loadu_si512(secret); /* data_key = data_vec ^ key_vec; */ - __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec); + __m512i const data_key = _mm512_xor_si512(data_vec, key_vec); /* data_key_lo = data_key >> 32; */ - __m512i const data_key_lo = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1)); + __m512i const data_key_lo = _mm512_shuffle_epi32(data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1)); /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ - __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo); + __m512i const product = _mm512_mul_epu32(data_key, data_key_lo); /* xacc[0] += swap(data_vec); */ - __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2)); - __m512i const sum = _mm512_add_epi64(*xacc, data_swap); + __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2)); + __m512i const sum = _mm512_add_epi64(*xacc, data_swap); + /* xacc[0] += product; */ *xacc = _mm512_add_epi64(product, sum); } -template < bool bswap > -static FORCE_INLINE void XXH3_scrambleAcc_avx512( - void * RESTRICT acc, const void * RESTRICT secret) { - __m512i* const xacc = (__m512i*) acc; - const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1); +template +static FORCE_INLINE void XXH3_scrambleAcc_avx512( void * RESTRICT acc, const void * RESTRICT secret ) { + __m512i * const xacc = (__m512i *)acc; + const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1); /* xacc[0] ^= (xacc[0] >> 47) */ - __m512i const acc_vec = *xacc; - __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47); - __m512i const data_vec = _mm512_xor_si512 (acc_vec, shifted); + __m512i const acc_vec = *xacc; + __m512i const shifted = _mm512_srli_epi64(acc_vec, 47); + __m512i const data_vec = _mm512_xor_si512(acc_vec , shifted); /* xacc[0] ^= secret; */ - __m512i const key_vec = bswap ? 
- mm512_bswap64(_mm512_loadu_si512 (secret)) : - _mm512_loadu_si512 (secret); - __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec); + __m512i const key_vec = bswap ? + mm512_bswap64(_mm512_loadu_si512(secret)) : + _mm512_loadu_si512(secret); + __m512i const data_key = _mm512_xor_si512(data_vec, key_vec); /* xacc[0] *= XXH_PRIME32_1; */ - __m512i const data_key_hi = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1)); - __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32); - __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32); + __m512i const data_key_hi = _mm512_shuffle_epi32(data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1)); + __m512i const prod_lo = _mm512_mul_epu32(data_key , prime32); + __m512i const prod_hi = _mm512_mul_epu32(data_key_hi, prime32); + *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32)); } @@ -85,23 +84,25 @@ static FORCE_INLINE void XXH3_scrambleAcc_avx512( // // fwojcik: Make this GCC-only, since it explicitly supports // union-based type punning, which is otherwise Undefined Behavior -template < bool bswap > -static FORCE_INLINE void XXH3_initCustomSecret_avx512( - void * RESTRICT customSecret, uint64_t seed64) { - int const nbRounds = XXH3_SECRET_DEFAULT_SIZE / sizeof(__m512i); - __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((uint64_t)seed64), 0xAA, (uint64_t)(UINT64_C(0) - seed64)); +template +static FORCE_INLINE void XXH3_initCustomSecret_avx512( void * RESTRICT customSecret, uint64_t seed64 ) { + int const nbRounds = XXH3_SECRET_DEFAULT_SIZE / sizeof(__m512i); + __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64( + (uint64_t)seed64), 0xAA, (uint64_t)(UINT64_C(0) - seed64)); + + const __m512i * const src = (const __m512i *)((const void *)XXH3_kSecret); + __m512i * const dest = (__m512i * )customSecret; - const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret); - __m512i* const dest = ( __m512i*) customSecret; for (int i = 0; i < 
nbRounds; ++i) { #if defined(__GNUC__) || !defined(__clang__) union { - const __m512i* cp; - void* p; + const __m512i * cp; + void * p; } remote_const_void; remote_const_void.cp = src + i; if (bswap) { - dest[i] = mm512_bswap64(_mm512_add_epi64(mm512_bswap64(_mm512_stream_load_si512(remote_const_void.p)), seed)); + dest[i] = mm512_bswap64(_mm512_add_epi64(mm512_bswap64(_mm512_stream_load_si512( + remote_const_void.p)), seed)); } else { dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed); } diff --git a/hashes/xxhash/xxh3-ppc.h b/hashes/xxhash/xxh3-ppc.h index d8d21e97..f013dc36 100644 --- a/hashes/xxhash/xxh3-ppc.h +++ b/hashes/xxhash/xxh3-ppc.h @@ -40,25 +40,31 @@ * inconsistent intrinsics, spotty coverage, and multiple endiannesses. */ -typedef __vector unsigned long long xxh_u64x2; -typedef __vector unsigned char xxh_u8x16; -typedef __vector unsigned int xxh_u32x4; +typedef __vector unsigned long long xxh_u64x2; +typedef __vector unsigned char xxh_u8x16; +typedef __vector unsigned int xxh_u32x4; #if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__)) -# define XXH_vec_revb vec_revb + #define XXH_vec_revb vec_revb #else + // A polyfill for POWER9's vec_revb(). -static FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val) { - xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, - 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; +static FORCE_INLINE xxh_u64x2 XXH_vec_revb( xxh_u64x2 val ) { + xxh_u8x16 const vByteSwap = { + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, + 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 + }; + return vec_perm(val, val, vByteSwap); } + #endif // Performs an unaligned vector load and byte swaps it on big endian. 
-template < bool bswap > -static FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr) { +template +static FORCE_INLINE xxh_u64x2 XXH_vec_loadu( const void * ptr ) { xxh_u64x2 ret; + memcpy(&ret, ptr, sizeof(xxh_u64x2)); if (bswap) { ret = XXH_vec_revb(ret); @@ -71,51 +77,58 @@ static FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr) { * * These intrinsics weren't added until GCC 8, despite existing for a while, * and they are endian dependent. Also, their meaning swap depending on version. - * */ + * + */ #if defined(__s390x__) /* s390x is always big endian, no issue on this platform */ -# define XXH_vec_mulo vec_mulo -# define XXH_vec_mule vec_mule + #define XXH_vec_mulo vec_mulo + #define XXH_vec_mule vec_mule #elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) /* Clang has a better way to control this, we can just use the builtin which doesn't swap. */ -# define XXH_vec_mulo __builtin_altivec_vmulouw -# define XXH_vec_mule __builtin_altivec_vmuleuw + #define XXH_vec_mulo __builtin_altivec_vmulouw + #define XXH_vec_mule __builtin_altivec_vmuleuw #else + /* gcc needs inline assembly */ + /* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. 
*/ -static FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b) { +static FORCE_INLINE xxh_u64x2 XXH_vec_mulo( xxh_u32x4 a, xxh_u32x4 b ) { xxh_u64x2 result; - __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); + + __asm__ ("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); return result; } -static FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b) { + +static FORCE_INLINE xxh_u64x2 XXH_vec_mule( xxh_u32x4 a, xxh_u32x4 b ) { xxh_u64x2 result; - __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); + + __asm__ ("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b)); return result; } + #endif /* XXH_vec_mulo, XXH_vec_mule */ -template < bool bswap > -static FORCE_INLINE void XXH3_accumulate_512_vsx(void * RESTRICT acc, - const void * RESTRICT input, const void * RESTRICT secret) { +template +static FORCE_INLINE void XXH3_accumulate_512_vsx( void * RESTRICT acc, const void * RESTRICT input, + const void * RESTRICT secret ) { /* presumed aligned */ - uint32_t * const xacc = (uint32_t *) acc; - xxh_u64x2 const* const xinput = (xxh_u64x2 const*) input; /* no alignment restriction */ - xxh_u64x2 const* const xsecret = (xxh_u64x2 const*) secret; /* no alignment restriction */ - xxh_u64x2 const v32 = { 32, 32 }; + uint32_t * const xacc = (uint32_t * )acc; + xxh_u64x2 const * const xinput = (xxh_u64x2 const *)input; /* no alignment restriction */ + xxh_u64x2 const * const xsecret = (xxh_u64x2 const *)secret; /* no alignment restriction */ + xxh_u64x2 const v32 = { 32, 32 }; for (size_t i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { /* data_vec = xinput[i]; */ - xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i); + xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i); /* key_vec = xsecret[i]; */ xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + i); xxh_u64x2 const data_key = data_vec ^ key_vec; /* shuffled = (data_key << 32) | (data_key >> 32); */ - xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32); + 
xxh_u32x4 const shuffled = (xxh_u32x4 )vec_rl(data_key , v32); /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */ xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled); /* acc_vec = xacc[i]; */ - xxh_u64x2 acc_vec = (xxh_u64x2)vec_xl(0, xacc + 4 * i); + xxh_u64x2 acc_vec = (xxh_u64x2 )vec_xl(0 , xacc + 4 * i); acc_vec += product; /* swap high and low halves */ @@ -129,14 +142,13 @@ static FORCE_INLINE void XXH3_accumulate_512_vsx(void * RESTRICT acc, } } -template < bool bswap > -static FORCE_INLINE void XXH3_scrambleAcc_vsx(void * RESTRICT acc, - const void * RESTRICT secret) { - xxh_u64x2* const xacc = (xxh_u64x2*) acc; - const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret; +template +static FORCE_INLINE void XXH3_scrambleAcc_vsx( void * RESTRICT acc, const void * RESTRICT secret ) { + xxh_u64x2 * const xacc = (xxh_u64x2 * )acc; + const xxh_u64x2 * const xsecret = (const xxh_u64x2 *)secret; /* constants */ - xxh_u64x2 const v32 = { 32, 32 }; - xxh_u64x2 const v47 = { 47, 47 }; + xxh_u64x2 const v32 = { 32, 32 }; + xxh_u64x2 const v47 = { 47, 47 }; xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 }; for (size_t i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { @@ -150,7 +162,7 @@ static FORCE_INLINE void XXH3_scrambleAcc_vsx(void * RESTRICT acc, /* xacc[i] *= XXH_PRIME32_1 */ /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */ - xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime); + xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime); /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */ xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime); xacc[i] = prod_odd + (prod_even << v32); diff --git a/hashes/xxhash/xxh3-sse2.h b/hashes/xxhash/xxh3-sse2.h index 90c42df9..afbbc907 100644 --- a/hashes/xxhash/xxh3-sse2.h +++ b/hashes/xxhash/xxh3-sse2.h @@ -30,66 +30,70 @@ * - 
xxHash homepage: https://www.xxhash.com * - xxHash source repository: https://github.com/Cyan4973/xxHash */ -template < bool bswap > -static FORCE_INLINE void XXH3_accumulate_512_sse2( - void * RESTRICT acc, const void * RESTRICT input, - const void * RESTRICT secret) { +template +static FORCE_INLINE void XXH3_accumulate_512_sse2( void * RESTRICT acc, const void * RESTRICT input, + const void * RESTRICT secret ) { /* SSE2 is just a half-scale version of the AVX2 version. */ - __m128i* const xacc = (__m128i *) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ - const __m128i* const xinput = (const __m128i *) input; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ - const __m128i* const xsecret = (const __m128i *) secret; + __m128i * const xacc = (__m128i * )acc; + /* + * Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. + */ + const __m128i * const xinput = (const __m128i *)input; + /* + * Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. + */ + const __m128i * const xsecret = (const __m128i *)secret; - for (size_t i = 0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { + for (size_t i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++) { /* data_vec = xinput[i]; */ - __m128i const data_vec = bswap ? - mm_bswap64(_mm_loadu_si128(xinput+i)) : - _mm_loadu_si128(xinput+i); + __m128i const data_vec = bswap ? + mm_bswap64(_mm_loadu_si128(xinput + i)) : + _mm_loadu_si128(xinput + i); /* key_vec = xsecret[i]; */ - __m128i const key_vec = bswap ? - mm_bswap64(_mm_loadu_si128(xsecret+i)) : - _mm_loadu_si128(xsecret+i); + __m128i const key_vec = bswap ? 
+ mm_bswap64(_mm_loadu_si128(xsecret + i)) : + _mm_loadu_si128(xsecret + i); /* data_key = data_vec ^ key_vec; */ - __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); + __m128i const data_key = _mm_xor_si128(data_vec, key_vec); /* data_key_lo = data_key >> 32; */ - __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + __m128i const data_key_lo = _mm_shuffle_epi32(data_key, _MM_SHUFFLE(0, 3, 0, 1)); /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ - __m128i const product = _mm_mul_epu32 (data_key, data_key_lo); + __m128i const product = _mm_mul_epu32(data_key, data_key_lo); /* xacc[i] += swap(data_vec); */ - __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); - __m128i const sum = _mm_add_epi64(xacc[i], data_swap); + __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2)); + __m128i const sum = _mm_add_epi64(xacc[i], data_swap); /* xacc[i] += product; */ xacc[i] = _mm_add_epi64(product, sum); } } -template < bool bswap > -static FORCE_INLINE void XXH3_scrambleAcc_sse2(void * RESTRICT acc, - const void * RESTRICT secret) { - __m128i* const xacc = (__m128i*) acc; - /* Unaligned. This is mainly for pointer arithmetic, and because - * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ - const __m128i* const xsecret = (const __m128i *) secret; - const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1); +template +static FORCE_INLINE void XXH3_scrambleAcc_sse2( void * RESTRICT acc, const void * RESTRICT secret ) { + __m128i * const xacc = (__m128i * )acc; + /* + * Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. 
+ */ + const __m128i * const xsecret = (const __m128i *)secret; + const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1); - for (size_t i = 0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { + for (size_t i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++) { /* xacc[i] ^= (xacc[i] >> 47) */ - __m128i const acc_vec = xacc[i]; - __m128i const shifted = _mm_srli_epi64 (acc_vec, 47); - __m128i const data_vec = _mm_xor_si128 (acc_vec, shifted); + __m128i const acc_vec = xacc[i]; + __m128i const shifted = _mm_srli_epi64(acc_vec, 47); + __m128i const data_vec = _mm_xor_si128(acc_vec , shifted); /* xacc[i] ^= xsecret[i]; */ - __m128i const key_vec = bswap ? - mm_bswap64(_mm_loadu_si128(xsecret+i)) : - _mm_loadu_si128(xsecret+i); - __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); + __m128i const key_vec = bswap ? + mm_bswap64(_mm_loadu_si128(xsecret + i)) : + _mm_loadu_si128(xsecret + i); + __m128i const data_key = _mm_xor_si128(data_vec, key_vec); /* xacc[i] *= XXH_PRIME32_1; */ - __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); - __m128i const prod_lo = _mm_mul_epu32 (data_key, prime32); - __m128i const prod_hi = _mm_mul_epu32 (data_key_hi, prime32); + __m128i const data_key_hi = _mm_shuffle_epi32(data_key, _MM_SHUFFLE(0, 3, 0, 1)); + __m128i const prod_lo = _mm_mul_epu32(data_key , prime32); + __m128i const prod_hi = _mm_mul_epu32(data_key_hi, prime32); xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32)); } } @@ -99,21 +103,21 @@ static FORCE_INLINE void XXH3_scrambleAcc_sse2(void * RESTRICT acc, * - not extract the secret from sse registers in the internal loop * - use less common registers, and avoid pushing these reg into stack */ -template < bool bswap > -static FORCE_INLINE void XXH3_initCustomSecret_sse2(void * RESTRICT customSecret, - uint64_t seed64) { +template +static FORCE_INLINE void XXH3_initCustomSecret_sse2( void * RESTRICT customSecret, uint64_t seed64 ) { int const nbRounds = XXH3_SECRET_DEFAULT_SIZE / 
sizeof(__m128i); /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */ #if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900 alignas(16) const uint64_t seed64x2[2] = { - (uint64_t)seed64, (uint64_t)(UINT64_C(0) - seed64) }; - __m128i const seed = _mm_load_si128((__m128i const*)seed64x2); + (uint64_t)seed64, (uint64_t)(UINT64_C(0) - seed64) + }; + __m128i const seed = _mm_load_si128((__m128i const *)seed64x2); #else - __m128i const seed = _mm_set_epi64x((uint64_t)(UINT64_C(0) - seed64), (uint64_t)seed64); + __m128i const seed = _mm_set_epi64x((uint64_t)(UINT64_C(0) - seed64), (uint64_t)seed64); #endif const void * const src16 = XXH3_kSecret; - __m128i* dst16 = (__m128i*) customSecret; + __m128i * dst16 = (__m128i *)customSecret; #if defined(__GNUC__) || defined(__clang__) XXH_COMPILER_GUARD(dst16); @@ -121,9 +125,9 @@ static FORCE_INLINE void XXH3_initCustomSecret_sse2(void * RESTRICT customSecret for (int i = 0; i < nbRounds; ++i) { if (bswap) { - dst16[i] = mm_bswap64(_mm_add_epi64(mm_bswap64(_mm_load_si128((const __m128i *)src16+i)), seed)); + dst16[i] = mm_bswap64(_mm_add_epi64(mm_bswap64(_mm_load_si128((const __m128i *)src16 + i)), seed)); } else { - dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed); + dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16 + i), seed); } } } diff --git a/include/common/Hashinfo.h b/include/common/Hashinfo.h index b11ebfdf..146f7bd3 100644 --- a/include/common/Hashinfo.h +++ b/include/common/Hashinfo.h @@ -19,50 +19,50 @@ #define HAVE_HASHINFO #include -#define HASH_FLAGS \ - FLAG_EXPAND(HASH_MOCK) \ - FLAG_EXPAND(HASH_CRYPTOGRAPHIC) \ - FLAG_EXPAND(HASH_CRYPTOGRAPHIC_WEAK) \ - FLAG_EXPAND(HASH_CRC_BASED) \ - FLAG_EXPAND(HASH_AES_BASED) \ - FLAG_EXPAND(HASH_CLMUL_BASED) \ - FLAG_EXPAND(HASH_LOOKUP_TABLE) \ - FLAG_EXPAND(HASH_XL_SEED) \ - FLAG_EXPAND(HASH_SMALL_SEED) \ - FLAG_EXPAND(HASH_NO_SEED) \ - FLAG_EXPAND(HASH_SYSTEM_SPECIFIC) \ - FLAG_EXPAND(HASH_ENDIAN_INDEPENDENT) \ 
+#define HASH_FLAGS \ + FLAG_EXPAND(HASH_MOCK) \ + FLAG_EXPAND(HASH_CRYPTOGRAPHIC) \ + FLAG_EXPAND(HASH_CRYPTOGRAPHIC_WEAK) \ + FLAG_EXPAND(HASH_CRC_BASED) \ + FLAG_EXPAND(HASH_AES_BASED) \ + FLAG_EXPAND(HASH_CLMUL_BASED) \ + FLAG_EXPAND(HASH_LOOKUP_TABLE) \ + FLAG_EXPAND(HASH_XL_SEED) \ + FLAG_EXPAND(HASH_SMALL_SEED) \ + FLAG_EXPAND(HASH_NO_SEED) \ + FLAG_EXPAND(HASH_SYSTEM_SPECIFIC) \ + FLAG_EXPAND(HASH_ENDIAN_INDEPENDENT) \ FLAG_EXPAND(HASH_FLOATING_POINT) -#define IMPL_FLAGS \ - FLAG_EXPAND(IMPL_SANITY_FAILS) \ - FLAG_EXPAND(IMPL_SLOW) \ - FLAG_EXPAND(IMPL_VERY_SLOW) \ - FLAG_EXPAND(IMPL_READ_PAST_EOB) \ - FLAG_EXPAND(IMPL_TYPE_PUNNING) \ - FLAG_EXPAND(IMPL_INCREMENTAL) \ - FLAG_EXPAND(IMPL_INCREMENTAL_DIFFERENT) \ - FLAG_EXPAND(IMPL_128BIT) \ - FLAG_EXPAND(IMPL_MULTIPLY) \ - FLAG_EXPAND(IMPL_MULTIPLY_64_64) \ - FLAG_EXPAND(IMPL_MULTIPLY_64_128) \ - FLAG_EXPAND(IMPL_MULTIPLY_128_128) \ - FLAG_EXPAND(IMPL_ROTATE) \ - FLAG_EXPAND(IMPL_ROTATE_VARIABLE) \ - FLAG_EXPAND(IMPL_SHIFT_VARIABLE) \ - FLAG_EXPAND(IMPL_DIVIDE) \ - FLAG_EXPAND(IMPL_MODULUS) \ - FLAG_EXPAND(IMPL_ASM) \ - FLAG_EXPAND(IMPL_CANONICAL_LE) \ - FLAG_EXPAND(IMPL_CANONICAL_BE) \ - FLAG_EXPAND(IMPL_SEED_WITH_HINT) \ - FLAG_EXPAND(IMPL_LICENSE_PUBLIC_DOMAIN) \ - FLAG_EXPAND(IMPL_LICENSE_BSD) \ - FLAG_EXPAND(IMPL_LICENSE_MIT) \ - FLAG_EXPAND(IMPL_LICENSE_ZLIB) \ +#define IMPL_FLAGS \ + FLAG_EXPAND(IMPL_SANITY_FAILS) \ + FLAG_EXPAND(IMPL_SLOW) \ + FLAG_EXPAND(IMPL_VERY_SLOW) \ + FLAG_EXPAND(IMPL_READ_PAST_EOB) \ + FLAG_EXPAND(IMPL_TYPE_PUNNING) \ + FLAG_EXPAND(IMPL_INCREMENTAL) \ + FLAG_EXPAND(IMPL_INCREMENTAL_DIFFERENT) \ + FLAG_EXPAND(IMPL_128BIT) \ + FLAG_EXPAND(IMPL_MULTIPLY) \ + FLAG_EXPAND(IMPL_MULTIPLY_64_64) \ + FLAG_EXPAND(IMPL_MULTIPLY_64_128) \ + FLAG_EXPAND(IMPL_MULTIPLY_128_128) \ + FLAG_EXPAND(IMPL_ROTATE) \ + FLAG_EXPAND(IMPL_ROTATE_VARIABLE) \ + FLAG_EXPAND(IMPL_SHIFT_VARIABLE) \ + FLAG_EXPAND(IMPL_DIVIDE) \ + FLAG_EXPAND(IMPL_MODULUS) \ + FLAG_EXPAND(IMPL_ASM) \ + 
FLAG_EXPAND(IMPL_CANONICAL_LE) \ + FLAG_EXPAND(IMPL_CANONICAL_BE) \ + FLAG_EXPAND(IMPL_SEED_WITH_HINT) \ + FLAG_EXPAND(IMPL_LICENSE_PUBLIC_DOMAIN) \ + FLAG_EXPAND(IMPL_LICENSE_BSD) \ + FLAG_EXPAND(IMPL_LICENSE_MIT) \ + FLAG_EXPAND(IMPL_LICENSE_ZLIB) \ FLAG_EXPAND(IMPL_LICENSE_GPL3) -#define FLAG_EXPAND(name) FLAG_ENUM_##name, +#define FLAG_EXPAND(name) FLAG_ENUM_ ## name, typedef enum { HASH_FLAGS } hashflag_enum_t; @@ -71,7 +71,7 @@ typedef enum { } implflag_enum_t; #undef FLAG_EXPAND -#define FLAG_EXPAND(name) FLAG_##name=(1ULL << FLAG_ENUM_##name), +#define FLAG_EXPAND(name) FLAG_ ## name = (1ULL << FLAG_ENUM_ ## name), typedef enum : uint64_t { HASH_FLAGS } HashFlags; @@ -83,16 +83,16 @@ typedef enum : uint64_t { //----------------------------------------------------------------------------- class HashInfo; -typedef bool (*HashInitFn)(void); -typedef seed_t (*HashSeedfixFn)(const HashInfo * hinfo, const seed_t seed); -typedef uintptr_t (*HashSeedFn)(const seed_t seed); -typedef void (*HashFn)(const void * in, const size_t len, const seed_t seed, void * out); +typedef bool (* HashInitFn)( void ); +typedef seed_t (* HashSeedfixFn)( const HashInfo * hinfo, const seed_t seed ); +typedef uintptr_t (* HashSeedFn)( const seed_t seed ); +typedef void (* HashFn)( const void * in, const size_t len, const seed_t seed, void * out ); -seed_t excludeBadseeds(const HashInfo * hinfo, const seed_t seed); -seed_t excludeZeroSeed(const HashInfo * hinfo, const seed_t seed); +seed_t excludeBadseeds( const HashInfo * hinfo, const seed_t seed ); +seed_t excludeZeroSeed( const HashInfo * hinfo, const seed_t seed ); class HashInfo { - friend class HashFamilyInfo; + friend class HashFamilyInfo; public: enum endianness : uint32_t { @@ -105,20 +105,21 @@ class HashInfo { }; protected: - static const char * _fixup_name(const char * in); + static const char * _fixup_name( const char * in ); private: - uint32_t _ComputedVerifyImpl(const HashInfo * hinfo, enum HashInfo::endianness endian) 
const; + uint32_t _ComputedVerifyImpl( const HashInfo * hinfo, enum HashInfo::endianness endian ) const; - bool _is_native(enum endianness e) const { + bool _is_native( enum endianness e ) const { bool is_native = true; - switch(e) { + + switch (e) { case ENDIAN_NATIVE : is_native = true; break; case ENDIAN_BYTESWAPPED: is_native = false; break; case ENDIAN_LITTLE : is_native = isLE(); break; case ENDIAN_BIG : is_native = isBE(); break; case ENDIAN_DEFAULT : /* fallthrough */ - case ENDIAN_NONDEFAULT : + case ENDIAN_NONDEFAULT : { // Compute is_native for the DEFAULT case if (hash_flags & FLAG_HASH_ENDIAN_INDEPENDENT) { if (impl_flags & FLAG_IMPL_CANONICAL_LE) { @@ -133,62 +134,64 @@ class HashInfo { if (e == ENDIAN_NONDEFAULT) { is_native = !is_native; } break; } + } return is_native; } public: - const char * family; - const char * name; - const char * desc; - uint64_t hash_flags; - uint64_t impl_flags; - uint32_t sort_order; - uint32_t bits; - uint32_t verification_LE; - uint32_t verification_BE; - HashInitFn initfn; - HashSeedfixFn seedfixfn; - HashSeedFn seedfn; - HashFn hashfn_native; - HashFn hashfn_bswap; - std::set badseeds; - - HashInfo(const char * n, const char * f) : - name(_fixup_name(n)), family(f), desc(""), - initfn(NULL), seedfixfn(NULL), seedfn(NULL), - hashfn_native(NULL), hashfn_bswap(NULL) { } + const char * family; + const char * name; + const char * desc; + uint64_t hash_flags; + uint64_t impl_flags; + uint32_t sort_order; + uint32_t bits; + uint32_t verification_LE; + uint32_t verification_BE; + HashInitFn initfn; + HashSeedfixFn seedfixfn; + HashSeedFn seedfn; + HashFn hashfn_native; + HashFn hashfn_bswap; + std::set badseeds; + + HashInfo( const char * n, const char * f ) : + name( _fixup_name( n )), family( f ), desc( "" ), + initfn( NULL ), seedfixfn( NULL ), seedfn( NULL ), + hashfn_native( NULL ), hashfn_bswap( NULL ) {} ~HashInfo() { free((char *)name); } // The hash will be seeded with a value of 0 before this fn returns - uint32_t 
ComputedVerify(enum HashInfo::endianness endian) const { + uint32_t ComputedVerify( enum HashInfo::endianness endian ) const { return _ComputedVerifyImpl(this, endian); } - uint32_t ExpectedVerify(enum HashInfo::endianness endian) const { + uint32_t ExpectedVerify( enum HashInfo::endianness endian ) const { const bool wantLE = isBE() ^ _is_native(endian); + return wantLE ? this->verification_LE : this->verification_BE; } - FORCE_INLINE HashFn hashFn(enum HashInfo::endianness endian) const { + FORCE_INLINE HashFn hashFn( enum HashInfo::endianness endian ) const { return _is_native(endian) ? hashfn_native : hashfn_bswap; } - FORCE_INLINE bool Init(void) const { + FORCE_INLINE bool Init( void ) const { if (initfn != NULL) { return initfn(); } return true; } - FORCE_INLINE seed_t Seed(seed_t seed, bool force = false, uint64_t hint = 0) const { + FORCE_INLINE seed_t Seed( seed_t seed, bool force = false, uint64_t hint = 0 ) const { if (unlikely(impl_flags & FLAG_IMPL_SEED_WITH_HINT)) { seedfixfn(NULL, hint); return seed; } - if (!force && seedfixfn != NULL) { + if (!force && (seedfixfn != NULL)) { seed = seedfixfn(this, seed); } if (seedfn != NULL) { @@ -200,46 +203,46 @@ class HashInfo { return seed; } - FORCE_INLINE bool isMock(void) const { + FORCE_INLINE bool isMock( void ) const { return !!(hash_flags & FLAG_HASH_MOCK); } - FORCE_INLINE bool is32BitSeed(void) const { + FORCE_INLINE bool is32BitSeed( void ) const { return !!(hash_flags & FLAG_HASH_SMALL_SEED); } - FORCE_INLINE bool isEndianDefined(void) const { + FORCE_INLINE bool isEndianDefined( void ) const { return !!(hash_flags & FLAG_HASH_ENDIAN_INDEPENDENT); } - FORCE_INLINE bool isCrypto(void) const { + FORCE_INLINE bool isCrypto( void ) const { return !!(hash_flags & FLAG_HASH_CRYPTOGRAPHIC); } - FORCE_INLINE bool isSlow(void) const { + FORCE_INLINE bool isSlow( void ) const { return !!(impl_flags & (FLAG_IMPL_SLOW | FLAG_IMPL_VERY_SLOW)); } - FORCE_INLINE bool isVerySlow(void) const { + FORCE_INLINE bool 
isVerySlow( void ) const { return !!(impl_flags & FLAG_IMPL_VERY_SLOW); } -}; +}; // class HashInfo class HashFamilyInfo { -public: - const char * name; - const char * src_url; - enum SrcStatus : uint32_t { - SRC_UNKNOWN, - SRC_FROZEN, // Very unlikely to change - SRC_STABLEISH, // Fairly unlikely to change - SRC_ACTIVE, // Likely to change - } src_status; - - HashFamilyInfo(const char * n) : - name(_fixup_name(n)), - src_url(NULL), src_status(SRC_UNKNOWN) { } - -private: - static const char * _fixup_name(const char * in); -}; + public: + const char * name; + const char * src_url; + enum SrcStatus : uint32_t { + SRC_UNKNOWN, + SRC_FROZEN, // Very unlikely to change + SRC_STABLEISH, // Fairly unlikely to change + SRC_ACTIVE, // Likely to change + } src_status; + + HashFamilyInfo( const char * n ) : + name( _fixup_name( n )), + src_url( NULL ), src_status( SRC_UNKNOWN ) {} + + private: + static const char * _fixup_name( const char * in ); +}; // class HashFamilyInfo diff --git a/include/common/Intrinsics.h b/include/common/Intrinsics.h index 44922189..5b1ecef4 100644 --- a/include/common/Intrinsics.h +++ b/include/common/Intrinsics.h @@ -28,33 +28,33 @@ #pragma once #if defined(HAVE_X86INTRIN) -# include + #include #elif defined(HAVE_AMMINTRIN) -# include + #include #elif defined(HAVE_IMMINTRIN) -# include + #include #endif #if defined(HAVE_ARM_NEON) - /* circumvent a clang bug */ -# if defined(__GNUC__) || defined(__clang__) -# if defined(__ARM_NEON__) || defined(__ARM_NEON) || \ +/* circumvent a clang bug */ + #if defined(__GNUC__) || defined(__clang__) + #if defined(__ARM_NEON__) || defined(__ARM_NEON) || \ defined(__aarch64__) || defined(_M_ARM) || \ defined(_M_ARM64) || defined(_M_ARM64EC) -# define inline __inline__ -# endif -# endif -# include -# if defined(__GNUC__) || defined(__clang__) -# if defined(__ARM_NEON__) || defined(__ARM_NEON) || \ + #define inline __inline__ + #endif + #endif + #include + #if defined(__GNUC__) || defined(__clang__) + #if 
defined(__ARM_NEON__) || defined(__ARM_NEON) || \ defined(__aarch64__) || defined(_M_ARM) || \ defined(_M_ARM64) || defined(_M_ARM64EC) -# undef inline -# endif -# endif -# if defined(HAVE_ARM_ACLE) -# include -# endif + #undef inline + #endif + #endif + #if defined(HAVE_ARM_ACLE) + #include + #endif #endif @@ -69,46 +69,46 @@ * * We use pragma push_macro/pop_macro to keep the namespace clean. */ -#pragma push_macro("bool") -#pragma push_macro("vector") -#pragma push_macro("pixel") + #pragma push_macro("bool") + #pragma push_macro("vector") + #pragma push_macro("pixel") /* silence potential macro redefined warnings */ -#undef bool -#undef vector -#undef pixel - -#if defined(__s390x__) -# include -#else -# include -#endif + #undef bool + #undef vector + #undef pixel + + #if defined(__s390x__) + #include + #else + #include + #endif /* Restore the original macro values, if applicable. */ -#pragma pop_macro("pixel") -#pragma pop_macro("vector") -#pragma pop_macro("bool") + #pragma pop_macro("pixel") + #pragma pop_macro("vector") + #pragma pop_macro("bool") -#if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) + #if defined(__ibmxl__) || (defined(_AIX) && defined(__xlC__)) typedef __vector unsigned char vec_t; -#define vec_encrypt(a,b) __vcipher(a,b); -#define vec_encryptlast(a,b) __vcipherlast(a,b); -#define vec_decrypt(a,b) __vncipher(a,b); -#define vec_encryptlast(a,b) __vncipherlast(a,b); -#elif defined(__clang__) + #define vec_encrypt(a, b) __vcipher(a, b); + #define vec_encryptlast(a, b) __vcipherlast(a, b); + #define vec_decrypt(a, b) __vncipher(a, b); + #define vec_encryptlast(a, b) __vncipherlast(a, b); + #elif defined(__clang__) typedef __vector unsigned long long vec_t; -#define vec_encrypt(a,b) __builtin_altivec_crypto_vcipher(a, b); -#define vec_encryptlast(a,b) __builtin_altivec_crypto_vcipherlast(a, b); -#define vec_decrypt(a,b) __builtin_altivec_crypto_vncipher(a, b); -#define vec_decryptlast(a,b) __builtin_altivec_crypto_vncipherlast(a, b); 
-#elif defined(__GNUC__) + #define vec_encrypt(a, b) __builtin_altivec_crypto_vcipher(a, b); + #define vec_encryptlast(a, b) __builtin_altivec_crypto_vcipherlast(a, b); + #define vec_decrypt(a, b) __builtin_altivec_crypto_vncipher(a, b); + #define vec_decryptlast(a, b) __builtin_altivec_crypto_vncipherlast(a, b); + #elif defined(__GNUC__) typedef __vector unsigned long long vec_t; -#define vec_encrypt(a,b) __builtin_crypto_vcipher(a,b); -#define vec_encryptlast(a,b) __builtin_crypto_vcipherlast(a,b); -#define vec_decrypt(a,b) __builtin_crypto_vncipher(a,b); -#define vec_decryptlast(a,b) __builtin_crypto_vncipherlast(a,b); -#else -#error "PPC AES intrinsic mapping unimplemented" -#endif + #define vec_encrypt(a, b) __builtin_crypto_vcipher(a, b); + #define vec_encryptlast(a, b) __builtin_crypto_vcipherlast(a, b); + #define vec_decrypt(a, b) __builtin_crypto_vncipher(a, b); + #define vec_decryptlast(a, b) __builtin_crypto_vncipherlast(a, b); + #else + #error "PPC AES intrinsic mapping unimplemented" + #endif #endif //------------------------------------------------------------ @@ -117,121 +117,127 @@ typedef __vector unsigned long long vec_t; // prefetch() implementation without this. 
#if defined(HAVE_SSE_2) -#undef prefetch -#define prefetch(x) _mm_prefetch(x, _MM_HINT_T0) + #undef prefetch + #define prefetch(x) _mm_prefetch(x, _MM_HINT_T0) #endif //------------------------------------------------------------ // Vectorized byteswapping #if defined(HAVE_ARM_NEON) -static FORCE_INLINE uint64x2_t Vbswap64_u64(const uint64x2_t v) { + +static FORCE_INLINE uint64x2_t Vbswap64_u64( const uint64x2_t v ) { return vreinterpretq_u64_u8(vrev64q_u8(vreinterpretq_u8_u64(v))); } -static FORCE_INLINE uint32x4_t Vbswap32_u32(const uint32x4_t v) { + +static FORCE_INLINE uint32x4_t Vbswap32_u32( const uint32x4_t v ) { return vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(v))); } + #endif #if defined(HAVE_AVX512_BW) -static FORCE_INLINE __m512i mm512_bswap64(const __m512i v) { - const __m512i MASK = _mm512_set_epi64(UINT64_C(0x08090a0b0c0d0e0f), - UINT64_C(0x0001020304050607), - UINT64_C(0x08090a0b0c0d0e0f), - UINT64_C(0x0001020304050607), - UINT64_C(0x08090a0b0c0d0e0f), - UINT64_C(0x0001020304050607), - UINT64_C(0x08090a0b0c0d0e0f), - UINT64_C(0x0001020304050607)); + +static FORCE_INLINE __m512i mm512_bswap64( const __m512i v ) { + const __m512i MASK = _mm512_set_epi64(UINT64_C(0x08090a0b0c0d0e0f), UINT64_C(0x0001020304050607), + UINT64_C(0x08090a0b0c0d0e0f), UINT64_C(0x0001020304050607), UINT64_C(0x08090a0b0c0d0e0f), + UINT64_C(0x0001020304050607), UINT64_C(0x08090a0b0c0d0e0f), UINT64_C(0x0001020304050607)); + return _mm512_shuffle_epi8(v, MASK); } -static FORCE_INLINE __m512i mm512_bswap32(const __m512i v) { - const __m512i MASK = _mm512_set_epi64(UINT64_C(0x0c0d0e0f08090a0b), - UINT64_C(0x0405060700010203), - UINT64_C(0x0c0d0e0f08090a0b), - UINT64_C(0x0405060700010203), - UINT64_C(0x0c0d0e0f08090a0b), - UINT64_C(0x0405060700010203), - UINT64_C(0x0c0d0e0f08090a0b), - UINT64_C(0x0405060700010203)); + +static FORCE_INLINE __m512i mm512_bswap32( const __m512i v ) { + const __m512i MASK = _mm512_set_epi64(UINT64_C(0x0c0d0e0f08090a0b), 
UINT64_C(0x0405060700010203), + UINT64_C(0x0c0d0e0f08090a0b), UINT64_C(0x0405060700010203), UINT64_C(0x0c0d0e0f08090a0b), + UINT64_C(0x0405060700010203), UINT64_C(0x0c0d0e0f08090a0b), UINT64_C(0x0405060700010203)); + return _mm512_shuffle_epi8(v, MASK); } + #elif defined(HAVE_AVX512_F) -static FORCE_INLINE __m512i mm512_bswap64(const __m512i v) { + +static FORCE_INLINE __m512i mm512_bswap64( const __m512i v ) { // Byteswapping 256 bits at a time, since _mm512_shuffle_epi8() // requires AVX512-BW in addition to AVX512-F. - const __m256i MASK = _mm256_set_epi64x(UINT64_C(0x08090a0b0c0d0e0f), - UINT64_C(0x0001020304050607), - UINT64_C(0x08090a0b0c0d0e0f), - UINT64_C(0x0001020304050607)); - __m256i blk1 = _mm512_extracti64x4_epi64(v, 0); - __m256i blk2 = _mm512_extracti64x4_epi64(v, 1); + const __m256i MASK = _mm256_set_epi64x(UINT64_C(0x08090a0b0c0d0e0f), UINT64_C(0x0001020304050607), + UINT64_C(0x08090a0b0c0d0e0f), UINT64_C(0x0001020304050607)); + __m256i blk1 = _mm512_extracti64x4_epi64(v, 0); + __m256i blk2 = _mm512_extracti64x4_epi64(v, 1); + blk1 = _mm256_shuffle_epi8(blk1, MASK); blk2 = _mm256_shuffle_epi8(blk2, MASK); - v = _mm512_inserti64x4(v, blk1, 0); - v = _mm512_inserti64x4(v, blk2, 1); + v = _mm512_inserti64x4(v, blk1, 0); + v = _mm512_inserti64x4(v, blk2, 1); return v; } -static FORCE_INLINE __m512i mm512_bswap64(const __m512i v) { + +static FORCE_INLINE __m512i mm512_bswap64( const __m512i v ) { // Byteswapping 256 bits at a time, since _mm512_shuffle_epi8() // requires AVX512-BW in addition to AVX512-F. 
- const __m256i MASK = _mm256_set_epi64x(UINT64_C(0x0c0d0e0f08090a0b), - UINT64_C(0x0405060700010203), - UINT64_C(0x0c0d0e0f08090a0b), - UINT64_C(0x0405060700010203)); - __m256i blk1 = _mm512_extracti64x4_epi64(v, 0); - __m256i blk2 = _mm512_extracti64x4_epi64(v, 1); + const __m256i MASK = _mm256_set_epi64x(UINT64_C(0x0c0d0e0f08090a0b), UINT64_C(0x0405060700010203), + UINT64_C(0x0c0d0e0f08090a0b), UINT64_C(0x0405060700010203)); + __m256i blk1 = _mm512_extracti64x4_epi64(v, 0); + __m256i blk2 = _mm512_extracti64x4_epi64(v, 1); + blk1 = _mm256_shuffle_epi8(blk1, MASK); blk2 = _mm256_shuffle_epi8(blk2, MASK); - v = _mm512_inserti64x4(v, blk1, 0); - v = _mm512_inserti64x4(v, blk2, 1); + v = _mm512_inserti64x4(v, blk1, 0); + v = _mm512_inserti64x4(v, blk2, 1); return v; } + #endif #if defined(HAVE_AVX2) -static FORCE_INLINE __m256i mm256_bswap64(const __m256i v) { - const __m256i MASK = _mm256_set_epi64x(UINT64_C(0x08090a0b0c0d0e0f), - UINT64_C(0x0001020304050607), - UINT64_C(0x08090a0b0c0d0e0f), - UINT64_C(0x0001020304050607)); + +static FORCE_INLINE __m256i mm256_bswap64( const __m256i v ) { + const __m256i MASK = _mm256_set_epi64x(UINT64_C(0x08090a0b0c0d0e0f), UINT64_C(0x0001020304050607), + UINT64_C(0x08090a0b0c0d0e0f), UINT64_C(0x0001020304050607)); + return _mm256_shuffle_epi8(v, MASK); } -static FORCE_INLINE __m256i mm256_bswap32(const __m256i v) { - const __m256i MASK = _mm256_set_epi64x(UINT64_C(0x0c0d0e0f08090a0b), - UINT64_C(0x0405060700010203), - UINT64_C(0x0c0d0e0f08090a0b), - UINT64_C(0x0405060700010203)); + +static FORCE_INLINE __m256i mm256_bswap32( const __m256i v ) { + const __m256i MASK = _mm256_set_epi64x(UINT64_C(0x0c0d0e0f08090a0b), UINT64_C(0x0405060700010203), + UINT64_C(0x0c0d0e0f08090a0b), UINT64_C(0x0405060700010203)); + return _mm256_shuffle_epi8(v, MASK); } + #endif #if defined(HAVE_SSSE_3) -static FORCE_INLINE __m128i mm_bswap64(const __m128i v) { - const __m128i MASK = _mm_set_epi64x(UINT64_C(0x08090a0b0c0d0e0f), - 
UINT64_C(0x0001020304050607)); + +static FORCE_INLINE __m128i mm_bswap64( const __m128i v ) { + const __m128i MASK = _mm_set_epi64x(UINT64_C(0x08090a0b0c0d0e0f), UINT64_C(0x0001020304050607)); + return _mm_shuffle_epi8(v, MASK); } -static FORCE_INLINE __m128i mm_bswap32(const __m128i v) { - const __m128i MASK = _mm_set_epi64x(UINT64_C(0x0c0d0e0f08090a0b), - UINT64_C(0x0405060700010203)); + +static FORCE_INLINE __m128i mm_bswap32( const __m128i v ) { + const __m128i MASK = _mm_set_epi64x(UINT64_C(0x0c0d0e0f08090a0b), UINT64_C(0x0405060700010203)); + return _mm_shuffle_epi8(v, MASK); } + #elif defined(HAVE_SSE_2) -static FORCE_INLINE __m128i mm_bswap64(const __m128i v) { + +static FORCE_INLINE __m128i mm_bswap64( const __m128i v ) { // Swap each pair of bytes - __m128i tmp = _mm_or_si128(_mm_slri_epi16(v, 8), - _mm_slli_epi16(v, 8)); + __m128i tmp = _mm_or_si128(_mm_slri_epi16(v, 8), _mm_slli_epi16(v, 8)); + // Swap 16-bit words tmp = _mm_shufflelo_epi16(tmp, _MM_SHUFFLE(0, 1, 2, 3)); tmp = _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(0, 1, 2, 3)); } -static FORCE_INLINE __m128i mm_bswap32(const __m128i v) { + +static FORCE_INLINE __m128i mm_bswap32( const __m128i v ) { // Swap each pair of bytes - __m128i tmp = _mm_or_si128(_mm_slri_epi16(v, 8), - _mm_slli_epi16(v, 8)); + __m128i tmp = _mm_or_si128(_mm_slri_epi16(v, 8), _mm_slli_epi16(v, 8)); + // Swap 16-bit words tmp = _mm_shufflelo_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)); tmp = _mm_shufflehi_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)); } + #endif diff --git a/include/hashlib/AES-aesni.h b/include/hashlib/AES-aesni.h index 57fc59a0..7879d30d 100644 --- a/include/hashlib/AES-aesni.h +++ b/include/hashlib/AES-aesni.h @@ -31,6 +31,7 @@ */ static inline __m128i _expand_key_helper( __m128i rkey, __m128i assist ) { __m128i temp; + temp = _mm_slli_si128(rkey, 0x4); rkey = _mm_xor_si128(rkey, temp); temp = _mm_slli_si128(temp, 0x4); @@ -39,16 +40,17 @@ static inline __m128i _expand_key_helper( __m128i rkey, __m128i assist ) { rkey = 
_mm_xor_si128(rkey, temp); temp = _mm_shuffle_epi32(assist, 0xff); - rkey = _mm_xor_si128 (rkey, temp); + rkey = _mm_xor_si128(rkey, temp); return rkey; } #define MKASSIST(x, y) x, _mm_aeskeygenassist_si128(x, y) -static int AES_KeySetup_Enc_AESNI(uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) { +static int AES_KeySetup_Enc_AESNI( uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t cipherKey[], int keyBits ) { __m128i * round_keys = (__m128i *)rk; - round_keys[ 0] = _mm_loadu_si128((__m128i *)cipherKey); + + round_keys[0] = _mm_loadu_si128((__m128i *)cipherKey); round_keys[ 1] = _expand_key_helper(MKASSIST(round_keys[0], 0x01)); round_keys[ 2] = _expand_key_helper(MKASSIST(round_keys[1], 0x02)); round_keys[ 3] = _expand_key_helper(MKASSIST(round_keys[2], 0x04)); @@ -62,19 +64,20 @@ static int AES_KeySetup_Enc_AESNI(uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cip return (keyBits == 128) ? 10 : (keyBits == 192) ? 12 : (keyBits == 256) ? 14 : 0; } -static int AES_KeySetup_Dec_AESNI(uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) { +static int AES_KeySetup_Dec_AESNI( uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t cipherKey[], int keyBits ) { __m128i * round_keys = (__m128i *)rk; + round_keys[10] = _mm_loadu_si128((__m128i *)cipherKey); - round_keys[ 9] = _expand_key_helper(MKASSIST(round_keys[10], 0x01)); - round_keys[ 8] = _expand_key_helper(MKASSIST(round_keys[ 9], 0x02)); - round_keys[ 7] = _expand_key_helper(MKASSIST(round_keys[ 8], 0x04)); - round_keys[ 6] = _expand_key_helper(MKASSIST(round_keys[ 7], 0x08)); - round_keys[ 5] = _expand_key_helper(MKASSIST(round_keys[ 6], 0x10)); - round_keys[ 4] = _expand_key_helper(MKASSIST(round_keys[ 5], 0x20)); - round_keys[ 3] = _expand_key_helper(MKASSIST(round_keys[ 4], 0x40)); - round_keys[ 2] = _expand_key_helper(MKASSIST(round_keys[ 3], 0x80)); - round_keys[ 1] = _expand_key_helper(MKASSIST(round_keys[ 2], 0x1b)); - round_keys[ 0] = _expand_key_helper(MKASSIST(round_keys[ 1], 0x36)); + 
round_keys[9] = _expand_key_helper(MKASSIST(round_keys[10], 0x01)); + round_keys[8] = _expand_key_helper(MKASSIST(round_keys[ 9], 0x02)); + round_keys[7] = _expand_key_helper(MKASSIST(round_keys[ 8], 0x04)); + round_keys[6] = _expand_key_helper(MKASSIST(round_keys[ 7], 0x08)); + round_keys[5] = _expand_key_helper(MKASSIST(round_keys[ 6], 0x10)); + round_keys[4] = _expand_key_helper(MKASSIST(round_keys[ 5], 0x20)); + round_keys[3] = _expand_key_helper(MKASSIST(round_keys[ 4], 0x40)); + round_keys[2] = _expand_key_helper(MKASSIST(round_keys[ 3], 0x80)); + round_keys[1] = _expand_key_helper(MKASSIST(round_keys[ 2], 0x1b)); + round_keys[0] = _expand_key_helper(MKASSIST(round_keys[ 1], 0x36)); for (int i = 1; i < 10; i++) { round_keys[i] = _mm_aesimc_si128(round_keys[i]); } @@ -83,48 +86,54 @@ static int AES_KeySetup_Dec_AESNI(uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cip #undef MKASSIST -template < int Nr > -static inline void AES_Encrypt_AESNI(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_t pt[16], uint8_t ct[16]) { +template +static inline void AES_Encrypt_AESNI( const uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t pt[16], uint8_t ct[16] ) { const __m128i * round_keys = (const __m128i *)rk; - __m128i tmp; - tmp = _mm_loadu_si128((const __m128i*)pt); + __m128i tmp; + + tmp = _mm_loadu_si128((const __m128i *)pt); tmp = _mm_xor_si128(tmp, round_keys[0]); - for (int j = 1; j < Nr; j++) + for (int j = 1; j < Nr; j++) { tmp = _mm_aesenc_si128(tmp, round_keys[j]); + } tmp = _mm_aesenclast_si128(tmp, round_keys[Nr]); - _mm_storeu_si128((((__m128i*)ct)), tmp); + _mm_storeu_si128((((__m128i *)ct)), tmp); } -template < int Nr > -static inline void AES_Decrypt_AESNI(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_t ct[16], uint8_t pt[16]) { +template +static inline void AES_Decrypt_AESNI( const uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t ct[16], uint8_t pt[16] ) { const __m128i * round_keys = (const __m128i *)rk; - __m128i tmp; - tmp = _mm_loadu_si128((const __m128i*)ct); + 
__m128i tmp; + + tmp = _mm_loadu_si128((const __m128i *)ct); tmp = _mm_xor_si128(tmp, round_keys[0]); - for (int j = 1; j < Nr; j++) + for (int j = 1; j < Nr; j++) { tmp = _mm_aesdec_si128(tmp, round_keys[j]); + } tmp = _mm_aesdeclast_si128(tmp, round_keys[Nr]); - _mm_storeu_si128((((__m128i*)pt)), tmp); + _mm_storeu_si128((((__m128i *)pt)), tmp); } -static inline void AES_EncryptRound_AESNI(const uint32_t rk[4], uint8_t block[16]) { +static inline void AES_EncryptRound_AESNI( const uint32_t rk[4], uint8_t block[16] ) { const __m128i round_key = _mm_loadu_si128((const __m128i *)rk); - __m128i tmp = _mm_loadu_si128((__m128i *)block); + __m128i tmp = _mm_loadu_si128((__m128i *)block ); + tmp = _mm_aesenc_si128(tmp, round_key); - _mm_storeu_si128((((__m128i*)block)), tmp); + _mm_storeu_si128((((__m128i *)block)), tmp); } -static void AES_DecryptRound_AESNI(const uint32_t rk[4], uint8_t block[16]) { +static void AES_DecryptRound_AESNI( const uint32_t rk[4], uint8_t block[16] ) { const __m128i round_key = _mm_loadu_si128((const __m128i *)rk); - __m128i tmp = _mm_loadu_si128((__m128i *)block); + __m128i tmp = _mm_loadu_si128((__m128i *)block ); + tmp = _mm_aesdec_si128(tmp, round_key); - _mm_storeu_si128((((__m128i*)block)), tmp); + _mm_storeu_si128((((__m128i *)block)), tmp); } diff --git a/include/hashlib/AES-arm.h b/include/hashlib/AES-arm.h index f1fd7d4d..cbb9ec7c 100644 --- a/include/hashlib/AES-arm.h +++ b/include/hashlib/AES-arm.h @@ -29,57 +29,59 @@ * * For more information, please refer to */ -template < int Nr > -static inline void AES_Encrypt_ARM(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_t pt[16], uint8_t ct[16]) { - uint8x16_t block = vld1q_u8(pt); - const uint8_t * keys = (const uint8_t *)rk; +template +static inline void AES_Encrypt_ARM( const uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t pt[16], uint8_t ct[16] ) { + uint8x16_t block = vld1q_u8(pt); + const uint8_t * keys = (const uint8_t *)rk; // AES single round encryption - block = vaeseq_u8(block, 
vld1q_u8(keys+0*16)); + block = vaeseq_u8(block, vld1q_u8(keys + 0 * 16)); for (int i = 1; i < Nr; i++) { // AES mix columns block = vaesmcq_u8(block); // AES single round encryption - block = vaeseq_u8(block, vld1q_u8(keys+i*16)); + block = vaeseq_u8(block, vld1q_u8(keys + i * 16)); } // Final xor - block = veorq_u8(block, vld1q_u8(keys+Nr*16)); + block = veorq_u8(block, vld1q_u8(keys + Nr * 16)); vst1q_u8(ct, block); } -template < int Nr > -static inline void AES_Decrypt_ARM(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_t ct[16], uint8_t pt[16]) { - uint8x16_t block = vld1q_u8(ct); - const uint8_t * keys = (const uint8_t *)rk; +template +static inline void AES_Decrypt_ARM( const uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t ct[16], uint8_t pt[16] ) { + uint8x16_t block = vld1q_u8(ct); + const uint8_t * keys = (const uint8_t *)rk; // AES single round decryption - block = vaesdq_u8(block, vld1q_u8(keys+0*16)); + block = vaesdq_u8(block, vld1q_u8(keys + 0 * 16)); for (int i = 1; i < Nr; i++) { // AES inverse mix columns block = vaesimcq_u8(block); // AES single round decryption - block = vaesdq_u8(block, vld1q_u8(keys+i*16)); + block = vaesdq_u8(block, vld1q_u8(keys + i * 16)); } // Final xor - block = veorq_u8(block, vld1q_u8(keys+Nr*16)); + block = veorq_u8(block, vld1q_u8(keys + Nr * 16)); vst1q_u8(pt, block); } -static inline void AES_EncryptRound_ARM(const uint32_t rk[4], uint8_t block[16]) { +static inline void AES_EncryptRound_ARM( const uint32_t rk[4], uint8_t block[16] ) { uint8x16_t tmp = vld1q_u8(block); + tmp = vaeseq_u8(tmp, vld1q_u8((const uint8_t *)rk)); tmp = vaesmcq_u8(tmp); vst1q_u8(block, tmp); } -static inline void AES_DecryptRound_ARM(const uint32_t rk[4], uint8_t block[16]) { +static inline void AES_DecryptRound_ARM( const uint32_t rk[4], uint8_t block[16] ) { uint8x16_t tmp = vld1q_u8(block); + tmp = vaesdq_u8(tmp, vld1q_u8((const uint8_t *)rk)); tmp = vaesimcq_u8(tmp); vst1q_u8(block, tmp); diff --git a/include/hashlib/AES-portable.h 
b/include/hashlib/AES-portable.h index 68abdfc3..4d35522d 100644 --- a/include/hashlib/AES-portable.h +++ b/include/hashlib/AES-portable.h @@ -25,10 +25,11 @@ extern const uint32_t Te0[256], Te1[256], Te2[256], Te3[256], Te4[256]; extern const uint32_t Td0[256], Td1[256], Td2[256], Td3[256], Td4[256]; -static const uint32_t rcon[] = { +/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ +static const uint32_t rcon[10] = { 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, - 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ + 0x1B000000, 0x36000000, }; /* Endian-independent macros */ @@ -37,7 +38,7 @@ static const uint32_t rcon[] = { ((uint32_t)(pt)[1] << 16) ^ \ ((uint32_t)(pt)[2] << 8) ^ \ ((uint32_t)(pt)[3] ) ) -#define PUTU32(ct, st) { \ +#define PUTU32(ct, st) { \ (ct)[0] = (uint8_t)((st) >> 24); \ (ct)[1] = (uint8_t)((st) >> 16); \ (ct)[2] = (uint8_t)((st) >> 8); \ @@ -48,24 +49,24 @@ static const uint32_t rcon[] = { * * Returns the number of rounds for the given cipher key size. 
*/ -static int AES_KeySetup_Enc_portable(uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) { - int i = 0; +static int AES_KeySetup_Enc_portable( uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t cipherKey[], int keyBits ) { + int i = 0; uint32_t temp; rk[0] = GETU32(cipherKey ); - rk[1] = GETU32(cipherKey + 4); - rk[2] = GETU32(cipherKey + 8); + rk[1] = GETU32(cipherKey + 4 ); + rk[2] = GETU32(cipherKey + 8 ); rk[3] = GETU32(cipherKey + 12); if (keyBits == 128) { for (;;) { temp = rk[3]; rk[4] = rk[0] ^ - (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ - (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ - (Te4[(temp ) & 0xff] & 0x0000ff00) ^ - (Te4[(temp >> 24) ] & 0x000000ff) ^ - rcon[i]; + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; rk[5] = rk[1] ^ rk[4]; rk[6] = rk[2] ^ rk[5]; rk[7] = rk[3] ^ rk[6]; @@ -81,22 +82,22 @@ static int AES_KeySetup_Enc_portable(uint32_t rk[/*4*(Nr + 1)*/], const uint8_t if (keyBits == 192) { for (;;) { - temp = rk[ 5]; - rk[ 6] = rk[ 0] ^ - (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ - (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ - (Te4[(temp ) & 0xff] & 0x0000ff00) ^ - (Te4[(temp >> 24) ] & 0x000000ff) ^ - rcon[i]; - rk[ 7] = rk[ 1] ^ rk[ 6]; - rk[ 8] = rk[ 2] ^ rk[ 7]; - rk[ 9] = rk[ 3] ^ rk[ 8]; + temp = rk[5]; + rk[6] = rk[0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 7] = rk[1] ^ rk[ 6]; + rk[ 8] = rk[2] ^ rk[ 7]; + rk[ 9] = rk[3] ^ rk[ 8]; if (++i == 8) { return 12; } - rk[10] = rk[ 4] ^ rk[ 9]; - rk[11] = rk[ 5] ^ rk[10]; - rk += 6; + rk[10] = rk[4] ^ rk[ 9]; + rk[11] = rk[5] ^ rk[10]; + rk += 6; } } @@ -105,30 +106,30 @@ static int AES_KeySetup_Enc_portable(uint32_t rk[/*4*(Nr + 1)*/], const uint8_t if (keyBits == 256) { for (;;) { - temp = rk[ 7]; - rk[ 8] = rk[ 0] ^ - 
(Te4[(temp >> 16) & 0xff] & 0xff000000) ^ - (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ - (Te4[(temp ) & 0xff] & 0x0000ff00) ^ - (Te4[(temp >> 24) ] & 0x000000ff) ^ - rcon[i]; - rk[ 9] = rk[ 1] ^ rk[ 8]; - rk[10] = rk[ 2] ^ rk[ 9]; - rk[11] = rk[ 3] ^ rk[10]; + temp = rk[7]; + rk[8] = rk[0] ^ + (Te4[(temp >> 16) & 0xff] & 0xff000000) ^ + (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^ + (Te4[(temp ) & 0xff] & 0x0000ff00) ^ + (Te4[(temp >> 24) ] & 0x000000ff) ^ + rcon[i]; + rk[ 9] = rk[1] ^ rk[ 8]; + rk[10] = rk[2] ^ rk[ 9]; + rk[11] = rk[3] ^ rk[10]; if (++i == 7) { return 14; } - temp = rk[11]; - rk[12] = rk[ 4] ^ - (Te4[(temp >> 24) ] & 0xff000000) ^ - (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^ - (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^ - (Te4[(temp ) & 0xff] & 0x000000ff); - rk[13] = rk[ 5] ^ rk[12]; - rk[14] = rk[ 6] ^ rk[13]; - rk[15] = rk[ 7] ^ rk[14]; - - rk += 8; + temp = rk[11]; + rk[12] = rk[4] ^ + (Te4[(temp >> 24) ] & 0xff000000) ^ + (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(temp ) & 0xff] & 0x000000ff); + rk[13] = rk[5] ^ rk[12]; + rk[14] = rk[6] ^ rk[13]; + rk[15] = rk[7] ^ rk[14]; + + rk += 8; } } @@ -140,15 +141,15 @@ static int AES_KeySetup_Enc_portable(uint32_t rk[/*4*(Nr + 1)*/], const uint8_t * * Returns the number of rounds for the given cipher key size. 
*/ -static int AES_KeySetup_Dec_portable(uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) { - int Nr, i, j; +static int AES_KeySetup_Dec_portable( uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t cipherKey[], int keyBits ) { + int Nr, i, j; uint32_t temp; /* expand the cipher key: */ Nr = AES_KeySetup_Dec_portable(rk, cipherKey, keyBits); /* invert the order of the round keys: */ - for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) { + for (i = 0, j = 4 * Nr; i < j; i += 4, j -= 4) { temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; @@ -157,35 +158,35 @@ static int AES_KeySetup_Dec_portable(uint32_t rk[/*4*(Nr + 1)*/], const uint8_t /* apply the inverse MixColumn transform to all round keys but the first and the last: */ for (i = 1; i < Nr; i++) { - rk += 4; + rk += 4; rk[0] = - Td0[Te4[(rk[0] >> 24) ] & 0xff] ^ - Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^ - Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^ - Td3[Te4[(rk[0] ) & 0xff] & 0xff]; + Td0[Te4[(rk[0] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[0] ) & 0xff] & 0xff]; rk[1] = - Td0[Te4[(rk[1] >> 24) ] & 0xff] ^ - Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^ - Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^ - Td3[Te4[(rk[1] ) & 0xff] & 0xff]; + Td0[Te4[(rk[1] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[1] ) & 0xff] & 0xff]; rk[2] = - Td0[Te4[(rk[2] >> 24) ] & 0xff] ^ - Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^ - Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^ - Td3[Te4[(rk[2] ) & 0xff] & 0xff]; + Td0[Te4[(rk[2] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[2] ) & 0xff] & 0xff]; rk[3] = - Td0[Te4[(rk[3] >> 24) ] & 0xff] ^ - Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^ - Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^ - 
Td3[Te4[(rk[3] ) & 0xff] & 0xff]; + Td0[Te4[(rk[3] >> 24) ] & 0xff] ^ + Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^ + Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^ + Td3[Te4[(rk[3] ) & 0xff] & 0xff]; } return Nr; } -template < int Nr > -static void AES_Encrypt_portable(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_t pt[16], uint8_t ct[16]) { - //STATIC_ASSERT(Nr >=1 && Nr <= 14); +template +static void AES_Encrypt_portable( const uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t pt[16], uint8_t ct[16] ) { + // STATIC_ASSERT(Nr >=1 && Nr <= 14); uint32_t s0, s1, s2, s3, t0, t1, t2, t3; /* @@ -199,94 +200,94 @@ static void AES_Encrypt_portable(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_ /* round 1: */ if (Nr > 1) { - t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4]; - t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5]; - t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6]; - t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7]; + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[4]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[5]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[6]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[7]; } /* round 2: */ if (Nr > 2) { - s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8]; - s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9]; - s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10]; - s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11]; + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8]; 
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11]; } /* round 3: */ if (Nr > 3) { - t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12]; - t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13]; - t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14]; - t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15]; + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15]; } /* round 4: */ if (Nr > 4) { - s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16]; - s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17]; - s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18]; - s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19]; + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19]; } /* round 5: */ if (Nr > 5) { - t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 
0xff] ^ Te3[s3 & 0xff] ^ rk[20]; - t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21]; - t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22]; - t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23]; + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23]; } /* round 6: */ if (Nr > 6) { - s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24]; - s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25]; - s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26]; - s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27]; + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27]; } /* round 7: */ if (Nr > 7) { - t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28]; - t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29]; - t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30]; - t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31]; + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) 
& 0xff] ^ Te3[s3 & 0xff] ^ rk[28]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31]; } /* round 8: */ if (Nr > 8) { - s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32]; - s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33]; - s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34]; - s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35]; + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35]; } /* round 9: */ if (Nr > 9) { - t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36]; - t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37]; - t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38]; - t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39]; + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39]; } /* round 10: */ if (Nr > 10) { - s0 = Te0[t0 >> 24] ^ 
Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40]; - s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41]; - s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42]; - s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43]; + s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43]; } /* round 11: */ if (Nr > 11) { - t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44]; - t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45]; - t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46]; - t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47]; + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47]; } /* round 12: */ if (Nr > 12) { - s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48]; - s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49]; - s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50]; - s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51]; + s0 = Te0[t0 >> 
24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48]; + s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49]; + s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50]; + s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51]; } /* round 13: */ if (Nr > 13) { - t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52]; - t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53]; - t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54]; - t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55]; + t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52]; + t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53]; + t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54]; + t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55]; } rk += Nr << 2; @@ -303,38 +304,39 @@ static void AES_Encrypt_portable(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_ * map cipher state to byte array block: */ s0 = - (Te4[(t0 >> 24) ] & 0xff000000) ^ - (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ - (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ - (Te4[(t3 ) & 0xff] & 0x000000ff) ^ - rk[0]; - PUTU32(ct , s0); + (Te4[(t0 >> 24) ] & 0xff000000) ^ + (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[0]; s1 = - (Te4[(t1 >> 24) ] & 0xff000000) ^ - (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ - (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ - (Te4[(t0 ) & 0xff] & 0x000000ff) ^ - rk[1]; - PUTU32(ct + 4, s1); + (Te4[(t1 >> 24) ] & 0xff000000) ^ + (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t3 >> 8) & 0xff] & 
0x0000ff00) ^ + (Te4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[1]; s2 = - (Te4[(t2 >> 24) ] & 0xff000000) ^ - (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ - (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ - (Te4[(t1 ) & 0xff] & 0x000000ff) ^ - rk[2]; - PUTU32(ct + 8, s2); + (Te4[(t2 >> 24) ] & 0xff000000) ^ + (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[2]; s3 = - (Te4[(t3 >> 24) ] & 0xff000000) ^ - (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ - (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ - (Te4[(t2 ) & 0xff] & 0x000000ff) ^ - rk[3]; + (Te4[(t3 >> 24) ] & 0xff000000) ^ + (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (Te4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[3]; + + PUTU32(ct , s0); + PUTU32(ct + 4, s1); + PUTU32(ct + 8, s2); PUTU32(ct + 12, s3); } -template < int Nr > -static void AES_Decrypt_portable(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_t ct[16], uint8_t pt[16]) { - //STATIC_ASSERT(Nr >=1 && Nr <= 14); +template +static void AES_Decrypt_portable( const uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t ct[16], uint8_t pt[16] ) { + // STATIC_ASSERT(Nr >=1 && Nr <= 14); uint32_t s0, s1, s2, s3, t0, t1, t2, t3; /* @@ -348,94 +350,94 @@ static void AES_Decrypt_portable(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_ /* round 1: */ if (Nr > 1) { - t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4]; - t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5]; - t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6]; - t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7]; + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[4]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[5]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ 
Td3[s3 & 0xff] ^ rk[6]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[7]; } /* round 2: */ if (Nr > 2) { - s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8]; - s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9]; - s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10]; - s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11]; + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11]; } /* round 3: */ if (Nr > 3) { - t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12]; - t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13]; - t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14]; - t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15]; + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15]; } /* round 4: */ if (Nr > 4) { - s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16]; - s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17]; - s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 
0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18]; - s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19]; + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19]; } /* round 5: */ if (Nr > 5) { - t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20]; - t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21]; - t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22]; - t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23]; + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23]; } /* round 6: */ if (Nr > 6) { - s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24]; - s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25]; - s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26]; - s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27]; + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) 
& 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27]; } /* round 7: */ if (Nr > 7) { - t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28]; - t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29]; - t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30]; - t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31]; + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31]; } /* round 8: */ if (Nr > 8) { - s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32]; - s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33]; - s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34]; - s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35]; + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35]; } /* round 9: */ if (Nr > 9) { - t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36]; - t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37]; - t2 
= Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38]; - t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39]; + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39]; } /* round 10: */ if (Nr > 10) { - s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40]; - s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41]; - s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42]; - s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43]; + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43]; } /* round 11: */ if (Nr > 11) { - t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44]; - t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45]; - t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46]; - t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47]; + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ 
rk[45]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47]; } /* round 12: */ if (Nr > 12) { - s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48]; - s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49]; - s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50]; - s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51]; + s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48]; + s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49]; + s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50]; + s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51]; } /* round 13: */ if (Nr > 13) { - t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52]; - t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53]; - t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54]; - t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55]; + t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52]; + t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53]; + t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54]; + t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55]; } rk += Nr << 2; @@ -452,36 +454,37 @@ static void AES_Decrypt_portable(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_ * map cipher state to byte array block: */ s0 = - (Td4[(t0 
>> 24) ] & 0xff000000) ^ - (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ - (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ - (Td4[(t1 ) & 0xff] & 0x000000ff) ^ - rk[0]; - PUTU32(pt , s0); + (Td4[(t0 >> 24) ] & 0xff000000) ^ + (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t1 ) & 0xff] & 0x000000ff) ^ + rk[0]; s1 = - (Td4[(t1 >> 24) ] & 0xff000000) ^ - (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ - (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ - (Td4[(t2 ) & 0xff] & 0x000000ff) ^ - rk[1]; - PUTU32(pt + 4, s1); + (Td4[(t1 >> 24) ] & 0xff000000) ^ + (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t2 ) & 0xff] & 0x000000ff) ^ + rk[1]; s2 = - (Td4[(t2 >> 24) ] & 0xff000000) ^ - (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ - (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ - (Td4[(t3 ) & 0xff] & 0x000000ff) ^ - rk[2]; - PUTU32(pt + 8, s2); + (Td4[(t2 >> 24) ] & 0xff000000) ^ + (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t3 ) & 0xff] & 0x000000ff) ^ + rk[2]; s3 = - (Td4[(t3 >> 24) ] & 0xff000000) ^ - (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ - (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ - (Td4[(t0 ) & 0xff] & 0x000000ff) ^ - rk[3]; + (Td4[(t3 >> 24) ] & 0xff000000) ^ + (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^ + (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^ + (Td4[(t0 ) & 0xff] & 0x000000ff) ^ + rk[3]; + + PUTU32(pt , s0); + PUTU32(pt + 4, s1); + PUTU32(pt + 8, s2); PUTU32(pt + 12, s3); } -static void AES_EncryptRound_portable(const uint32_t rk[4], uint8_t block[16]) { +static void AES_EncryptRound_portable( const uint32_t rk[4], uint8_t block[16] ) { uint32_t s0, s1, s2, s3, t0, t1, t2, t3; s0 = GETU32(block ); @@ -490,29 +493,29 @@ static void AES_EncryptRound_portable(const uint32_t rk[4], uint8_t block[16]) { s3 = GETU32(block + 12); t0 = - (Te0[(s0 >> 24) ] & 0xff000000) ^ - (Te1[(s1 >> 16) & 0xff] & 0x00ff0000) ^ - (Te2[(s2 >> 8) & 0xff] & 0x0000ff00) ^ - (Te3[(s3 ) & 0xff] & 0x000000ff) ^ - 
rk[0]; + (Te0[(s0 >> 24) ] & 0xff000000) ^ + (Te1[(s1 >> 16) & 0xff] & 0x00ff0000) ^ + (Te2[(s2 >> 8) & 0xff] & 0x0000ff00) ^ + (Te3[(s3 ) & 0xff] & 0x000000ff) ^ + rk[0]; t1 = - (Te0[(s1 >> 24) ] & 0xff000000) ^ - (Te1[(s2 >> 16) & 0xff] & 0x00ff0000) ^ - (Te2[(s3 >> 8) & 0xff] & 0x0000ff00) ^ - (Te3[(s0 ) & 0xff] & 0x000000ff) ^ - rk[1]; + (Te0[(s1 >> 24) ] & 0xff000000) ^ + (Te1[(s2 >> 16) & 0xff] & 0x00ff0000) ^ + (Te2[(s3 >> 8) & 0xff] & 0x0000ff00) ^ + (Te3[(s0 ) & 0xff] & 0x000000ff) ^ + rk[1]; t2 = - (Te0[(s2 >> 24) ] & 0xff000000) ^ - (Te1[(s3 >> 16) & 0xff] & 0x00ff0000) ^ - (Te2[(s0 >> 8) & 0xff] & 0x0000ff00) ^ - (Te3[(s1 ) & 0xff] & 0x000000ff) ^ - rk[2]; + (Te0[(s2 >> 24) ] & 0xff000000) ^ + (Te1[(s3 >> 16) & 0xff] & 0x00ff0000) ^ + (Te2[(s0 >> 8) & 0xff] & 0x0000ff00) ^ + (Te3[(s1 ) & 0xff] & 0x000000ff) ^ + rk[2]; t3 = - (Te0[(s3 >> 24) ] & 0xff000000) ^ - (Te1[(s0 >> 16) & 0xff] & 0x00ff0000) ^ - (Te2[(s1 >> 8) & 0xff] & 0x0000ff00) ^ - (Te3[(s2 ) & 0xff] & 0x000000ff) ^ - rk[3]; + (Te0[(s3 >> 24) ] & 0xff000000) ^ + (Te1[(s0 >> 16) & 0xff] & 0x00ff0000) ^ + (Te2[(s1 >> 8) & 0xff] & 0x0000ff00) ^ + (Te3[(s2 ) & 0xff] & 0x000000ff) ^ + rk[3]; PUTU32(block , t0); PUTU32(block + 4, t1); @@ -520,7 +523,7 @@ static void AES_EncryptRound_portable(const uint32_t rk[4], uint8_t block[16]) { PUTU32(block + 12, t3); } -static void AES_DecryptRound_portable(const uint32_t rk[4], uint8_t block[16]) { +static void AES_DecryptRound_portable( const uint32_t rk[4], uint8_t block[16] ) { uint32_t s0, s1, s2, s3, t0, t1, t2, t3; s0 = GETU32(block ); @@ -529,29 +532,29 @@ static void AES_DecryptRound_portable(const uint32_t rk[4], uint8_t block[16]) { s3 = GETU32(block + 12); t0 = - Td0[(s0 >> 24) ] ^ - Td1[(s3 >> 16) & 0xff] ^ - Td2[(s2 >> 8) & 0xff] ^ - Td3[(s1 ) & 0xff] ^ - rk[0]; + Td0[(s0 >> 24) ] ^ + Td1[(s3 >> 16) & 0xff] ^ + Td2[(s2 >> 8) & 0xff] ^ + Td3[(s1 ) & 0xff] ^ + rk[0]; t1 = - Td0[(s1 >> 24) ] ^ - Td1[(s0 >> 16) & 0xff] ^ - Td2[(s3 >> 8) & 0xff] ^ - 
Td3[(s2 ) & 0xff] ^ - rk[1]; + Td0[(s1 >> 24) ] ^ + Td1[(s0 >> 16) & 0xff] ^ + Td2[(s3 >> 8) & 0xff] ^ + Td3[(s2 ) & 0xff] ^ + rk[1]; t2 = - Td0[(s2 >> 24) ] ^ - Td1[(s1 >> 16) & 0xff] ^ - Td2[(s0 >> 8) & 0xff] ^ - Td3[(s3 ) & 0xff] ^ - rk[2]; + Td0[(s2 >> 24) ] ^ + Td1[(s1 >> 16) & 0xff] ^ + Td2[(s0 >> 8) & 0xff] ^ + Td3[(s3 ) & 0xff] ^ + rk[2]; t3 = - Td0[(s3 >> 24) ] ^ - Td1[(s2 >> 16) & 0xff] ^ - Td2[(s1 >> 8) & 0xff] ^ - Td3[(s0 ) & 0xff] ^ - rk[3]; + Td0[(s3 >> 24) ] ^ + Td1[(s2 >> 16) & 0xff] ^ + Td2[(s1 >> 8) & 0xff] ^ + Td3[(s0 ) & 0xff] ^ + rk[3]; PUTU32(block , t0); PUTU32(block + 4, t1); diff --git a/include/hashlib/AES-ppc.h b/include/hashlib/AES-ppc.h index 693c5ab2..b2e3730c 100644 --- a/include/hashlib/AES-ppc.h +++ b/include/hashlib/AES-ppc.h @@ -18,46 +18,50 @@ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -template < int Nr > -static inline void AES_Encrypt_PPC(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_t pt[16], uint8_t ct[16]) { +template +static inline void AES_Encrypt_PPC( const uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t pt[16], uint8_t ct[16] ) { const uint8_t * keys = (const uint8_t *)rk; vec_t block = (vec_t)vec_vsx_ld(0, pt); + block = vec_xor(block, (vec_t)vec_vsx_ld(0, keys)); for (int i = 1; i < Nr; i++) { - block = vec_encrypt(block, (vec_t)vec_vsx_ld(i*16, keys)); + block = vec_encrypt(block, (vec_t)vec_vsx_ld(i * 16, keys)); } - block = vec_encryptlast(block, (vec_t)vec_vsx_ld(Nr*16, keys)); + block = vec_encryptlast(block, (vec_t)vec_vsx_ld(Nr * 16, keys)); vec_vsx_st((__vector unsigned char)block, 0, ct); } -template < int Nr > -static inline void AES_Decrypt_PPC(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_t ct[16], uint8_t pt[16]) { +template +static inline void AES_Decrypt_PPC( const uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t ct[16], uint8_t pt[16] ) { const uint8_t * keys = (const uint8_t *)rk; vec_t block = (vec_t)vec_vsx_ld(0, ct); + block 
= vec_xor(block, (vec_t)vec_vsx_ld(0, keys)); for (int i = 1; i < Nr; i++) { - block = vec_decrypt(block, (vec_t)vec_vsx_ld(i*16, keys)); + block = vec_decrypt(block, (vec_t)vec_vsx_ld(i * 16, keys)); } - block = vec_decryptlast(block, (vec_t)vec_vsx_ld(Nr*16, keys)); + block = vec_decryptlast(block, (vec_t)vec_vsx_ld(Nr * 16, keys)); vec_vsx_st((__vector unsigned char)block, 0, pt); } -static inline void AES_EncryptRound_PPC(const uint32_t rk[4], uint8_t block[16]) { +static inline void AES_EncryptRound_PPC( const uint32_t rk[4], uint8_t block[16] ) { vec_t tmp = (vec_t)vec_vsx_ld(0, block); + tmp = vec_encrypt(tmp, (vec_t)vec_vsx_ld(0, (const uint8_t *)rk)); vec_vsx_st((__vector unsigned char)tmp, 0, block); } -static inline void AES_DecryptRound_PPC(const uint32_t rk[4], uint8_t block[16]) { +static inline void AES_DecryptRound_PPC( const uint32_t rk[4], uint8_t block[16] ) { vec_t tmp = (vec_t)vec_vsx_ld(0, block); + tmp = vec_decrypt(tmp, (vec_t)vec_vsx_ld(0, (const uint8_t *)rk)); vec_vsx_st((__vector unsigned char)tmp, 0, block); } diff --git a/include/hashlib/AES.h b/include/hashlib/AES.h index b5d55b14..1cde9b89 100644 --- a/include/hashlib/AES.h +++ b/include/hashlib/AES.h @@ -35,31 +35,31 @@ #include "Intrinsics.h" #if defined(HAVE_X86_64_AES) -# include "AES-aesni.h" + #include "AES-aesni.h" #elif defined(HAVE_ARM_AES) -# include "AES-arm.h" -# include "AES-portable.h" // ARM doesn't have any AES keygen intrinsics + #include "AES-arm.h" + #include "AES-portable.h" // ARM doesn't have any AES keygen intrinsics #elif defined(HAVE_PPC_AES) -# include "AES-ppc.h" -# include "AES-portable.h" // PPC doesn't really have any AES keygen intrinsics + #include "AES-ppc.h" + #include "AES-portable.h" // PPC doesn't really have any AES keygen intrinsics #else -# include "AES-portable.h" + #include "AES-portable.h" #endif -static inline void _bswap_subkeys(uint32_t rk[], int subkeys) { +static inline void _bswap_subkeys( uint32_t rk[], int subkeys ) { for (int i = 0; 
i < subkeys; i++) { rk[i] = COND_BSWAP(rk[i], true); } } -static int AES_KeySetup_Enc(uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) { +static int AES_KeySetup_Enc( uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t cipherKey[], int keyBits ) { // STATIC_ASSERT(keyBits == 128); #if defined(HAVE_X86_64_AES) return AES_KeySetup_Enc_AESNI(rk, cipherKey, keyBits); #elif defined(HAVE_ARM_AES) int Nr = AES_KeySetup_Enc_portable(rk, cipherKey, keyBits); if (isLE()) { - _bswap_subkeys(rk, 4*(Nr+1)); + _bswap_subkeys(rk, 4 * (Nr + 1)); } return Nr; #else @@ -67,14 +67,14 @@ static int AES_KeySetup_Enc(uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey #endif } -static int AES_KeySetup_Dec(uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) { +static int AES_KeySetup_Dec( uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t cipherKey[], int keyBits ) { // STATIC_ASSERT(keyBits == 128); #if defined(HAVE_X86_64_AES) return AES_KeySetup_Dec_AESNI(rk, cipherKey, keyBits); #elif defined(HAVE_ARM_AES) int Nr = AES_KeySetup_Dec_portable(rk, cipherKey, keyBits); if (isLE()) { - _bswap_subkeys(rk, 4*(Nr+1)); + _bswap_subkeys(rk, 4 * (Nr + 1)); } return Nr; #else @@ -82,8 +82,8 @@ static int AES_KeySetup_Dec(uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey #endif } -template < int Nr > -static void AES_Encrypt(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_t pt[16], uint8_t ct[16]) { +template +static void AES_Encrypt( const uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t pt[16], uint8_t ct[16] ) { #if defined(HAVE_X86_64_AES) AES_Encrypt_AESNI(rk, pt, ct); #elif defined(HAVE_ARM_AES) @@ -95,8 +95,8 @@ static void AES_Encrypt(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_t pt[16], #endif } -template < int Nr > -static void AES_Decrypt(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_t ct[16], uint8_t pt[16]) { +template +static void AES_Decrypt( const uint32_t rk[] /*4*(Nr + 1)*/, const uint8_t ct[16], uint8_t pt[16] ) { #if defined(HAVE_X86_64_AES) 
AES_Decrypt_AESNI(rk, pt, ct); #elif defined(HAVE_ARM_AES) @@ -108,7 +108,7 @@ static void AES_Decrypt(const uint32_t rk[/*4*(Nr + 1)*/], const uint8_t ct[16], #endif } -static void AES_EncryptRound(const uint32_t rk[4], uint8_t block[16]) { +static void AES_EncryptRound( const uint32_t rk[4], uint8_t block[16] ) { #if defined(HAVE_X86_64_AES) AES_EncryptRound_AESNI(rk, block); #elif defined(HAVE_ARM_AES) @@ -120,7 +120,7 @@ static void AES_EncryptRound(const uint32_t rk[4], uint8_t block[16]) { #endif } -static void AES_DecryptRound(const uint32_t rk[4], uint8_t block[16]) { +static void AES_DecryptRound( const uint32_t rk[4], uint8_t block[16] ) { #if defined(HAVE_X86_64_AES) AES_DecryptRound_AESNI(rk, block); #elif defined(HAVE_ARM_AES) diff --git a/include/hashlib/Hashlib.h b/include/hashlib/Hashlib.h index 5d341a98..339e1a2f 100644 --- a/include/hashlib/Hashlib.h +++ b/include/hashlib/Hashlib.h @@ -20,42 +20,43 @@ #include -unsigned register_hash(const HashInfo * hinfo); +// Interface for hash implementations +unsigned register_hash( const HashInfo * hinfo ); -const HashInfo * findHash(const char * name); -std::vector findAllHashes(void); -void listHashes(bool nameonly); +// Interface for consumer for getting hashes +const HashInfo * findHash( const char * name ); +std::vector findAllHashes( void ); +void listHashes( bool nameonly ); -bool verifyAllHashes(bool verbose); -bool verifyHash(const HashInfo * hinfo, - enum HashInfo::endianness endian, - bool verbose, bool prefix); +// Interface for ensuring hash is giving expected results +bool verifyAllHashes( bool verbose ); +bool verifyHash( const HashInfo * hinfo, enum HashInfo::endianness endian, bool verbose, bool prefix ); //----------------------------------------------------------------------------- -#define CONCAT_INNER(x, y) x##y -#define CONCAT(x,y) CONCAT_INNER(x, y) - -#define REGISTER_FAMILY(N, ...) 
\ - static_assert(sizeof(#N) > 1, \ - "REGISTER_FAMILY() needs a non-empty name"); \ - static HashFamilyInfo THIS_HASH_FAMILY = []{ \ - HashFamilyInfo $(#N); \ - __VA_ARGS__; \ - return $; \ - }(); \ +#define CONCAT_INNER(x, y) x ## y +#define CONCAT(x, y) CONCAT_INNER(x, y) + +#define REGISTER_FAMILY(N, ...) \ + static_assert(sizeof(#N) > 1, \ + "REGISTER_FAMILY() needs a non-empty name"); \ + static HashFamilyInfo THIS_HASH_FAMILY = []{ \ + HashFamilyInfo $(#N); \ + __VA_ARGS__; \ + return $; \ + }(); \ unsigned CONCAT(N,_ref) -#define REGISTER_HASH(N, ...) \ - static_assert(sizeof(#N) > 1, \ - "REGISTER_HASH() needs a non-empty name"); \ - static HashInfo CONCAT(Hash_,N) = []{ \ - HashInfo $(#N, THIS_HASH_FAMILY.name); \ - __VA_ARGS__; \ - register_hash(&$); \ - return $; \ +#define REGISTER_HASH(N, ...) \ + static_assert(sizeof(#N) > 1, \ + "REGISTER_HASH() needs a non-empty name"); \ + static HashInfo CONCAT(Hash_,N) = []{ \ + HashInfo $(#N, THIS_HASH_FAMILY.name); \ + __VA_ARGS__; \ + register_hash(&$); \ + return $; \ }(); -#define USE_FAMILY(N) \ - extern unsigned CONCAT(N,_ref); \ +#define USE_FAMILY(N) \ + extern unsigned CONCAT(N,_ref); \ CONCAT(N,_ref) = 1 diff --git a/include/hashlib/Mathmult.h b/include/hashlib/Mathmult.h index f2d94552..2bc6365d 100644 --- a/include/hashlib/Mathmult.h +++ b/include/hashlib/Mathmult.h @@ -54,19 +54,19 @@ */ // 32x32->64 multiplication [rhi:rlo = a * b] -static FORCE_INLINE void mult32_64(uint32_t& rlo, uint32_t& rhi, uint32_t a, uint32_t b) { +static FORCE_INLINE void mult32_64( uint32_t & rlo, uint32_t & rhi, uint32_t a, uint32_t b ) { // XXX Are either of these asm blocks better than just the plain code? 
#if 0 && defined(HAVE_ARM_ASM) - __asm__("UMULL w%0, w%1, w%2, w%3\n" - : "+r" (rlo), "+r" (rhi) - : "r" (a), "r" (b) - : "cc", "memory" - ); + __asm__ ("UMULL w%0, w%1, w%2, w%3\n" + : "+r" (rlo), "+r" (rhi) + : "r" (a), "r" (b) + : "cc", "memory" + ); #elif 0 && defined(HAVE_X86_64_ASM) - __asm__("mull %[b]\n" - : "=d" (rhi), "=a" (rlo) - : "1" (a), [b] "rm" (b) - ); + __asm__ ("mull %[b]\n" + : "=d" (rhi), "=a" (rlo) + : "1" (a), [b] "rm" (b) + ); #else uint64_t r = (uint64_t)a * (uint64_t)b; rhi = (uint32_t)(r >> 32); @@ -75,7 +75,7 @@ static FORCE_INLINE void mult32_64(uint32_t& rlo, uint32_t& rhi, uint32_t a, uin } // 32x32->64 multiplication [r64 = a32 * b32] -static FORCE_INLINE void mult32_64(uint64_t & r64, uint32_t a32, uint32_t b32) { +static FORCE_INLINE void mult32_64( uint64_t & r64, uint32_t a32, uint32_t b32 ) { #if defined(_MSC_VER) && defined(_M_IX86) r64 = __emulu(a32, b32); #else @@ -84,56 +84,56 @@ static FORCE_INLINE void mult32_64(uint64_t & r64, uint32_t a32, uint32_t b32) { } // 96-bit addition [rhi:rmi:rlo += addhi:addmi:addlo] -static FORCE_INLINE void add96(uint32_t& rlo, uint32_t& rmi, uint32_t& rhi, const uint32_t& addlo, const uint32_t& addmi, const uint32_t& addhi) { +static FORCE_INLINE void add96( uint32_t & rlo, uint32_t & rmi, uint32_t & rhi, const uint32_t & addlo, + const uint32_t & addmi, const uint32_t & addhi ) { #if defined(HAVE_ARM_ASM) - __asm__("ADDS %w0, %w3, %w0\n" - "ADCS %w1, %w4, %w1\n" - "ADC %w2, %w5, %w2\n" - : "+r" (rlo), "+r" (rmi), "+r" (rhi) - : "r" (addlo), "r" (addmi), "r" (addhi) - : "cc" - ); + __asm__ ("ADDS %w0, %w3, %w0\n" + "ADCS %w1, %w4, %w1\n" + "ADC %w2, %w5, %w2\n" + : "+r" (rlo), "+r" (rmi), "+r" (rhi) + : "r" (addlo), "r" (addmi), "r" (addhi) + : "cc" + ); #elif defined(HAVE_X86_64_ASM) - __asm__("addl %3, %0\n" - "adcl %4, %1\n" - "adcl %5, %2\n" - : "+g" (rlo), "+g" (rmi), "+g" (rhi) - : "g" (addlo), "g" (addmi), "g" (addhi) - : "cc" - ); + __asm__ ("addl %3, %0\n" + "adcl %4, %1\n" + "adcl 
%5, %2\n" + : "+g" (rlo), "+g" (rmi), "+g" (rhi) + : "g" (addlo), "g" (addmi), "g" (addhi) + : "cc" + ); #else - uint64_t w = (((uint64_t)rmi) << 32) + ((uint64_t)rlo); + uint64_t w = (((uint64_t)rmi ) << 32) + ((uint64_t)rlo ); uint64_t r = (((uint64_t)addmi) << 32) + ((uint64_t)addlo) + w; - rhi += (r < w); - rhi += addhi; - rmi = (uint32_t)(r >> 32); - rlo = (uint32_t)(r); + rhi += addhi + (r < w); + rmi = (uint32_t)(r >> 32); + rlo = (uint32_t)(r ); #endif } // 96-bit fused multiply addition [rhi:rmi:rlo += a * b] -static FORCE_INLINE void fma32_96(uint32_t& rlo, uint32_t& rmi, uint32_t& rhi, uint32_t a, uint32_t b) { +static FORCE_INLINE void fma32_96( uint32_t & rlo, uint32_t & rmi, uint32_t & rhi, uint32_t a, uint32_t b ) { // These #defines are not correct; some arm seems to not support this #if 0 && defined(HAVE_ARM_ASM) uint32_t tmphi, tmplo; - __asm__("UMULL %w3, %w4, %w5, %w6\n" - "ADDS %w0, %w3, %w0\n" - "ADCS %w1, %w4, %w1\n" - "ADC %w2, %w2, #0x0\n" - : "+r" (rlo), "+r" (rmi), "+r" (rhi), "=r" (tmplo), "=r" (tmphi) - : "r" (a), "r" (b) - : "cc" - ); + __asm__ ("UMULL %w3, %w4, %w5, %w6\n" + "ADDS %w0, %w3, %w0\n" + "ADCS %w1, %w4, %w1\n" + "ADC %w2, %w2, #0x0\n" + : "+r" (rlo), "+r" (rmi), "+r" (rhi), "=r" (tmplo), "=r" (tmphi) + : "r" (a), "r" (b) + : "cc" + ); #elif defined(HAVE_X86_64_ASM) uint32_t tmphi; - __asm__("mull %5\n" - "addl %%eax, %0\n" - "adcl %%edx, %1\n" - "adcl $0, %2\n" - : "+g" (rlo), "+g" (rmi), "+g" (rhi), "=a" (tmphi) - : "a" (a), "g" (b) - : "edx", "cc" - ); + __asm__ ("mull %5\n" + "addl %%eax, %0\n" + "adcl %%edx, %1\n" + "adcl $0, %2\n" + : "+g" (rlo), "+g" (rmi), "+g" (rhi), "=a" (tmphi) + : "a" (a), "g" (b) + : "edx", "cc" + ); #else uint32_t tmplo, tmpmi, tmphi = 0; mult32_64(tmplo, tmpmi, a, b); @@ -142,7 +142,7 @@ static FORCE_INLINE void fma32_96(uint32_t& rlo, uint32_t& rmi, uint32_t& rhi, u } // 64x64->128 multiplication [rhi:rlo = a * b] -static FORCE_INLINE void mult64_128(uint64_t& rlo, uint64_t& rhi, uint64_t a, 
uint64_t b) { +static FORCE_INLINE void mult64_128( uint64_t & rlo, uint64_t & rhi, uint64_t a, uint64_t b ) { #if defined(HAVE_ARM64_ASM) /* * AARCH64 needs 2 insns to calculate 128-bit result of the @@ -151,16 +151,16 @@ static FORCE_INLINE void mult64_128(uint64_t& rlo, uint64_t& rhi, uint64_t a, ui * is very slow. */ rlo = a * b; - __asm__("umulh %0, %1, %2\n" - : "=r" (rhi) - : "r" (a), "r" (b) - ); + __asm__ ("umulh %0, %1, %2\n" + : "=r" (rhi) + : "r" (a), "r" (b) + ); #elif defined(HAVE_PPC_ASM) rlo = a * b; - __asm__("mulhdu %0, %1, %2\n" - : "=r" (rhi) - : "r" (a), "r" (b) - ); + __asm__ ("mulhdu %0, %1, %2\n" + : "=r" (rhi) + : "r" (a), "r" (b) + ); #elif defined(HAVE_UMUL128) rlo = _umul128(a, b, &rhi); #elif defined(HAVE_UMULH) @@ -173,16 +173,16 @@ static FORCE_INLINE void mult64_128(uint64_t& rlo, uint64_t& rhi, uint64_t a, ui * takes 3-cycles vs. 4 for MULX, MULX permits more freedom in * insn scheduling as it uses less fixed registers. */ - __asm__("mulxq %3,%1,%0\n" - : "=r" (rhi), "=r" (rlo) - : "d" (a), "r" (b) - ); + __asm__ ("mulxq %3,%1,%0\n" + : "=r" (rhi), "=r" (rlo) + : "d" (a), "r" (b) + ); #elif defined(HAVE_X86_64_ASM) - __asm__("mulq %[b]\n" - : "=d" (rhi), "=a" (rlo) - : "1" (a), [b] "rm" (b) - : "cc" - ); + __asm__ ("mulq %[b]\n" + : "=d" (rhi), "=a" (rlo) + : "1" (a), [b] "rm" (b) + : "cc" + ); #elif defined(HAVE_INT128) // Maybe move this before the other x64 ASM methods? // Seems like it's more compiler-friendly, but it produces slower code. 
@@ -202,16 +202,16 @@ static FORCE_INLINE void mult64_128(uint64_t& rlo, uint64_t& rhi, uint64_t a, ui uint64_t tmplo = alo * blo; uint64_t t, carry = 0; - t = tmplo + (tmpmi_0 << 32); - carry += (t < tmplo); - rlo = t + (tmpmi_1 << 32); - carry += (rlo < t); - rhi = tmphi + (tmpmi_0 >> 32) + (tmpmi_1 >> 32) + carry; + t = (tmpmi_0 << 32 ) + tmplo; + carry += (t < tmplo); + rlo = (tmpmi_1 << 32 ) + t; + carry += (rlo < t ); + rhi = (tmpmi_0 >> 32 ) + (tmpmi_1 >> 32) + tmphi + carry; #endif } // 64x64->128 multiplication with no cross-lane carry [rhi:rlo ~= a * b] -static FORCE_INLINE void mult64_128_nocarry(uint64_t& rlo, uint64_t& rhi, uint64_t a, uint64_t b) { +static FORCE_INLINE void mult64_128_nocarry( uint64_t & rlo, uint64_t & rhi, uint64_t a, uint64_t b ) { /* * Implementation of 64x64->128-bit multiplication by four * 32x32->64 bit multiplication, excluding the carry bits. This @@ -230,27 +230,27 @@ static FORCE_INLINE void mult64_128_nocarry(uint64_t& rlo, uint64_t& rhi, uint64 } // 128-bit addition special case [rhi:rlo += 0:addlo] -static FORCE_INLINE void add128(uint64_t& rlo, uint64_t& rhi, uint64_t addlo) { +static FORCE_INLINE void add128( uint64_t & rlo, uint64_t & rhi, uint64_t addlo ) { #if defined(HAVE_X86_64_ASM) - __asm__("addq %2, %0\n" - "adcq $0, %1\n" -#if defined(DEBUG) - : "+r" (rlo), "+r" (rhi) - : "r" (addlo) -#elif defined(__clang__) - // clang cannot work properly with "g" and silently - // produces hardly-workging code, if "g" is specified; - // see, for instance, here: - // http://stackoverflow.com/questions/16850309/clang-llvm-inline-assembly-multiple-constraints-with-useless-spills-reload - // To avoid 3x performance hit we have to specify sources/destinations - : "+r" (rlo), "+r" (rhi) - : "m" (addlo) -#else - : "+g" (rlo), "+g" (rhi) - : "g" (addlo) -#endif - : "cc" - ); + __asm__ ("addq %2, %0\n" + "adcq $0, %1\n" + #if defined(DEBUG) + : "+r" (rlo), "+r" (rhi) + : "r" (addlo) + #elif defined(__clang__) + // clang cannot work 
properly with "g" and silently + // produces hardly-workging code, if "g" is specified; + // see, for instance, here: + // http://stackoverflow.com/questions/16850309/clang-llvm-inline-assembly-multiple-constraints-with-useless-spills-reload + // To avoid 3x performance hit we have to specify sources/destinations + : "+r" (rlo), "+r" (rhi) + : "m" (addlo) + #else + : "+g" (rlo), "+g" (rhi) + : "g" (addlo) + #endif + : "cc" + ); #else rlo += addlo; rhi += (rlo < addlo); @@ -258,33 +258,33 @@ static FORCE_INLINE void add128(uint64_t& rlo, uint64_t& rhi, uint64_t addlo) { } // 128-bit addition [rhi:rlo += addhi:addlo] -static FORCE_INLINE void add128(uint64_t& rlo, uint64_t& rhi, uint64_t addlo, uint64_t addhi) { +static FORCE_INLINE void add128( uint64_t & rlo, uint64_t & rhi, uint64_t addlo, uint64_t addhi ) { #if defined(HAVE_X86_64_ASM) - __asm__("addq %2, %0\n" - "adcq %3, %1\n" -#if defined(DEBUG) - : "+r" (rlo), "+r" (rhi) - : "r" (addlo), "r" (addhi) -#elif defined(__clang__) - // clang cannot work properly with "g" and silently - // produces hardly-workging code, if "g" is specified; - // see, for instance, here: - // http://stackoverflow.com/questions/16850309/clang-llvm-inline-assembly-multiple-constraints-with-useless-spills-reload - // To avoid 3x performance hit we have to specify sources/destinations - : "+r" (rlo), "+r" (rhi) - : "m" (addlo), "m" (addhi) -#else - : "+r" (rlo), "+g" (rhi) - : "g" (addlo), "g" (addhi) -#endif - : "cc" - ); + __asm__ ("addq %2, %0\n" + "adcq %3, %1\n" + #if defined(DEBUG) + : "+r" (rlo), "+r" (rhi) + : "r" (addlo), "r" (addhi) + #elif defined(__clang__) + // clang cannot work properly with "g" and silently + // produces hardly-workging code, if "g" is specified; + // see, for instance, here: + // http://stackoverflow.com/questions/16850309/clang-llvm-inline-assembly-multiple-constraints-with-useless-spills-reload + // To avoid 3x performance hit we have to specify sources/destinations + : "+r" (rlo), "+r" (rhi) + : "m" 
(addlo), "m" (addhi) + #else + : "+r" (rlo), "+g" (rhi) + : "g" (addlo), "g" (addhi) + #endif + : "cc" + ); #elif defined(HAVE_PPC_ASM) - __asm__("addc %1, %1, %3\n" - "adde %0, %0, %2\n" - : "+r" (rhi), "+r" (rlo) - : "r" (addhi), "r" (addlo) - ); + __asm__ ("addc %1, %1, %3\n" + "adde %0, %0, %2\n" + : "+r" (rhi), "+r" (rlo) + : "r" (addhi), "r" (addlo) + ); #else rlo += addlo; rhi += (rlo < addlo); @@ -293,28 +293,29 @@ static FORCE_INLINE void add128(uint64_t& rlo, uint64_t& rhi, uint64_t addlo, ui } // 192-bit addition [rhi:rmi:rlo += addhi:addmi:addlo] -static FORCE_INLINE void add192(uint64_t& rlo, uint64_t& rmi, uint64_t& rhi, const uint64_t& addlo, const uint64_t& addmi, const uint64_t& addhi) { +static FORCE_INLINE void add192( uint64_t & rlo, uint64_t & rmi, uint64_t & rhi, const uint64_t & addlo, + const uint64_t & addmi, const uint64_t & addhi ) { #if defined(HAVE_X86_64_ASM) - __asm__("addq %3, %0\n" - "adcq %4, %1\n" - "adcq %5, %2\n" -#if defined(DEBUG) - : "+r" (rlo), "+r" (rmi), "+r" (rhi) - : "r" (addlo), "r" (addmi), "r" (addhi) -#elif defined(__clang__) - // clang cannot work properly with "g" and silently - // produces hardly-workging code, if "g" is specified; - // see, for instance, here: - // http://stackoverflow.com/questions/16850309/clang-llvm-inline-assembly-multiple-constraints-with-useless-spills-reload - // To avoid 3x performance hit we have to specify sources/destinations - : "+r" (rlo), "+r" (rmi), "+r" (rhi) - : "m" (addlo), "m" (addmi), "m" (addhi) -#else - : "+g" (rlo), "+g" (rmi), "+g" (rhi) - : "rm" (addlo), "rm" (addmi), "rm" (addhi) -#endif - : "cc" - ); + __asm__ ("addq %3, %0\n" + "adcq %4, %1\n" + "adcq %5, %2\n" + #if defined(DEBUG) + : "+r" (rlo), "+r" (rmi), "+r" (rhi) + : "r" (addlo), "r" (addmi), "r" (addhi) + #elif defined(__clang__) + // clang cannot work properly with "g" and silently + // produces hardly-workging code, if "g" is specified; + // see, for instance, here: + // 
http://stackoverflow.com/questions/16850309/clang-llvm-inline-assembly-multiple-constraints-with-useless-spills-reload + // To avoid 3x performance hit we have to specify sources/destinations + : "+r" (rlo), "+r" (rmi), "+r" (rhi) + : "m" (addlo), "m" (addmi), "m" (addhi) + #else + : "+g" (rlo), "+g" (rmi), "+g" (rhi) + : "rm" (addlo), "rm" (addmi), "rm" (addhi) + #endif + : "cc" + ); #else rlo += addlo; rmi += (rlo < addlo); @@ -325,7 +326,7 @@ static FORCE_INLINE void add192(uint64_t& rlo, uint64_t& rmi, uint64_t& rhi, con } // 128-bit fused multiply addition [rhi:rlo += a * b] -static FORCE_INLINE void fma64_128(uint64_t& rlo, uint64_t& rhi, uint64_t a, uint64_t b) { +static FORCE_INLINE void fma64_128( uint64_t & rlo, uint64_t & rhi, uint64_t a, uint64_t b ) { #if defined(HAVE_X86_64_ASM) /* * Dummy variable to tell the compiler that the register rax is @@ -333,25 +334,25 @@ static FORCE_INLINE void fma64_128(uint64_t& rlo, uint64_t& rhi, uint64_t a, uin * below. Better syntactic expression is very welcome. 
*/ uint64_t dummy; - __asm__("mulq %4\n" - "addq %%rax, %0\n" - "adcq %%rdx, %1\n" -#if defined(DEBUG) - : "+r" (rlo), "+r" (rhi), "=a" (dummy) - : "a" (a), "r" (b) -#elif defined(__clang__) - // clang cannot work properly with "g" and silently - // produces hardly-workging code, if "g" is specified; - // see, for instance, here: - // http://stackoverflow.com/questions/16850309/clang-llvm-inline-assembly-multiple-constraints-with-useless-spills-reload - // To avoid 3x performance hit we have to specify sources/destinations - : "+r" (rlo), "+r" (rhi), "=a" (dummy) - : "a" (a), "m" (b) -#else - : "+g" (rlo), "+g" (rhi), "=a" (dummy) - : "a" (a), "g" (b) -#endif - : "rdx", "cc"); + __asm__ ("mulq %4\n" + "addq %%rax, %0\n" + "adcq %%rdx, %1\n" + #if defined(DEBUG) + : "+r" (rlo), "+r" (rhi), "=a" (dummy) + : "a" (a), "r" (b) + #elif defined(__clang__) + // clang cannot work properly with "g" and silently + // produces hardly-workging code, if "g" is specified; + // see, for instance, here: + // http://stackoverflow.com/questions/16850309/clang-llvm-inline-assembly-multiple-constraints-with-useless-spills-reload + // To avoid 3x performance hit we have to specify sources/destinations + : "+r" (rlo), "+r" (rhi), "=a" (dummy) + : "a" (a), "m" (b) + #else + : "+g" (rlo), "+g" (rhi), "=a" (dummy) + : "a" (a), "g" (b) + #endif + : "rdx", "cc"); #else uint64_t tmplo, tmphi; mult64_128(tmplo, tmphi, a, b); @@ -360,7 +361,7 @@ static FORCE_INLINE void fma64_128(uint64_t& rlo, uint64_t& rhi, uint64_t a, uin } // 192-bit fused multiply addition [rhi:rmi:rlo += a * b] -static FORCE_INLINE void fma64_192(uint64_t& rlo, uint64_t& rmi, uint64_t& rhi, uint64_t a, uint64_t b) { +static FORCE_INLINE void fma64_192( uint64_t & rlo, uint64_t & rmi, uint64_t & rhi, uint64_t a, uint64_t b ) { #if defined(HAVE_X86_64_ASM) /* * Dummy variable to tell the compiler that the register rax is @@ -368,26 +369,26 @@ static FORCE_INLINE void fma64_192(uint64_t& rlo, uint64_t& rmi, uint64_t& rhi, * 
below. Better syntactic expression is very welcome. */ uint64_t dummy; - __asm__("mulq %5\n" - "addq %%rax, %0\n" - "adcq %%rdx, %1\n" - "adcq $0, %2\n" -#if defined(DEBUG) - : "+r" (rlo), "+r" (rmi), "+r" (rhi), "=a" (dummy) - : "a" (a), "r" (b) -#elif defined(__clang__) - // clang cannot work properly with "g" and silently - // produces hardly-workging code, if "g" is specified; - // see, for instance, here: - // http://stackoverflow.com/questions/16850309/clang-llvm-inline-assembly-multiple-constraints-with-useless-spills-reload - // To avoid 3x performance hit we have to specify sources/destinations - : "+r" (rlo), "+r" (rmi), "+r" (rhi), "=a" (dummy) - : "a" (a), "m" (b) -#else - : "+g" (rlo), "+g" (rmi), "+g" (rhi), "=a" (dummy) - : "a" (a), "g" (b) -#endif - : "rdx", "cc" ); + __asm__ ("mulq %5\n" + "addq %%rax, %0\n" + "adcq %%rdx, %1\n" + "adcq $0, %2\n" + #if defined(DEBUG) + : "+r" (rlo), "+r" (rmi), "+r" (rhi), "=a" (dummy) + : "a" (a), "r" (b) + #elif defined(__clang__) + // clang cannot work properly with "g" and silently + // produces hardly-workging code, if "g" is specified; + // see, for instance, here: + // http://stackoverflow.com/questions/16850309/clang-llvm-inline-assembly-multiple-constraints-with-useless-spills-reload + // To avoid 3x performance hit we have to specify sources/destinations + : "+r" (rlo), "+r" (rmi), "+r" (rhi), "=a" (dummy) + : "a" (a), "m" (b) + #else + : "+g" (rlo), "+g" (rmi), "+g" (rhi), "=a" (dummy) + : "a" (a), "g" (b) + #endif + : "rdx", "cc"); #else uint64_t tmplo, tmpmi, tmphi = 0; mult64_128(tmplo, tmpmi, a, b); @@ -396,11 +397,12 @@ static FORCE_INLINE void fma64_192(uint64_t& rlo, uint64_t& rmi, uint64_t& rhi, } // 128x128->128 multiplication [rhi:rlo = a * bhi:blo] -static FORCE_INLINE void mult128_128(uint64_t& rlo, uint64_t& rhi, uint64_t alo, uint64_t ahi, uint64_t blo, uint64_t bhi) { +static FORCE_INLINE void mult128_128( uint64_t & rlo, uint64_t & rhi, uint64_t alo, + uint64_t ahi, uint64_t blo, uint64_t 
bhi ) { #if defined(HAVE_INT128) uint128_t r = (((uint128_t)ahi) << 64) + (uint128_t)alo; uint128_t c = (((uint128_t)bhi) << 64) + (uint128_t)blo; - r = r * c; + r = r * c; rhi = (uint64_t)(r >> 64); rlo = (uint64_t)r; #else diff --git a/lib/Hashinfo.cpp b/lib/Hashinfo.cpp index 02aae7aa..17c9b542 100644 --- a/lib/Hashinfo.cpp +++ b/lib/Hashinfo.cpp @@ -24,21 +24,22 @@ #include #include -const char * HashInfo::_fixup_name(const char * in) { +const char * HashInfo::_fixup_name( const char * in ) { // Since dashes can't be in C/C++ identifiers, but humans want // them in names, replace underscores with dashes. Similarly, // replace double underscores with dots. - std::string out(in); + std::string out( in ); + do { size_t p = out.find("__"); - if (p == std::string::npos) break; + if (p == std::string::npos) { break; } out.replace(p, 2, "."); - } while(true); + } while (true); std::replace(&out[0], &out[out.length()], '_', '-'); return strdup(out.c_str()); } -const char * HashFamilyInfo::_fixup_name(const char * in) { +const char * HashFamilyInfo::_fixup_name( const char * in ) { return HashInfo::_fixup_name(in); } @@ -46,56 +47,57 @@ const char * HashFamilyInfo::_fixup_name(const char * in) { // This should hopefully be a thorough and uambiguous test of whether a hash // is correctly implemented on a given platform. -uint32_t HashInfo::_ComputedVerifyImpl(const HashInfo * hinfo, enum HashInfo::endianness endian) const { - const HashFn hash = hinfo->hashFn(endian); - const uint32_t hashbits = hinfo->bits; - const uint32_t hashbytes = hashbits / 8; - - uint8_t * key = new uint8_t[256]; - uint8_t * hashes = new uint8_t[hashbytes * 256]; - uint8_t * total = new uint8_t[hashbytes]; - - memset(key,0,256); - memset(hashes,0,hashbytes*256); - memset(total,0,hashbytes); - - // Hash keys of the form {}, {0}, {0,1}, {0,1,2}... 
up to N=255, using - // 256-N as the seed - for(int i = 0; i < 256; i++) { - seed_t seed = 256 - i; - seed = hinfo->Seed(seed, true, 1); - hash(key, i, seed, &hashes[i*hashbytes]); - addVCodeInput(key, i); - key[i] = (uint8_t)i; - } - - // Then hash the result array - seed_t seed = 0; - seed = hinfo->Seed(0, true, 1); - hash(hashes, hashbytes*256, seed, total); - addVCodeOutput(hashes, 256*hashbytes); - addVCodeOutput(total, hashbytes); - - // The first four bytes of that hash, interpreted as a little-endian - // integer, is our verification value - uint32_t verification = (total[0] << 0) | (total[1] << 8) | - (total[2] << 16) | (total[3] << 24) ; - addVCodeResult(verification); - - delete [] total; - delete [] hashes; - delete [] key; - - return verification; +uint32_t HashInfo::_ComputedVerifyImpl( const HashInfo * hinfo, enum HashInfo::endianness endian ) const { + const HashFn hash = hinfo->hashFn(endian); + const uint32_t hashbits = hinfo->bits; + const uint32_t hashbytes = hashbits / 8; + + uint8_t * key = new uint8_t[256 ]; + uint8_t * hashes = new uint8_t[hashbytes * 256]; + uint8_t * total = new uint8_t[hashbytes ]; + + memset(key , 0, 256); + memset(hashes, 0, hashbytes * 256); + memset(total , 0, hashbytes); + + // Hash keys of the form {}, {0}, {0,1}, {0,1,2}... 
up to N=255, using + // 256-N as the seed + for (int i = 0; i < 256; i++) { + seed_t seed = 256 - i; + seed = hinfo->Seed(seed, true, 1); + hash(key, i, seed, &hashes[i * hashbytes]); + addVCodeInput(key, i); + key[i] = (uint8_t)i; + } + + // Then hash the result array + seed_t seed = 0; + seed = hinfo->Seed(0, true, 1); + hash(hashes, hashbytes * 256, seed, total); + addVCodeOutput(hashes, 256 * hashbytes); + addVCodeOutput(total , hashbytes ); + + // The first four bytes of that hash, interpreted as a little-endian + // integer, is our verification value + uint32_t verification = (total[0] << 0) | (total[1] << 8) | + (total[2] << 16) | (total[3] << 24); + addVCodeResult(verification); + + delete [] total; + delete [] hashes; + delete [] key; + + return verification; } //----------------------------------------------------------------------------- // Utility function for hashes to easily specify that any seeds in // their badseed set should be excluded when their FixupSeed() method // is called. -seed_t excludeBadseeds(const HashInfo * hinfo, const seed_t seed) { +seed_t excludeBadseeds( const HashInfo * hinfo, const seed_t seed ) { seed_t newseed = seed; - auto endp = hinfo->badseeds.end(); + auto endp = hinfo->badseeds.end(); + while (hinfo->badseeds.find(newseed) != endp) { newseed++; } @@ -104,6 +106,6 @@ seed_t excludeBadseeds(const HashInfo * hinfo, const seed_t seed) { // Utility function for hashes to easily specify that the seed value // should not be 0. -seed_t excludeZeroSeed(const HashInfo * hinfo, const seed_t seed) { +seed_t excludeZeroSeed( const HashInfo * hinfo, const seed_t seed ) { return (seed == 0) ? 
1 : seed; } diff --git a/lib/Hashlib.cpp b/lib/Hashlib.cpp index 5344ebda..f948a3aa 100644 --- a/lib/Hashlib.cpp +++ b/lib/Hashlib.cpp @@ -25,54 +25,56 @@ #include //----------------------------------------------------------------------------- -typedef std::unordered_map HashMap; -typedef std::vector HashMapOrder; +typedef std::unordered_map HashMap; +typedef std::vector HashMapOrder; -static HashMap& hashMap() { - static HashMap * map = new HashMap; - return *map; +static HashMap & hashMap() { + static HashMap * map = new HashMap; + + return *map; } //----------------------------------------------------------------------------- // Add a hash to the hashMap list of all hashes. // // FIXME Verify hinfo is all filled out. -unsigned register_hash(const HashInfo * hinfo) { - static std::unordered_map hashcodes; - std::string name = hinfo->name; - // Allow users to lookup hashes by any case - std::transform(name.begin(), name.end(), name.begin(), ::tolower); - - if (hashMap().find(name) != hashMap().end()) { - printf("Hash names must be unique.\n"); - printf("\"%s\" (\"%s\") was added multiple times.\n", hinfo->name, name.c_str()); - printf("Note that hash names are using a case-insensitive comparison.\n"); - exit(1); - } - - if (hinfo->verification_LE != 0) { - const auto it_LE = hashcodes.find(hinfo->verification_LE); - if (it_LE == hashcodes.end()) { - hashcodes[hinfo->verification_LE] = hinfo; - } else { - printf("WARNING: Hash with verification code %08x was already registered: %s\n", - hinfo->verification_LE, it_LE->second->name); - printf(" Are you certain %s is a unique implementation?\n", hinfo->name); - } - } - if ((hinfo->verification_BE != 0) && (hinfo->verification_BE != hinfo->verification_LE)) { - const auto it_BE = hashcodes.find(hinfo->verification_BE); - if (it_BE == hashcodes.end()) { - hashcodes[hinfo->verification_BE] = hinfo; - } else { - printf("WARNING: Hash with verification code %08x was already registered: %s\n", - hinfo->verification_BE, 
it_BE->second->name); - printf(" Are you certain %s is a unique implementation?\n", hinfo->name); - } - } - - hashMap()[name] = hinfo; - return hashMap().size(); +unsigned register_hash( const HashInfo * hinfo ) { + static std::unordered_map hashcodes; + std::string name = hinfo->name; + + // Allow users to lookup hashes by any case + std::transform(name.begin(), name.end(), name.begin(), ::tolower); + + if (hashMap().find(name) != hashMap().end()) { + printf("Hash names must be unique.\n"); + printf("\"%s\" (\"%s\") was added multiple times.\n", hinfo->name, name.c_str()); + printf("Note that hash names are using a case-insensitive comparison.\n"); + exit(1); + } + + if (hinfo->verification_LE != 0) { + const auto it_LE = hashcodes.find(hinfo->verification_LE); + if (it_LE == hashcodes.end()) { + hashcodes[hinfo->verification_LE] = hinfo; + } else { + printf("WARNING: Hash with verification code %08x was already registered: %s\n", + hinfo->verification_LE, it_LE->second->name); + printf(" Are you certain %s is a unique implementation?\n", hinfo->name); + } + } + if ((hinfo->verification_BE != 0) && (hinfo->verification_BE != hinfo->verification_LE)) { + const auto it_BE = hashcodes.find(hinfo->verification_BE); + if (it_BE == hashcodes.end()) { + hashcodes[hinfo->verification_BE] = hinfo; + } else { + printf("WARNING: Hash with verification code %08x was already registered: %s\n", + hinfo->verification_BE, it_BE->second->name); + printf(" Are you certain %s is a unique implementation?\n", hinfo->name); + } + } + + hashMap()[name] = hinfo; + return hashMap().size(); } //----------------------------------------------------------------------------- @@ -84,75 +86,80 @@ unsigned register_hash(const HashInfo * hinfo) { // // This is overloaded for mock hashes to also override the sorting for // _family name_, which is not something general users should do. 
-static HashMapOrder defaultSort(HashMap & map) { +static HashMapOrder defaultSort( HashMap & map ) { HashMapOrder hashes; + hashes.reserve(map.size()); - for (auto kv : map) { + for (auto kv: map) { hashes.push_back(kv.second); } - std::sort(hashes.begin(), hashes.end(), - [](const HashInfo * a, const HashInfo * b) { - int r; - // Mock hashes go before others - if (a->isMock() != b->isMock()) - return a->isMock(); - // Mock hashes use sort_order over all other criteria - if (a->isMock() && (a->sort_order != b->sort_order)) - return (a->sort_order < b->sort_order); - // Cryptographic hashes go before non-crypto - if (a->isCrypto() != b->isCrypto()) - return a->isCrypto(); - // Then sort by family (case-insensitive) - if ((r = strcasecmp(a->family, b->family)) != 0) - return (r < 0); - // Then by hash output size (smaller first) - if (a->bits != b->bits) - return (a->bits < b->bits); - // Then by explicit sort_order - if (a->sort_order != b->sort_order) - return (a->sort_order < b->sort_order); - // And finally by hash name (case-insensitive) - if ((r = strcasecmp(a->name, b->name)) != 0) - return (r < 0); - return false; - }); + std::sort(hashes.begin(), hashes.end(), []( const HashInfo * a, const HashInfo * b ) { + int r; + // Mock hashes go before others + if (a->isMock() != b->isMock()) { + return a->isMock(); + } + // Mock hashes use sort_order over all other criteria + if (a->isMock() && (a->sort_order != b->sort_order)) { + return a->sort_order < b->sort_order; + } + // Cryptographic hashes go before non-crypto + if (a->isCrypto() != b->isCrypto()) { + return a->isCrypto(); + } + // Then sort by family (case-insensitive) + if ((r = strcasecmp(a->family, b->family)) != 0) { + return r < 0; + } + // Then by hash output size (smaller first) + if (a->bits != b->bits) { + return a->bits < b->bits; + } + // Then by explicit sort_order + if (a->sort_order != b->sort_order) { + return a->sort_order < b->sort_order; + } + // And finally by hash name (case-insensitive) 
+ if ((r = strcasecmp(a->name, b->name)) != 0) { + return r < 0; + } + return false; + }); return hashes; } -std::vector findAllHashes(void) { +std::vector findAllHashes( void ) { HashMapOrder hashes; + hashes = defaultSort(hashMap()); return hashes; } -const HashInfo * findHash(const char * name) { - std::string n = name; - // Search without regards to case - std::transform(n.begin(), n.end(), n.begin(), ::tolower); - // Since underscores can't be in names, the user must have meant a dash - std::replace(n.begin(), n.end(), '_', '-'); - - const auto it = hashMap().find(n); - if (it == hashMap().end()) { - return NULL; - } - return it->second; +const HashInfo * findHash( const char * name ) { + std::string n = name; + + // Search without regards to case + std::transform(n.begin(), n.end(), n.begin(), ::tolower); + // Since underscores can't be in names, the user must have meant a dash + std::replace(n.begin(), n.end(), '_', '-'); + + const auto it = hashMap().find(n); + if (it == hashMap().end()) { + return NULL; + } + return it->second; } -void listHashes(bool nameonly) { +void listHashes( bool nameonly ) { if (!nameonly) { printf("Hashnames can be supplied using any case letters.\n\n"); - printf("%-25s %4s %6s %-60s\n", - "Name", "Bits", "Type", "Description"); - printf("%-25s %4s %6s %-60s\n", - "----", "----", "----", "-----------"); + printf("%-25s %4s %6s %-60s\n", "Name", "Bits", "Type", "Description"); + printf("%-25s %4s %6s %-60s\n", "----", "----", "----", "-----------"); } - for (const HashInfo * h : defaultSort(hashMap())) { + for (const HashInfo * h: defaultSort(hashMap())) { if (!nameonly) { - printf("%-25s %4d %6s %-60s\n", - h->name, h->bits, - h->isMock() ? "MOCK" : (h->isCrypto() ? "CRYPTO" : ""), - h->desc); + printf("%-25s %4d %6s %-60s\n", h->name, h->bits, + h->isMock() ? "MOCK" : (h->isCrypto() ? 
"CRYPTO" : ""), h->desc); } else { printf("%s\n", h->name); } @@ -162,16 +169,14 @@ void listHashes(bool nameonly) { //----------------------------------------------------------------------------- // Hash verification routines -static void reportInitFailure(const HashInfo * hinfo) { - printf("%25s - Hash initialization failed! ...... FAIL!\n", - hinfo->name); +static void reportInitFailure( const HashInfo * hinfo ) { + printf("%25s - Hash initialization failed! ...... FAIL!\n", hinfo->name); } -static bool compareVerification(uint32_t expected, uint32_t actual, - const char * endstr, const char * name, - bool verbose, bool prefix) { +static bool compareVerification( uint32_t expected, uint32_t actual, const char * endstr, + const char * name, bool verbose, bool prefix ) { const char * result_str; - bool result = true; + bool result = true; if (expected == actual) { result_str = (actual != 0) ? "PASS\n" : "INSECURE (should not be 0)\n"; @@ -179,7 +184,7 @@ static bool compareVerification(uint32_t expected, uint32_t actual, result_str = "SKIP (unverifiable)\n"; } else { result_str = "FAIL! (Expected 0x%08x)\n"; - result = false; + result = false; } if (verbose) { @@ -193,8 +198,8 @@ static bool compareVerification(uint32_t expected, uint32_t actual, return result; } -static const char * endianstr(enum HashInfo::endianness e) { - switch(e) { +static const char * endianstr( enum HashInfo::endianness e ) { + switch (e) { case HashInfo::ENDIAN_LITTLE : return "LE"; // "Little endian" case HashInfo::ENDIAN_BIG : return "BE"; // "Big endian" case HashInfo::ENDIAN_NATIVE : return isLE() ? 
"LE" : "BE"; @@ -205,21 +210,20 @@ static const char * endianstr(enum HashInfo::endianness e) { return NULL; /* unreachable */ } -bool verifyHash(const HashInfo * hinfo, enum HashInfo::endianness endian, - bool verbose, bool prefix = true) { +bool verifyHash( const HashInfo * hinfo, enum HashInfo::endianness endian, bool verbose, bool prefix = true ) { bool result = true; const uint32_t actual = hinfo->ComputedVerify(endian); const uint32_t expect = hinfo->ExpectedVerify(endian); - result &= compareVerification(expect, actual, endianstr(endian), - hinfo->name, verbose, prefix); + result &= compareVerification(expect, actual, endianstr(endian), hinfo->name, verbose, prefix); return result; } -bool verifyAllHashes(bool verbose) { +bool verifyAllHashes( bool verbose ) { bool result = true; - for (const HashInfo * h : defaultSort(hashMap())) { + + for (const HashInfo * h: defaultSort(hashMap())) { if (!h->Init()) { if (verbose) { reportInitFailure(h); @@ -228,13 +232,13 @@ bool verifyAllHashes(bool verbose) { } else if (h->isEndianDefined()) { // Verify the hash the canonical way first, and then the // other way. - result &= verifyHash(h, HashInfo::ENDIAN_DEFAULT, verbose); + result &= verifyHash(h, HashInfo::ENDIAN_DEFAULT , verbose); result &= verifyHash(h, HashInfo::ENDIAN_NONDEFAULT, verbose); } else { // Always verify little-endian first, just for consistency // for humans looking at the results. 
result &= verifyHash(h, HashInfo::ENDIAN_LITTLE, verbose); - result &= verifyHash(h, HashInfo::ENDIAN_BIG, verbose); + result &= verifyHash(h, HashInfo::ENDIAN_BIG , verbose); } } printf("\n"); @@ -243,10 +247,12 @@ bool verifyAllHashes(bool verbose) { //----------------------------------------------------------------------------- // Run Mathmult unit tests via global constructor -int Mathmult_selftest(void); +int Mathmult_selftest( void ); + static int selftest_result = Mathmult_selftest(); //----------------------------------------------------------------------------- // See Hashrefs.cpp.in for why these exist. You can very likely just ignore them. unsigned refs(); + static unsigned dummy = refs(); diff --git a/lib/Mathmult.cpp b/lib/Mathmult.cpp index 9314734e..0ad101e6 100644 --- a/lib/Mathmult.cpp +++ b/lib/Mathmult.cpp @@ -21,16 +21,15 @@ #include "Mathmult.h" -template < typename T > -static void fail(const char * test, int idx, const T * expected, - std::initializer_list actual) { +template +static void fail( const char * test, int idx, const T * expected, std::initializer_list actual ) { if (idx >= 0) { printf("Test %s #%d failed!\n\tGot :", test, idx); } else { printf("Test %s failed!\n\tGot :", test); } int count = 0; - for (auto val : actual) { + for (auto val: actual) { if (sizeof(T) == 4) { printf(" %08x", val); } else { @@ -49,7 +48,7 @@ static void fail(const char * test, int idx, const T * expected, printf("\n\n"); } -static bool test_32(void) { +static bool test_32( void ) { bool passed = true; const uint32_t tests[14][4] = { { 0x1, 0x1, 0x0, 0x1 }, @@ -67,32 +66,33 @@ static bool test_32(void) { { 0xFFFFFFFF, 0x11111111, 0x11111110, 0xEEEEEEEF }, { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x1 }, }; - const uint32_t testsum[3] = { 0x33058587, 0x416D9DEB, 0x2580A632}; + const uint32_t testsum[3] = { 0x33058587, 0x416D9DEB, 0x2580A632 }; uint32_t sum1_lo, sum1_mi, sum1_hi, sum2_lo, sum2_mi, sum2_hi; uint32_t r1_lo, r1_hi, r2_lo, r2_hi; uint64_t r1_64, 
r2_64; + sum1_lo = sum1_mi = sum1_hi = sum2_lo = sum2_mi = sum2_hi = 0; for (int i = 0; i < 14; i++) { - mult32_64(r1_lo, r1_hi, tests[i][0], tests[i][1]); + mult32_64(r1_lo, r1_hi , tests[i][0], tests[i][1]); mult32_64(r1_64, tests[i][0], tests[i][1]); - mult32_64(r2_lo, r2_hi, tests[i][1], tests[i][0]); + mult32_64(r2_lo, r2_hi , tests[i][1], tests[i][0]); mult32_64(r2_64, tests[i][1], tests[i][0]); if ((r1_hi != tests[i][2]) || (r1_lo != tests[i][3])) { - fail("mult32_64, r1, rhi:rlo", i, &tests[i][2], {r1_hi, r1_lo}); + fail("mult32_64, r1, rhi:rlo", i, &tests[i][2], { r1_hi, r1_lo }); passed = false; } if (((r1_64 >> 32) != tests[i][2]) || (((uint32_t)r1_64) != tests[i][3])) { - fail("mult32_64, r1, r64", i, &tests[i][2], {(uint32_t)(r1_64 >> 32), (uint32_t)r1_64}); + fail("mult32_64, r1, r64", i, &tests[i][2], { (uint32_t)(r1_64 >> 32), (uint32_t)r1_64 }); passed = false; } if ((r2_hi != tests[i][2]) || (r2_lo != tests[i][3])) { - fail("mult32_64, r2, rhi:rlo", i, &tests[i][2], {r2_hi, r2_lo}); + fail("mult32_64, r2, rhi:rlo", i, &tests[i][2], { r2_hi, r2_lo }); passed = false; } if (((r2_64 >> 32) != tests[i][2]) || (((uint32_t)r2_64) != tests[i][3])) { - fail("mult32_64, r2, r64", i, &tests[i][2], {(uint32_t)(r2_64 >> 32), (uint32_t)r2_64}); + fail("mult32_64, r2, r64", i, &tests[i][2], { (uint32_t)(r2_64 >> 32), (uint32_t)r2_64 }); passed = false; } add96(sum1_lo, sum1_mi, sum1_hi, tests[i][3], tests[i][2], 0x38ADE957); @@ -102,80 +102,115 @@ static bool test_32(void) { } if ((sum1_hi != testsum[0]) || (sum1_mi != testsum[1]) || (sum1_lo != testsum[2])) { - fail("add96", -1, &testsum[0], {sum1_hi, sum1_mi, sum1_lo}); + fail("add96", -1, &testsum[0], { sum1_hi, sum1_mi, sum1_lo }); passed = false; } if ((sum2_hi != testsum[0]) || (sum2_mi != testsum[1]) || (sum2_lo != testsum[2])) { - fail("fma32_96", -1, &testsum[0], {sum2_hi, sum2_mi, sum2_lo}); + fail("fma32_96", -1, &testsum[0], { sum2_hi, sum2_mi, sum2_lo }); passed = false; } return passed; } -static 
bool test_64(void) { +static bool test_64( void ) { bool passed = true; const uint64_t tests[16][6] = { - { 0x1 , 0x1, - 0x0 , 0x1, - 0x0 , 0x1 }, - { UINT64_C(0x2F9AC342168A6741), 0x0, - 0x0 , 0x0, - 0x0 , 0x0 }, + { + 0x1, 0x1, + 0x0, 0x1, + 0x0, 0x1 + }, + { + UINT64_C(0x2F9AC342168A6741), 0x0, + 0x0, 0x0, + 0x0, 0x0 + }, // No cross-lane carry - { UINT64_C(0x418FD883CEB217D8), UINT64_C(0x7213F60E1222CE60), - UINT64_C(0x1D372B1B98652CD8), UINT64_C(0xC1E418E52CA8C100), - UINT64_C(0x1D372B1B98652CD8), UINT64_C(0xC1E418E52CA8C100) }, + { + UINT64_C(0x418FD883CEB217D8), UINT64_C(0x7213F60E1222CE60), + UINT64_C(0x1D372B1B98652CD8), UINT64_C(0xC1E418E52CA8C100), + UINT64_C(0x1D372B1B98652CD8), UINT64_C(0xC1E418E52CA8C100) + }, // 1 cross-lane carry - { UINT64_C(0x477B3604218D2514), UINT64_C(0xA6019680FBEACF3B), - UINT64_C(0x2E5A5688195E73C4), UINT64_C(0x1E1F1A735CCAB79C), - UINT64_C(0x2E5A5688195E73C3), UINT64_C(0x1E1F1A735CCAB79C) }, + { + UINT64_C(0x477B3604218D2514), UINT64_C(0xA6019680FBEACF3B), + UINT64_C(0x2E5A5688195E73C4), UINT64_C(0x1E1F1A735CCAB79C), + UINT64_C(0x2E5A5688195E73C3), UINT64_C(0x1E1F1A735CCAB79C) + }, // 2 cross-lane carries - { UINT64_C(0xA7E5AD86B74C236C), UINT64_C(0x1522F8FF937041C7), - UINT64_C(0x0DDCC70B3782740B), UINT64_C(0x0249EA7D546DF4F4), - UINT64_C(0x0DDCC70B37827409), UINT64_C(0x0249EA7D546DF4F4) }, - { UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C( 0x1), - UINT64_C( 0x0), UINT64_C(0x7FFFFFFFFFFFFFFF), - UINT64_C( 0x0), UINT64_C(0x7FFFFFFFFFFFFFFF) }, - { UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C( 0x2), - UINT64_C( 0x0), UINT64_C(0xFFFFFFFFFFFFFFFE), - UINT64_C( 0x0), UINT64_C(0xFFFFFFFFFFFFFFFE) }, - { UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C( 0x3), - UINT64_C( 0x1), UINT64_C(0x7FFFFFFFFFFFFFFD), - UINT64_C( 0x1), UINT64_C(0x7FFFFFFFFFFFFFFD) }, - { UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C( 0x4), - UINT64_C( 0x1), UINT64_C(0xFFFFFFFFFFFFFFFC), - UINT64_C( 0x1), UINT64_C(0xFFFFFFFFFFFFFFFC) }, - { UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C( 0x1), - 
UINT64_C( 0x0), UINT64_C(0xFFFFFFFFFFFFFFFF), - UINT64_C( 0x0), UINT64_C(0xFFFFFFFFFFFFFFFF) }, - { UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C( 0x2), - UINT64_C( 0x1), UINT64_C(0xFFFFFFFFFFFFFFFE), - UINT64_C( 0x1), UINT64_C(0xFFFFFFFFFFFFFFFE) }, - { UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C( 0x3), - UINT64_C( 0x2), UINT64_C(0xFFFFFFFFFFFFFFFD), - UINT64_C( 0x2), UINT64_C(0xFFFFFFFFFFFFFFFD) }, - { UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C( 0x4), - UINT64_C( 0x3), UINT64_C(0xFFFFFFFFFFFFFFFC), - UINT64_C( 0x3), UINT64_C(0xFFFFFFFFFFFFFFFC) }, - { UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C( 0x8), - UINT64_C( 0x7), UINT64_C(0xFFFFFFFFFFFFFFF8), - UINT64_C( 0x7), UINT64_C(0xFFFFFFFFFFFFFFF8) }, - { UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0x1111111111111111), - UINT64_C(0x1111111111111110), UINT64_C(0xEEEEEEEEEEEEEEEF), - UINT64_C(0x111111111111110F), UINT64_C(0xEEEEEEEEEEEEEEEF) }, - { UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), - UINT64_C(0xFFFFFFFFFFFFFFFE), UINT64_C( 0x1), - UINT64_C(0xFFFFFFFFFFFFFFFD), UINT64_C( 0x1) }, + { + UINT64_C(0xA7E5AD86B74C236C), UINT64_C(0x1522F8FF937041C7), + UINT64_C(0x0DDCC70B3782740B), UINT64_C(0x0249EA7D546DF4F4), + UINT64_C(0x0DDCC70B37827409), UINT64_C(0x0249EA7D546DF4F4) + }, + { + UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C( 0x1), + UINT64_C( 0x0), UINT64_C(0x7FFFFFFFFFFFFFFF), + UINT64_C( 0x0), UINT64_C(0x7FFFFFFFFFFFFFFF) + }, + { + UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C( 0x2), + UINT64_C( 0x0), UINT64_C(0xFFFFFFFFFFFFFFFE), + UINT64_C( 0x0), UINT64_C(0xFFFFFFFFFFFFFFFE) + }, + { + UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C( 0x3), + UINT64_C( 0x1), UINT64_C(0x7FFFFFFFFFFFFFFD), + UINT64_C( 0x1), UINT64_C(0x7FFFFFFFFFFFFFFD) + }, + { + UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C( 0x4), + UINT64_C( 0x1), UINT64_C(0xFFFFFFFFFFFFFFFC), + UINT64_C( 0x1), UINT64_C(0xFFFFFFFFFFFFFFFC) + }, + { + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C( 0x1), + UINT64_C( 0x0), UINT64_C(0xFFFFFFFFFFFFFFFF), + UINT64_C( 0x0), UINT64_C(0xFFFFFFFFFFFFFFFF) + }, + { + 
UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C( 0x2), + UINT64_C( 0x1), UINT64_C(0xFFFFFFFFFFFFFFFE), + UINT64_C( 0x1), UINT64_C(0xFFFFFFFFFFFFFFFE) + }, + { + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C( 0x3), + UINT64_C( 0x2), UINT64_C(0xFFFFFFFFFFFFFFFD), + UINT64_C( 0x2), UINT64_C(0xFFFFFFFFFFFFFFFD) + }, + { + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C( 0x4), + UINT64_C( 0x3), UINT64_C(0xFFFFFFFFFFFFFFFC), + UINT64_C( 0x3), UINT64_C(0xFFFFFFFFFFFFFFFC) + }, + { + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C( 0x8), + UINT64_C( 0x7), UINT64_C(0xFFFFFFFFFFFFFFF8), + UINT64_C( 0x7), UINT64_C(0xFFFFFFFFFFFFFFF8) + }, + { + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0x1111111111111111), + UINT64_C(0x1111111111111110), UINT64_C(0xEEEEEEEEEEEEEEEF), + UINT64_C(0x111111111111110F), UINT64_C(0xEEEEEEEEEEEEEEEF) + }, + { + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), + UINT64_C(0xFFFFFFFFFFFFFFFE), UINT64_C( 0x1), + UINT64_C(0xFFFFFFFFFFFFFFFD), UINT64_C( 0x1) + }, + }; + const uint64_t testsum[3] = { + UINT64_C(0x92791E340E9CF671), + UINT64_C(0xD4FEB37FF4AE4B9B), + UINT64_C(0xA278198999A0B8CA) }; - const uint64_t testsum[3] = { UINT64_C(0x92791E340E9CF671), - UINT64_C(0xD4FEB37FF4AE4B9B), - UINT64_C(0xA278198999A0B8CA) }; uint64_t sum1_lo, sum1_mi, sum1_hi, sum2_lo, sum2_mi, sum2_hi; uint64_t sum3_lo, sum3_mi, sum3_hi; uint64_t r1_lo, r1_hi, r2_lo, r2_hi; + sum1_lo = sum1_mi = sum1_hi = sum2_lo = sum2_mi = sum2_hi = 0; sum3_lo = sum3_mi = sum3_hi = 0; @@ -183,28 +218,27 @@ static bool test_64(void) { mult64_128_nocarry(r1_lo, r1_hi, tests[i][0], tests[i][1]); mult64_128_nocarry(r2_lo, r2_hi, tests[i][1], tests[i][0]); if ((r1_hi != tests[i][4]) || (r1_lo != tests[i][5])) { - fail("mult64_128_nocarry, r1, rhi:rlo", i, &tests[i][4], {r1_hi, r1_lo}); + fail("mult64_128_nocarry, r1, rhi:rlo", i, &tests[i][4], { r1_hi, r1_lo }); passed = false; } if ((r2_hi != tests[i][4]) || (r2_lo != tests[i][5])) { - fail("mult64_128_nocarry, r2, rhi:rlo", i, &tests[i][4], {r2_hi, r2_lo}); + 
fail("mult64_128_nocarry, r2, rhi:rlo", i, &tests[i][4], { r2_hi, r2_lo }); passed = false; } mult64_128(r1_lo, r1_hi, tests[i][0], tests[i][1]); mult64_128(r2_lo, r2_hi, tests[i][1], tests[i][0]); if ((r1_hi != tests[i][2]) || (r1_lo != tests[i][3])) { - fail("mult64_128, r1, rhi:rlo", i, &tests[i][0], {r1_hi, r1_lo}); + fail("mult64_128, r1, rhi:rlo", i, &tests[i][0], { r1_hi, r1_lo }); passed = false; } if ((r2_hi != tests[i][2]) || (r2_lo != tests[i][3])) { - fail("mult64_128, r2, rhi:rlo", i, &tests[i][0], {r2_hi, r2_lo}); + fail("mult64_128, r2, rhi:rlo", i, &tests[i][0], { r2_hi, r2_lo }); passed = false; } add128(sum1_lo, sum1_mi, tests[i][3], tests[i][2]); - add192(sum1_lo, sum1_mi, sum1_hi, - tests[i][3], tests[i][2], UINT64_C(0x192791e340e9cf67)); + add192(sum1_lo, sum1_mi, sum1_hi, tests[i][3], tests[i][2], UINT64_C(0x192791e340e9cf67)); fma64_128(sum2_lo, sum2_mi, tests[i][0], tests[i][1]); fma64_128(sum3_lo, sum3_mi, tests[i][1], tests[i][0]); fma64_192(sum2_lo, sum2_mi, sum2_hi, tests[i][0], tests[i][1]); @@ -214,73 +248,105 @@ static bool test_64(void) { } if ((sum1_hi != testsum[0]) || (sum1_mi != testsum[1]) || (sum1_lo != testsum[2])) { - fail("add128/add192", -1, &testsum[0], {sum1_hi, sum1_mi, sum1_lo}); + fail("add128/add192", -1, &testsum[0], { sum1_hi, sum1_mi, sum1_lo }); passed = false; } if ((sum2_hi != testsum[0]) || (sum2_mi != testsum[1]) || (sum2_lo != testsum[2])) { - fail("fma64_128/fma64_192", 1, &testsum[0], {sum2_hi, sum2_mi, sum2_lo}); + fail("fma64_128/fma64_192", 1, &testsum[0], { sum2_hi, sum2_mi, sum2_lo }); passed = false; } if ((sum3_hi != testsum[0]) || (sum3_mi != testsum[1]) || (sum3_lo != testsum[2])) { - fail("fma64_128/fma64_192", 2, &testsum[0], {sum3_hi, sum3_mi, sum3_lo}); + fail("fma64_128/fma64_192", 2, &testsum[0], { sum3_hi, sum3_mi, sum3_lo }); passed = false; } return passed; } -static bool test_128(void) { +static bool test_128( void ) { bool passed = true; const uint64_t tests[16][6] = { - { 0x0 , 0x1, - 
0x0 , 0x1, - 0x0 , 0x1 }, - { UINT64_C(0xAF756DACBD453D68), UINT64_C(0xE5915DA08FF8BFD9), - 0x0 , 0x0, - 0x0 , 0x0 }, - { UINT64_C(0xAF756DACBD453D68), UINT64_C(0xE5915DA08FF8BFD9), - UINT64_C(0x2C297F5B51B1274F), UINT64_C(0x2A51DC0FB3F6EA0A), - UINT64_C(0xB9E5265202949E5E), UINT64_C(0x96526CC31499D87A) }, - { UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), - UINT64_C( 0x0), UINT64_C( 0x1), - UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF) }, - { UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), - UINT64_C( 0x0), UINT64_C( 0x2), - UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFE) }, - { UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), - UINT64_C( 0x0), UINT64_C( 0x3), - UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFD) }, - { UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), - UINT64_C( 0x0), UINT64_C( 0x4), - UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFC) }, - { UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C( 0x1), - UINT64_C( 0x0), UINT64_C( 0x1), - UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C( 0x1) }, - { UINT64_C(0xFFFFFFFFFFFFFFFE), UINT64_C( 0x1), - UINT64_C( 0x0), UINT64_C( 0x2), - UINT64_C(0xFFFFFFFFFFFFFFFC), UINT64_C( 0x2) }, - { UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), - UINT64_C( 0x0), UINT64_C( 0x3), - UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFD) }, - { UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), - UINT64_C( 0x0), UINT64_C( 0x4), - UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFC) }, - { UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), - UINT64_C( 0x0), UINT64_C( 0x8), - UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFF8) }, - { UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0x1111111111111111), - UINT64_C(0x1111111111111110), UINT64_C(0xEEEEEEEEEEEEEEEE), - UINT64_C(0x1FDB97530ECA8642), UINT64_C(0xDF0123456789ABCE) }, - { UINT64_C(0xAAAAAAAAAAAAAAAA), UINT64_C(0xFFFFFFFFFFFFFFFF), - UINT64_C(0xFFFFFFFFFFFFFFFE), 
UINT64_C( 0x1), - UINT64_C(0xAAAAAAAAAAAAAAAC), UINT64_C(0xFFFFFFFFFFFFFFFF) }, - { UINT64_C(0xAAAAAAAAAAAAAAAA), UINT64_C(0x5555555555555555), - UINT64_C(0xFFFFFFFFFFFFFFFE), UINT64_C( 0x1), - UINT64_C( 0x0), UINT64_C(0x5555555555555555) }, - { UINT64_C(0xAAAAAAAAAAAAAAAA), UINT64_C(0x5555555555555555), - UINT64_C(0xFFFFFFFFFFFFFFFE), UINT64_C( 0x0), - UINT64_C(0x5555555555555556), UINT64_C( 0x0) }, + { + 0x0, 0x1, + 0x0, 0x1, + 0x0, 0x1 + }, + { + UINT64_C(0xAF756DACBD453D68), UINT64_C(0xE5915DA08FF8BFD9), + 0x0, 0x0, + 0x0, 0x0 + }, + { + UINT64_C(0xAF756DACBD453D68), UINT64_C(0xE5915DA08FF8BFD9), + UINT64_C(0x2C297F5B51B1274F), UINT64_C(0x2A51DC0FB3F6EA0A), + UINT64_C(0xB9E5265202949E5E), UINT64_C(0x96526CC31499D87A) + }, + { + UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), + UINT64_C( 0x0), UINT64_C( 0x1), + UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF) + }, + { + UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), + UINT64_C( 0x0), UINT64_C( 0x2), + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFE) + }, + { + UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), + UINT64_C( 0x0), UINT64_C( 0x3), + UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFD) + }, + { + UINT64_C(0x7FFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), + UINT64_C( 0x0), UINT64_C( 0x4), + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFC) + }, + { + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C( 0x1), + UINT64_C( 0x0), UINT64_C( 0x1), + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C( 0x1) + }, + { + UINT64_C(0xFFFFFFFFFFFFFFFE), UINT64_C( 0x1), + UINT64_C( 0x0), UINT64_C( 0x2), + UINT64_C(0xFFFFFFFFFFFFFFFC), UINT64_C( 0x2) + }, + { + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), + UINT64_C( 0x0), UINT64_C( 0x3), + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFD) + }, + { + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), + UINT64_C( 0x0), UINT64_C( 0x4), + UINT64_C(0xFFFFFFFFFFFFFFFF), 
UINT64_C(0xFFFFFFFFFFFFFFFC) + }, + { + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF), + UINT64_C( 0x0), UINT64_C( 0x8), + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFF8) + }, + { + UINT64_C(0xFFFFFFFFFFFFFFFF), UINT64_C(0x1111111111111111), + UINT64_C(0x1111111111111110), UINT64_C(0xEEEEEEEEEEEEEEEE), + UINT64_C(0x1FDB97530ECA8642), UINT64_C(0xDF0123456789ABCE) + }, + { + UINT64_C(0xAAAAAAAAAAAAAAAA), UINT64_C(0xFFFFFFFFFFFFFFFF), + UINT64_C(0xFFFFFFFFFFFFFFFE), UINT64_C( 0x1), + UINT64_C(0xAAAAAAAAAAAAAAAC), UINT64_C(0xFFFFFFFFFFFFFFFF) + }, + { + UINT64_C(0xAAAAAAAAAAAAAAAA), UINT64_C(0x5555555555555555), + UINT64_C(0xFFFFFFFFFFFFFFFE), UINT64_C( 0x1), + UINT64_C( 0x0), UINT64_C(0x5555555555555555) + }, + { + UINT64_C(0xAAAAAAAAAAAAAAAA), UINT64_C(0x5555555555555555), + UINT64_C(0xFFFFFFFFFFFFFFFE), UINT64_C( 0x0), + UINT64_C(0x5555555555555556), UINT64_C( 0x0) + }, }; uint64_t r1_lo, r1_hi, r2_lo, r2_hi; @@ -289,11 +355,11 @@ static bool test_128(void) { mult128_128(r1_lo, r1_hi, tests[i][1], tests[i][0], tests[i][3], tests[i][2]); mult128_128(r2_lo, r2_hi, tests[i][3], tests[i][2], tests[i][1], tests[i][0]); if ((r1_hi != tests[i][4]) || (r1_lo != tests[i][5])) { - fail("mult128_128, r1, rhi:rlo", i, &tests[i][4], {r1_hi, r1_lo}); + fail("mult128_128, r1, rhi:rlo", i, &tests[i][4], { r1_hi, r1_lo }); passed = false; } if ((r2_hi != tests[i][4]) || (r2_lo != tests[i][5])) { - fail("mult128_128, r2, rhi:rlo", i, &tests[i][4], {r2_hi, r2_lo}); + fail("mult128_128, r2, rhi:rlo", i, &tests[i][4], { r2_hi, r2_lo }); passed = false; } } @@ -301,7 +367,7 @@ static bool test_128(void) { return passed; } -int Mathmult_selftest(void) { +int Mathmult_selftest( void ) { bool passed = true; passed &= test_32(); diff --git a/main.cpp b/main.cpp index 8e659604..b7425da7 100644 --- a/main.cpp +++ b/main.cpp @@ -106,16 +106,16 @@ const char * g_failstr = "*********FAIL*********\n"; //-------- // Overall log2-p-value statistics and test pass/fail counts 
-uint32_t g_log2pValueCounts[COUNT_MAX_PVALUE+2]; +uint32_t g_log2pValueCounts[COUNT_MAX_PVALUE + 2]; uint32_t g_testPass, g_testFail; -std::vector< std::pair > g_testFailures; +std::vector> g_testFailures; //----------------------------------------------------------------------------- // Locally-visible configuration static bool g_drawDiagram = false; // excessive torture tests: Sparse, Avalanche, DiffDist, scan all seeds -static bool g_testExtra = false; +static bool g_testExtra = false; static bool g_testAll; static bool g_testVerifyAll; @@ -142,39 +142,39 @@ static bool g_testBIC; static bool g_testBadSeeds; struct TestOpts { - bool &var; - bool defaultvalue; // What "All" sets the test to - bool testspeedonly; // If true, then disabling test doesn't affect "All" testing - const char* name; + bool & var; + bool defaultvalue; // What "All" sets the test to + bool testspeedonly; // If true, then disabling test doesn't affect "All" testing + const char * name; }; -static TestOpts g_testopts[] = -{ - { g_testVerifyAll, false, false, "VerifyAll" }, // Overrides all others below - { g_testSanityAll, false, false, "SanityAll" }, // Overrides all others below - { g_testSpeedAll, false, false, "SpeedAll" }, // Overrides all others below - { g_testAll, true, false, "All" }, - { g_testSanity, true, false, "Sanity" }, - { g_testSpeed, true, true, "Speed" }, - { g_testHashmap, true, true, "Hashmap" }, - { g_testAvalanche, true, false, "Avalanche" }, - { g_testSparse, true, false, "Sparse" }, - { g_testPermutation, true, false, "Permutation" }, - { g_testWindow, true, false, "Window" }, - { g_testCyclic, true, false, "Cyclic" }, - { g_testTwoBytes, true, false, "TwoBytes" }, - { g_testText, true, false, "Text" }, - { g_testZeroes, true, false, "Zeroes" }, - { g_testSeed, true, false, "Seed" }, - { g_testPerlinNoise, true, false, "PerlinNoise" }, - { g_testDiff, true, false, "Diff" }, - { g_testDiffDist, true, false, "DiffDist" }, - { g_testPopcount, true, false, "Popcount" 
}, - { g_testPrng, true, false, "Prng" }, - { g_testBIC, false, false, "BIC" }, - { g_testBadSeeds, false, false, "BadSeeds" }, +// These first 3 override all other selections +static TestOpts g_testopts[] = { + { g_testVerifyAll, false, false, "VerifyAll" }, + { g_testSanityAll, false, false, "SanityAll" }, + { g_testSpeedAll, false, false, "SpeedAll" }, + { g_testAll, true, false, "All" }, + { g_testSanity, true, false, "Sanity" }, + { g_testSpeed, true, true, "Speed" }, + { g_testHashmap, true, true, "Hashmap" }, + { g_testAvalanche, true, false, "Avalanche" }, + { g_testSparse, true, false, "Sparse" }, + { g_testPermutation, true, false, "Permutation" }, + { g_testWindow, true, false, "Window" }, + { g_testCyclic, true, false, "Cyclic" }, + { g_testTwoBytes, true, false, "TwoBytes" }, + { g_testText, true, false, "Text" }, + { g_testZeroes, true, false, "Zeroes" }, + { g_testSeed, true, false, "Seed" }, + { g_testPerlinNoise, true, false, "PerlinNoise" }, + { g_testDiff, true, false, "Diff" }, + { g_testDiffDist, true, false, "DiffDist" }, + { g_testPopcount, true, false, "Popcount" }, + { g_testPrng, true, false, "Prng" }, + { g_testBIC, false, false, "BIC" }, + { g_testBadSeeds, false, false, "BadSeeds" }, }; -static void set_default_tests(bool enable) { +static void set_default_tests( bool enable ) { for (size_t i = 0; i < sizeof(g_testopts) / sizeof(TestOpts); i++) { if (enable) { g_testopts[i].var = g_testopts[i].defaultvalue; @@ -184,10 +184,10 @@ static void set_default_tests(bool enable) { } } -static void parse_tests(const char * str, bool enable_tests) { +static void parse_tests( const char * str, bool enable_tests ) { while (*str != '\0') { - size_t len; - const char *p = strchr(str, ','); + size_t len; + const char * p = strchr(str, ','); if (p == NULL) { len = strlen(str); } else { @@ -195,7 +195,7 @@ static void parse_tests(const char * str, bool enable_tests) { } struct TestOpts * found = NULL; - bool foundmultiple = false; + bool foundmultiple = 
false; for (size_t i = 0; i < sizeof(g_testopts) / sizeof(TestOpts); i++) { const char * testname = g_testopts[i].name; // Allow the user to specify test names by case-agnostic @@ -205,38 +205,33 @@ static void parse_tests(const char * str, bool enable_tests) { foundmultiple = true; } found = &g_testopts[i]; - // Exact match found, don't bother looking further, - // and don't error out. if (testname[len] == '\0') { + // Exact match found, don't bother looking further, and + // don't error out. foundmultiple = false; break; } } } if (foundmultiple) { - printf("Ambiguous test name: --%stest=%*s\n", - enable_tests ? "" : "no", (int)len, str); + printf("Ambiguous test name: --%stest=%*s\n", enable_tests ? "" : "no", (int)len, str); goto error; } if (found == NULL) { - printf("Invalid option: --%stest=%*s\n", - enable_tests ? "" : "no", (int)len, str); + printf("Invalid option: --%stest=%*s\n", enable_tests ? "" : "no", (int)len, str); goto error; } - //printf("%sabling test %s\n", enable_tests ? "en" : "dis", testname); + // printf("%sabling test %s\n", enable_tests ? "en" : "dis", testname); found->var = enable_tests; - // If "All" tests are being enabled or disabled, then - // adjust the individual test variables as - // instructed. Otherwise, if a material "All" test - // (not just a speed-testing test) is being - // specifically disabled, then don't consider "All" - // tests as being run. + + // If "All" tests are being enabled or disabled, then adjust the individual + // test variables as instructed. Otherwise, if a material "All" test (not + // just a speed-testing test) is being specifically disabled, then don't + // consider "All" tests as being run. 
if (&found->var == &g_testAll) { set_default_tests(enable_tests); - } else if (!enable_tests && - found->defaultvalue && - !found->testspeedonly) { + } else if (!enable_tests && found->defaultvalue && !found->testspeedonly) { g_testAll = false; } @@ -248,17 +243,18 @@ static void parse_tests(const char * str, bool enable_tests) { return; - error: + error: printf("Valid tests: --test=%s", g_testopts[0].name); - for(size_t i = 1; i < sizeof(g_testopts) / sizeof(TestOpts); i++) { + for (size_t i = 1; i < sizeof(g_testopts) / sizeof(TestOpts); i++) { printf(",%s", g_testopts[i].name); } printf(" \n"); exit(1); } -static void usage(void); -static HashInfo::endianness parse_endian(const char * str) { +static void usage( void ); + +static HashInfo::endianness parse_endian( const char * str ) { if (!strcmp(str, "native")) { return HashInfo::ENDIAN_NATIVE; } if (!strcmp(str, "nonnative")) { return HashInfo::ENDIAN_BYTESWAPPED; } if (!strcmp(str, "default")) { return HashInfo::ENDIAN_DEFAULT; } @@ -273,25 +269,25 @@ static HashInfo::endianness parse_endian(const char * str) { //----------------------------------------------------------------------------- // Self-tests - verify that hashes work correctly -static void HashSelfTestAll(bool verbose) { - bool pass = true; +static void HashSelfTestAll( bool verbose ) { + bool pass = true; - printf("[[[ VerifyAll Tests ]]]\n\n"); + printf("[[[ VerifyAll Tests ]]]\n\n"); - pass &= verifyAllHashes(verbose); + pass &= verifyAllHashes(verbose); - if (!pass) { - printf("Self-test FAILED!\n"); - if (!verbose) { - verifyAllHashes(true); + if (!pass) { + printf("Self-test FAILED!\n"); + if (!verbose) { + verifyAllHashes(true); + } + exit(1); } - exit(1); - } - printf("PASS\n\n"); + printf("PASS\n\n"); } -static bool HashSelfTest(const HashInfo * hinfo) { +static bool HashSelfTest( const HashInfo * hinfo ) { bool result = verifyHash(hinfo, g_hashEndian, true, false); recordTestResult(result, "Sanity", "Implementation verification"); @@ 
-299,13 +295,13 @@ static bool HashSelfTest(const HashInfo * hinfo) { return result; } -static void HashSanityTestAll(void) { +static void HashSanityTestAll( void ) { std::vector allHashes = findAllHashes(); printf("[[[ SanityAll Tests ]]]\n\n"); SanityTestHeader(); - for (const HashInfo * h : allHashes) { + for (const HashInfo * h: allHashes) { if (!h->Init()) { printf("%s : hash initialization failed!", h->name); continue; @@ -318,13 +314,13 @@ static void HashSanityTestAll(void) { //----------------------------------------------------------------------------- // Quickly speed test all hashes -static void HashSpeedTestAll(void) { +static void HashSpeedTestAll( void ) { std::vector allHashes = findAllHashes(); printf("[[[ Short Speed Tests ]]]\n\n"); ShortSpeedTestHeader(); - for (const HashInfo * h : allHashes) { + for (const HashInfo * h: allHashes) { if (!h->Init()) { printf("%s : hash initialization failed!", h->name); continue; @@ -335,469 +331,452 @@ static void HashSpeedTestAll(void) { } //----------------------------------------------------------------------------- -void print_pvaluecounts(void) { - printf("Log2(p-value) summary:"); - for (uint32_t lo = 0; lo <= (COUNT_MAX_PVALUE+1); lo += 10) { - printf("\n\t %2d%c", lo, (lo == (COUNT_MAX_PVALUE+1)) ? '+' : ' '); - for (uint32_t i = 1; i < 10; i++) { - printf(" %2d%c", lo+i, ((lo+i) == (COUNT_MAX_PVALUE+1)) ? '+' : ' '); - } - printf("\n\t----"); - for (uint32_t i = 1; i < 10; i++) { - printf(" ----"); - } - printf("\n\t%4d", g_log2pValueCounts[lo+0]); - for (uint32_t i = 1; i < 10; i++) { - printf(" %4d", g_log2pValueCounts[lo+i]); + +static void print_pvaluecounts( void ) { + printf("Log2(p-value) summary:"); + for (uint32_t lo = 0; lo <= (COUNT_MAX_PVALUE + 1); lo += 10) { + printf("\n\t %2d%c", lo, (lo == (COUNT_MAX_PVALUE + 1)) ? '+' : ' '); + for (uint32_t i = 1; i < 10; i++) { + printf(" %2d%c", lo + i, ((lo + i) == (COUNT_MAX_PVALUE + 1)) ? 
'+' : ' '); + } + printf("\n\t----"); + for (uint32_t i = 1; i < 10; i++) { + printf(" ----"); + } + printf("\n\t%4d", g_log2pValueCounts[lo + 0]); + for (uint32_t i = 1; i < 10; i++) { + printf(" %4d", g_log2pValueCounts[lo + i]); + } + printf("\n"); } printf("\n"); - } - printf("\n"); } //----------------------------------------------------------------------------- -template < typename hashtype > -static bool test ( const HashInfo * hInfo ) -{ - const int hashbits = sizeof(hashtype) * 8; - bool result = true; - - if (g_testAll) { - printf("-------------------------------------------------------------------------------\n"); - } - - if (!hInfo->Init()) { - printf("Hash initialization failed! Cannot continue.\n"); - exit(1); - } - - //----------------------------------------------------------------------------- - // Some hashes only take 32-bits of seed data, so there's no way of - // getting big seeds to them at all. - if ((g_seed >= (1ULL << (8 * sizeof(uint32_t)))) && hInfo->is32BitSeed()) { - printf("WARNING: Specified global seed 0x%016" PRIx64 "\n" - " is larger than the specified hash can accept\n", g_seed); - } - - //----------------------------------------------------------------------------- - // Sanity tests - - FILE * outfile; - if (g_testAll || g_testSpeed || g_testHashmap) - outfile = stdout; - else - outfile = stderr; - fprintf(outfile, "--- Testing %s \"%s\" %s", hInfo->name, hInfo->desc, hInfo->isMock() ? 
"MOCK" : ""); - if (g_seed != 0) - fprintf(outfile, " seed 0x%016" PRIx64 "\n\n", g_seed); - else - fprintf(outfile, "\n\n"); - - if(g_testSanity) - { - printf("[[[ Sanity Tests ]]]\n\n"); - - result &= HashSelfTest(hInfo); - result &= (SanityTest(hInfo) || hInfo->isMock()); - printf("\n"); - } +template +static bool test( const HashInfo * hInfo ) { + const int hashbits = sizeof(hashtype) * 8; + bool result = true; + + if (g_testAll) { + printf("-------------------------------------------------------------------------------\n"); + } + + if (!hInfo->Init()) { + printf("Hash initialization failed! Cannot continue.\n"); + exit(1); + } + + //----------------------------------------------------------------------------- + // Some hashes only take 32-bits of seed data, so there's no way of + // getting big seeds to them at all. + if ((g_seed >= (1ULL << (8 * sizeof(uint32_t)))) && hInfo->is32BitSeed()) { + printf("WARNING: Specified global seed 0x%016" PRIx64 "\n" + " is larger than the specified hash can accept\n", g_seed); + } + + //----------------------------------------------------------------------------- + // Sanity tests + + FILE * outfile; + if (g_testAll || g_testSpeed || g_testHashmap) { + outfile = stdout; + } else { + outfile = stderr; + } + fprintf(outfile, "--- Testing %s \"%s\" %s", hInfo->name, hInfo->desc, hInfo->isMock() ? 
"MOCK" : ""); + if (g_seed != 0) { + fprintf(outfile, " seed 0x%016" PRIx64 "\n\n", g_seed); + } else { + fprintf(outfile, "\n\n"); + } + + if (g_testSanity) { + printf("[[[ Sanity Tests ]]]\n\n"); + + result &= HashSelfTest(hInfo); + result &= (SanityTest(hInfo) || hInfo->isMock()); + printf("\n"); + } + + //----------------------------------------------------------------------------- + // Speed tests + + if (g_testSpeed) { + SpeedTest(hInfo); + } + + if (g_testHashmap) { + result &= HashMapTest(hInfo, g_drawDiagram, g_testExtra); + } + + //----------------------------------------------------------------------------- + // Avalanche tests + + if (g_testAvalanche) { + result &= AvalancheTest(hInfo, g_drawDiagram, g_testExtra); + } + + //----------------------------------------------------------------------------- + // Keyset 'Sparse' - keys with all bits 0 except a few + + if (g_testSparse) { + result &= SparseKeyTest(hInfo, g_drawDiagram, g_testExtra); + } + + //----------------------------------------------------------------------------- + // Keyset 'Permutation' - all possible combinations of a set of blocks + + if (g_testPermutation) { + result &= PermutedKeyTest(hInfo, g_drawDiagram, g_testExtra); + } + + //----------------------------------------------------------------------------- + // Keyset 'Window' + + if (g_testWindow) { + result &= WindowedKeyTest(hInfo, g_drawDiagram, g_testExtra); + } + + //----------------------------------------------------------------------------- + // Keyset 'Cyclic' - keys of the form "abcdabcdabcd..." + + if (g_testCyclic) { + result &= CyclicKeyTest(hInfo, g_drawDiagram); + } + + //----------------------------------------------------------------------------- + // Keyset 'TwoBytes' - all keys up to N bytes containing two non-zero bytes + // With --extra this generates some huge keysets, + // 128-bit tests will take ~1.3 gigs of RAM. 
+ + if (g_testTwoBytes) { + result &= TwoBytesKeyTest(hInfo, g_drawDiagram, g_testExtra); + } + + //----------------------------------------------------------------------------- + // Keyset 'Text' + + if (g_testText) { + result &= TextKeyTest(hInfo, g_drawDiagram); + } + + //----------------------------------------------------------------------------- + // Keyset 'Zeroes' + + if (g_testZeroes) { + result &= ZeroKeyTest(hInfo, g_drawDiagram); + } + + //----------------------------------------------------------------------------- + // Keyset 'Seed' + + if (g_testSeed) { + result &= SeedTest(hInfo, g_drawDiagram); + } + + //----------------------------------------------------------------------------- + // Keyset 'PerlinNoise' + + if (g_testPerlinNoise) { + result &= PerlinNoiseTest(hInfo, g_drawDiagram, g_testExtra); + } + + //----------------------------------------------------------------------------- + // Differential tests + + if (g_testDiff) { + result &= DiffTest(hInfo, g_drawDiagram, g_testExtra); + } + + //----------------------------------------------------------------------------- + // Differential-distribution tests + + if (g_testDiffDist) { + result &= DiffDistTest(hInfo, g_drawDiagram); + } + + //----------------------------------------------------------------------------- + // Measuring the distribution of the population count of the + // lowest 32 bits set over the whole key space. + + if (g_testPopcount) { + result &= PopcountTest(hInfo, g_testExtra); + } - //----------------------------------------------------------------------------- - // Speed tests + //----------------------------------------------------------------------------- + // Test the hash function as a PRNG by repeatedly feeding its output + // back into the hash to get the next random number. 
- if(g_testSpeed) - { - SpeedTest(hInfo); - } + if (g_testPrng) { + result &= PRNGTest(hInfo, g_drawDiagram, g_testExtra); + } - if(g_testHashmap) - { - result &= HashMapTest(hInfo, g_drawDiagram, g_testExtra); - } + //----------------------------------------------------------------------------- + // Bit Independence Criteria. Interesting, but doesn't tell us much about + // collision or distribution. For >=128bit hashes, do this only with --extra - //----------------------------------------------------------------------------- - // Avalanche tests + if (g_testAll && g_testExtra && (hInfo->bits >= 128)) { + g_testBIC = true; + } + if (g_testBIC) { + result &= BicTest(hInfo, g_drawDiagram); + } - if(g_testAvalanche) - { - result &= AvalancheTest(hInfo, g_drawDiagram, g_testExtra); - } + //----------------------------------------------------------------------------- + // Test for known or unknown seed values which give bad/suspect hash values. - //----------------------------------------------------------------------------- - // Keyset 'Sparse' - keys with all bits 0 except a few + if (g_testBadSeeds) { + result &= BadSeedsTest(hInfo, g_testExtra); + } - if(g_testSparse) - { - result &= SparseKeyTest(hInfo, g_drawDiagram, g_testExtra); - } + //----------------------------------------------------------------------------- + // If All material tests were done, show a final summary of testing + + if (g_testAll) { + printf("-------------------------------------------------------------------------------\n"); + print_pvaluecounts(); + printf("-------------------------------------------------------------------------------\n"); + printf("Overall result: %s (%d/%d passed)\n", result ? "pass" : "FAIL", + g_testPass, g_testPass + g_testFail); + if (!result) { + const char * prev = ""; + printf("Failures"); + for (auto x: g_testFailures) { + if (strcmp(prev, x.first) != 0) { + printf("%c\n %-20s: [%s", (strlen(prev) == 0) ? ':' : ']', x.first, x.second ? 
x.second : ""); + prev = x.first; + } else { + printf(", %s", x.second); + } + free(x.second); + } + printf("]\n\n"); + } else { + // Sometimes failures are recorded even for overall + // successes. The only example I know of is Mock hashes + // failing sanity tests. + for (auto x: g_testFailures) { + free(x.second); + } + printf("\n"); + } + printf("-------------------------------------------------------------------------------\n"); + } - //----------------------------------------------------------------------------- - // Keyset 'Permutation' - all possible combinations of a set of blocks - - if(g_testPermutation) - { - result &= PermutedKeyTest(hInfo, g_drawDiagram, g_testExtra); - } - - //----------------------------------------------------------------------------- - // Keyset 'Window' - - if(g_testWindow) - { - result &= WindowedKeyTest(hInfo, g_drawDiagram, g_testExtra); - } - - //----------------------------------------------------------------------------- - // Keyset 'Cyclic' - keys of the form "abcdabcdabcd..." - - if (g_testCyclic) - { - result &= CyclicKeyTest(hInfo, g_drawDiagram); - } - - //----------------------------------------------------------------------------- - // Keyset 'TwoBytes' - all keys up to N bytes containing two non-zero bytes - // With --extra this generates some huge keysets, - // 128-bit tests will take ~1.3 gigs of RAM. 
- - if(g_testTwoBytes) - { - result &= TwoBytesKeyTest(hInfo, g_drawDiagram, g_testExtra); - } - - //----------------------------------------------------------------------------- - // Keyset 'Text' - - if(g_testText) - { - result &= TextKeyTest(hInfo, g_drawDiagram); - } - - //----------------------------------------------------------------------------- - // Keyset 'Zeroes' - - if(g_testZeroes) - { - result &= ZeroKeyTest(hInfo, g_drawDiagram); - } - - //----------------------------------------------------------------------------- - // Keyset 'Seed' - - if(g_testSeed) - { - result &= SeedTest(hInfo, g_drawDiagram); - } - - //----------------------------------------------------------------------------- - // Keyset 'PerlinNoise' - - if(g_testPerlinNoise) - { - result &= PerlinNoiseTest(hInfo, g_drawDiagram, g_testExtra); - } - - //----------------------------------------------------------------------------- - // Differential tests - - if(g_testDiff) - { - result &= DiffTest(hInfo, g_drawDiagram, g_testExtra); - } - - //----------------------------------------------------------------------------- - // Differential-distribution tests - - if (g_testDiffDist) - { - result &= DiffDistTest(hInfo, g_drawDiagram); - } - - //----------------------------------------------------------------------------- - // Measuring the distribution of the population count of the - // lowest 32 bits set over the whole key space. - - if (g_testPopcount) - { - result &= PopcountTest(hInfo, g_testExtra); - } - - //----------------------------------------------------------------------------- - // Test the hash function as a PRNG by repeatedly feeding its output - // back into the hash to get the next random number. - - if (g_testPrng) - { - result &= PRNGTest(hInfo, g_drawDiagram, g_testExtra); - } - - //----------------------------------------------------------------------------- - // Bit Independence Criteria. Interesting, but doesn't tell us much about - // collision or distribution. 
For >=128bit hashes, do this only with --extra - - if (g_testAll && g_testExtra && hInfo->bits >= 128) { - g_testBIC = true; - } - if(g_testBIC) - { - result &= BicTest(hInfo, g_drawDiagram); - } - - //----------------------------------------------------------------------------- - // Test for known or unknown seed values which give bad/suspect hash values. - - if (g_testBadSeeds) - { - result &= BadSeedsTest(hInfo, g_testExtra); - } - - //----------------------------------------------------------------------------- - // If All material tests were done, show a final summary of testing - if (g_testAll) { - printf("-------------------------------------------------------------------------------\n"); - print_pvaluecounts(); - printf("-------------------------------------------------------------------------------\n"); - printf("Overall result: %s (%d/%d passed)\n", result ? "pass" : "FAIL", - g_testPass, g_testPass+g_testFail); - if (!result) { - const char * prev = ""; - printf("Failures"); - for (auto x: g_testFailures) { - if (strcmp(prev, x.first) != 0) { - printf("%c\n %-20s: [%s", (strlen(prev) == 0) ? ':' : ']', - x.first, x.second ? x.second : ""); - prev = x.first; - } else { - printf(", %s", x.second); - } - free(x.second); - } - printf("]\n\n"); - } else { - // Sometimes failures are recorded even for overall - // successes. The only example I know of is Mock hashes - // failing sanity tests. 
- for (auto x: g_testFailures) { - free(x.second); - } - printf("\n"); - } - printf("-------------------------------------------------------------------------------\n"); - } - - return result; + return result; } //----------------------------------------------------------------------------- -static bool testHash(const char * name) { - const HashInfo * hInfo; - - if ((hInfo = findHash(name)) == NULL) { - printf("Invalid hash '%s' specified\n", name); - return false; - } - - // If you extend these statements by adding a new bitcount/type, you - // need to adjust HASHTYPELIST in util/Instantiate.h also. - if(hInfo->bits == 32) - return test>( hInfo ); - if(hInfo->bits == 64) - return test>( hInfo ); - if(hInfo->bits == 128) - return test>( hInfo ); - if(hInfo->bits == 160) - return test>( hInfo ); - if(hInfo->bits == 224) - return test>( hInfo ); - if(hInfo->bits == 256) - return test>( hInfo ); - - printf("Invalid hash bit width %d for hash '%s'", - hInfo->bits, hInfo->name); - - return false; +static bool testHash( const char * name ) { + const HashInfo * hInfo; + + if ((hInfo = findHash(name)) == NULL) { + printf("Invalid hash '%s' specified\n", name); + return false; + } + + // If you extend these statements by adding a new bitcount/type, you + // need to adjust HASHTYPELIST in util/Instantiate.h also. 
+ if (hInfo->bits == 32) { + return test>(hInfo); + } + if (hInfo->bits == 64) { + return test>(hInfo); + } + if (hInfo->bits == 128) { + return test>(hInfo); + } + if (hInfo->bits == 160) { + return test>(hInfo); + } + if (hInfo->bits == 224) { + return test>(hInfo); + } + if (hInfo->bits == 256) { + return test>(hInfo); + } + + printf("Invalid hash bit width %d for hash '%s'", hInfo->bits, hInfo->name); + + return false; } //----------------------------------------------------------------------------- -static void usage( void ) -{ +static void usage( void ) { printf("Usage: SMHasher3 [--[no]test=[,...]] [--extra] [--seed=]\n" - " [--endian=default|nondefault|native|nonnative|big|little]\n" - " [--verbose] [--vcode] [--ncpu=N] []\n" - "\n" - " SMHasher3 [--list]|[--listnames]|[--tests]|[--version]\n" - "\n" - " Hashnames can be supplied using any case letters.\n" - ); + " [--endian=default|nondefault|native|nonnative|big|little]\n" + " [--verbose] [--vcode] [--ncpu=N] []\n" + "\n" + " SMHasher3 [--list]|[--listnames]|[--tests]|[--version]\n" + "\n" + " Hashnames can be supplied using any case letters.\n"); } #if defined(DEBUG) extern bool blobsort_test_result; #endif -int main ( int argc, const char ** argv ) -{ - setbuf(stdout, NULL); // Unbuffer stdout always - setbuf(stderr, NULL); // Unbuffer stderr always +int main( int argc, const char ** argv ) { + setbuf(stdout, NULL); // Unbuffer stdout always + setbuf(stderr, NULL); // Unbuffer stderr always - if (!isLE() && !isBE()) { - printf("Runtime endian detection failed! Cannot continue\n"); - exit(1); - } + if (!isLE() && !isBE()) { + printf("Runtime endian detection failed! Cannot continue\n"); + exit(1); + } #if defined(DEBUG) - if (!blobsort_test_result) { - printf("Blobsort self-test failed! Cannot continue\n"); - exit(1); - } + if (!blobsort_test_result) { + printf("Blobsort self-test failed! 
Cannot continue\n"); + exit(1); + } #endif - set_default_tests(true); + set_default_tests(true); #if defined(HAVE_32BIT_PLATFORM) - const char * defaulthash = "wyhash-32"; + const char * defaulthash = "wyhash-32"; #else - const char * defaulthash = "xxh3-64"; + const char * defaulthash = "xxh3-64"; #endif - const char * hashToTest = defaulthash; + const char * hashToTest = defaulthash; - if (argc < 2) { - printf("No test hash given on command line, testing %s.\n", hashToTest); - usage(); - } - - for (int argnb = 1; argnb < argc; argnb++) { - const char* const arg = argv[argnb]; - if (strncmp(arg,"--", 2) == 0) { - // This is a command - if (strcmp(arg,"--help") == 0) { + if (argc < 2) { + printf("No test hash given on command line, testing %s.\n", hashToTest); usage(); - exit(0); - } - if (strcmp(arg,"--list") == 0) { - listHashes(false); - exit(0); - } - if (strcmp(arg,"--listnames") == 0) { - listHashes(true); - exit(0); - } - if (strcmp(arg,"--tests") == 0) { - printf("Valid tests:\n"); - for(size_t i = 0; i < sizeof(g_testopts) / sizeof(TestOpts); i++) { - printf(" %s\n", g_testopts[i].name); - } - exit(0); - } - if (strcmp(arg,"--version") == 0) { - printf("SMHasher3 %s\n", VERSION); - exit(0); - } - if (strcmp(arg,"--verbose") == 0) { - g_drawDiagram = true; - continue; - } - if (strcmp(arg,"--extra") == 0) { - g_testExtra = true; - continue; - } - // VCodes allow easy comparison of test results and hash inputs - // and outputs across SMHasher3 runs, hashes (of the same width), - // and systems. 
- if (strcmp(arg,"--vcode") == 0) { - g_doVCode = 1; - VCODE_INIT(); - continue; - } - if (strncmp(arg,"--endian=", 9) == 0) { - g_hashEndian = parse_endian(&arg[9]); - continue; - } - if (strncmp(arg,"--seed=", 7) == 0) { - errno = 0; - char * endptr; - uint64_t seed = strtol(&arg[7], &endptr, 0); - if ((errno != 0) || (arg[7] == '\0') || (*endptr != '\0')) { - printf("Error parsing global seed value \"%s\"\n", &arg[7]); - exit(1); - } - g_seed = seed; - continue; - } - if (strncmp(arg,"--ncpu=", 7) == 0) { + } + + for (int argnb = 1; argnb < argc; argnb++) { + const char * const arg = argv[argnb]; + if (strncmp(arg, "--", 2) == 0) { + // This is a command + if (strcmp(arg, "--help") == 0) { + usage(); + exit(0); + } + if (strcmp(arg, "--list") == 0) { + listHashes(false); + exit(0); + } + if (strcmp(arg, "--listnames") == 0) { + listHashes(true); + exit(0); + } + if (strcmp(arg, "--tests") == 0) { + printf("Valid tests:\n"); + for (size_t i = 0; i < sizeof(g_testopts) / sizeof(TestOpts); i++) { + printf(" %s\n", g_testopts[i].name); + } + exit(0); + } + if (strcmp(arg, "--version") == 0) { + printf("SMHasher3 %s\n", VERSION); + exit(0); + } + if (strcmp(arg, "--verbose") == 0) { + g_drawDiagram = true; + continue; + } + if (strcmp(arg, "--extra") == 0) { + g_testExtra = true; + continue; + } + // VCodes allow easy comparison of test results and hash inputs + // and outputs across SMHasher3 runs, hashes (of the same width), + // and systems. 
+ if (strcmp(arg, "--vcode") == 0) { + g_doVCode = 1; + VCODE_INIT(); + continue; + } + if (strncmp(arg, "--endian=", 9) == 0) { + g_hashEndian = parse_endian(&arg[9]); + continue; + } + if (strncmp(arg, "--seed=", 7) == 0) { + errno = 0; + char * endptr; + uint64_t seed = strtol(&arg[7], &endptr, 0); + if ((errno != 0) || (arg[7] == '\0') || (*endptr != '\0')) { + printf("Error parsing global seed value \"%s\"\n", &arg[7]); + exit(1); + } + g_seed = seed; + continue; + } + if (strncmp(arg, "--ncpu=", 7) == 0) { #if defined(HAVE_THREADS) - errno = 0; - char * endptr; - long int Ncpu = strtol(&arg[7], &endptr, 0); - if ((errno != 0) || (arg[7] == '\0') || (*endptr != '\0') || (Ncpu < 1)) { - printf("Error parsing cpu number \"%s\"\n", &arg[7]); - exit(1); - } - if (Ncpu > 32) { - printf("WARNING: limiting to 32 threads\n"); - Ncpu = 32; - } - g_NCPU = Ncpu; - continue; + errno = 0; + char * endptr; + long int Ncpu = strtol(&arg[7], &endptr, 0); + if ((errno != 0) || (arg[7] == '\0') || (*endptr != '\0') || (Ncpu < 1)) { + printf("Error parsing cpu number \"%s\"\n", &arg[7]); + exit(1); + } + if (Ncpu > 32) { + printf("WARNING: limiting to 32 threads\n"); + Ncpu = 32; + } + g_NCPU = Ncpu; + continue; #else - printf("WARNING: compiled without threads; ignoring --ncpu\n"); - continue; + printf("WARNING: compiled without threads; ignoring --ncpu\n"); + continue; #endif - } - if (strncmp(arg,"--test=", 6) == 0) { - // If a list of tests is given, only test those - g_testAll = false; - set_default_tests(false); - parse_tests(&arg[7], true); - continue; - } - if (strncmp(arg,"--notest=", 8) == 0) { - parse_tests(&arg[9], false); - continue; - } - if (strcmp(arg,"--EstimateNbCollisions") == 0) { - ReportCollisionEstimates(); - exit(0); - } - // invalid command - printf("Invalid command \n"); - usage(); - exit(1); - } - // Not a command ? 
=> interpreted as hash name - hashToTest = arg; - } - - size_t timeBegin = monotonic_clock(); - - if (g_testVerifyAll) { - HashSelfTestAll(g_drawDiagram); - } else if (g_testSanityAll) { - HashSanityTestAll(); - } else if (g_testSpeedAll) { - HashSpeedTestAll(); - } else { - testHash(hashToTest); - } - - size_t timeEnd = monotonic_clock(); - - uint32_t vcode = VCODE_FINALIZE(); - - FILE * outfile = g_testAll ? stdout : stderr; - - if (g_doVCode) { - fprintf(outfile, - "Input vcode 0x%08x, Output vcode 0x%08x, Result vcode 0x%08x\n", - g_inputVCode, g_outputVCode, g_resultVCode); - } - - fprintf(outfile, - "Verification value is 0x%08x - Testing took %f seconds\n\n", - vcode, (double)(timeEnd-timeBegin)/(double)NSEC_PER_SEC); - - return 0; + } + if (strncmp(arg, "--test=", 6) == 0) { + // If a list of tests is given, only test those + g_testAll = false; + set_default_tests(false); + parse_tests(&arg[7], true); + continue; + } + if (strncmp(arg, "--notest=", 8) == 0) { + parse_tests(&arg[9], false); + continue; + } + if (strcmp(arg, "--EstimateNbCollisions") == 0) { + ReportCollisionEstimates(); + exit(0); + } + // invalid command + printf("Invalid command \n"); + usage(); + exit(1); + } + // Not a command ? => interpreted as hash name + hashToTest = arg; + } + + size_t timeBegin = monotonic_clock(); + + if (g_testVerifyAll) { + HashSelfTestAll(g_drawDiagram); + } else if (g_testSanityAll) { + HashSanityTestAll(); + } else if (g_testSpeedAll) { + HashSpeedTestAll(); + } else { + testHash(hashToTest); + } + + size_t timeEnd = monotonic_clock(); + + uint32_t vcode = VCODE_FINALIZE(); + + FILE * outfile = g_testAll ? 
stdout : stderr; + + if (g_doVCode) { + fprintf(outfile, "Input vcode 0x%08x, Output vcode 0x%08x, Result vcode 0x%08x\n", + g_inputVCode, g_outputVCode, g_resultVCode); + } + + fprintf(outfile, "Verification value is 0x%08x - Testing took %f seconds\n\n", + vcode, (double)(timeEnd - timeBegin) / (double)NSEC_PER_SEC); + + return 0; } diff --git a/misc/exactcoll.c b/misc/exactcoll.c index cda5a0a1..739d2c3b 100644 --- a/misc/exactcoll.c +++ b/misc/exactcoll.c @@ -53,36 +53,36 @@ #define EXTRA_DIGITS 0 #if !defined(USE_MPFI) -#if !defined(USE_MPFR) -#error "Exactly one of USE_MPFI and USE_MPFR must be defined" -#endif + #if !defined(USE_MPFR) + #error "Exactly one of USE_MPFI and USE_MPFR must be defined" + #endif #endif #if defined(USE_MPFI) -#if defined(USE_MPFR) -#error "Exactly one of USE_MPFI and USE_MPFR must be defined" -#endif + #if defined(USE_MPFR) + #error "Exactly one of USE_MPFI and USE_MPFR must be defined" + #endif #endif #if defined(USE_MPFI) -#include -#include + #include + #include typedef mpfi_t mp_t; #else -#include + #include typedef mpfr_t mp_t; #endif -char buf[3*PRECISION]; +char buf[3 * PRECISION]; FILE * membuf; #if defined(USE_MPFI) -#define MP(x,...) mpfi_##x(__VA_ARGS__) + #define MP(x, ...) mpfi_ ## x(__VA_ARGS__) #else -#define MP(x,...) mpfr_##x(__VA_ARGS__, MPFR_RNDN) + #define MP(x, ...) mpfr_ ## x(__VA_ARGS__, MPFR_RNDN) #endif -void printcoll(uint64_t balls, uint64_t log2bins) { +void printcoll( uint64_t balls, uint64_t log2bins ) { mp_t m, n, p, e, f, c; #if defined(USE_MPFI) @@ -155,7 +155,7 @@ void printcoll(uint64_t balls, uint64_t log2bins) { * represent). 
*/ double lb = strtod(&buf[1], NULL); - double ub = strtod(strchr(buf, ',')+1, NULL); + double ub = strtod(strchr(buf, ',') + 1, NULL); if (lb != ub) { printf("BOUNDS DO NOT MATCH TO DOUBLE PRECISION!\n"); printf("Increase PRECISION and recompile.\n"); @@ -190,7 +190,7 @@ void printcoll(uint64_t balls, uint64_t log2bins) { #endif } -int main(void) { +int main( void ) { mpfr_set_default_prec(PRECISION); membuf = fmemopen(buf, sizeof(buf), "w"); @@ -208,9 +208,9 @@ int main(void) { 264097, 204800, 200000, 102774, 100000, 77163, 50643, 6 }; - const uint64_t bits[] = {256, 224, 160, 128, 64, 55, 45, 42, 39, 36, 32, 29, 27, 24, 22, 19, 12, 8}; - const uint64_t keycnt = sizeof(keys)/sizeof(keys[0]); - const uint64_t bitcnt = sizeof(bits)/sizeof(bits[0]); + const uint64_t bits[] = { 256, 224, 160, 128, 64, 55, 45, 42, 39, 36, 32, 29, 27, 24, 22, 19, 12, 8 }; + const uint64_t keycnt = sizeof(keys) / sizeof(keys[0]); + const uint64_t bitcnt = sizeof(bits) / sizeof(bits[0]); printf("double realcoll[%d][%d] = {\n", keycnt, bitcnt); @@ -222,12 +222,11 @@ int main(void) { printcoll(key, bit); if (j == bitcnt - 1) { printf(" },\n"); - } else if ((j%3)==2) { + } else if ((j % 3) == 2) { printf(",\n "); } else { printf(", "); } - } } diff --git a/tests/AvalancheTest.cpp b/tests/AvalancheTest.cpp index 8ed3e45e..dc07dbe4 100644 --- a/tests/AvalancheTest.cpp +++ b/tests/AvalancheTest.cpp @@ -59,11 +59,11 @@ // VCode might have already included this #if defined(HAVE_AVX2) || defined(HAVE_SSE_4_1) -#include "Intrinsics.h" + #include "Intrinsics.h" #endif #if defined(HAVE_THREADS) -#include + #include typedef std::atomic a_int; #else typedef int a_int; @@ -71,52 +71,48 @@ typedef int a_int; //----------------------------------------------------------------------------- -static void PrintAvalancheDiagram ( int x, int y, int reps, double scale, uint32_t * bins ) -{ - const char * symbols = ".123456789X"; +static void PrintAvalancheDiagram( int x, int y, int reps, double scale, uint32_t * 
bins ) { + const char * symbols = ".123456789X"; - for(int i = 0; i < y; i++) - { - printf("["); - for(int j = 0; j < x; j++) - { - int k = (y - i) -1; + for (int i = 0; i < y; i++) { + printf("["); + for (int j = 0; j < x; j++) { + int k = (y - i) - 1; - uint32_t bin = bins[k + (j*y)]; + uint32_t bin = bins[k + (j * y)]; - double b = double(bin) / double(reps); - b = fabs(b*2 - 1); + double b = double(bin) / double(reps); + b = fabs(b * 2 - 1); - b *= scale; + b *= scale; - int s = (int)floor(b*10); + int s = (int)floor(b * 10); - if(s > 10) s = 10; - if(s < 0) s = 0; + if (s > 10) { s = 10; } + if (s < 0) { s = 0; } - printf("%c",symbols[s]); - } + printf("%c", symbols[s]); + } - printf("]\n"); - fflush(NULL); - } + printf("]\n"); + fflush(NULL); + } } //---------------------------------------------------------------------------- -static int maxBias ( uint32_t * counts, int buckets, int reps ) -{ - int expected = reps / 2; - int worst = 0; +static int maxBias( uint32_t * counts, int buckets, int reps ) { + int expected = reps / 2; + int worst = 0; - for(int i = 0; i < buckets; i++) - { - int c = abs((int)counts[i] - expected); - if(worst < c) - worst = c; - } + for (int i = 0; i < buckets; i++) { + int c = abs((int)counts[i] - expected); + if (worst < c) { + worst = c; + } + } - return worst; + return worst; } //----------------------------------------------------------------------------- @@ -127,217 +123,207 @@ static int maxBias ( uint32_t * counts, int buckets, int reps ) // cause "echoes" of the patterns in the output, which in turn can cause the // hash function to fail to create an even, random distribution of hash values. 
-template < typename hashtype > -static void calcBiasRange ( const HashFn hash, const seed_t seed, - std::vector &bins, - const int keybytes, const uint8_t * keys, - a_int & irepp, const int reps, const bool verbose ) -{ - const int keybits = keybytes * 8; - const int hashbytes = sizeof(hashtype); +template +static void calcBiasRange( const HashFn hash, const seed_t seed, std::vector & bins, const int keybytes, + const uint8_t * keys, a_int & irepp, const int reps, const bool verbose ) { + const int keybits = keybytes * 8; + const int hashbytes = sizeof(hashtype); + #if defined(HAVE_AVX2) - const __m256i ONE = _mm256_set1_epi32(1); - const __m256i MASK = _mm256_setr_epi32( - 1 << 0, - 1 << 1, - 1 << 2, - 1 << 3, - 1 << 4, - 1 << 5, - 1 << 6, - 1 << 7); + const __m256i ONE = _mm256_set1_epi32(1); + const __m256i MASK = _mm256_setr_epi32(1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7); #elif defined(HAVE_SSE_4_1) - const __m128i ONE = _mm_set1_epi32(1); - const __m128i MASK = _mm_setr_epi32( - 1 << 0, - 1 << 1, - 1 << 2, - 1 << 3); + const __m128i ONE = _mm_set1_epi32(1); + const __m128i MASK = _mm_setr_epi32(1 << 0, 1 << 1, 1 << 2, 1 << 3); #endif - uint8_t buf[keybytes]; - hashtype A,B; - int irep; + uint8_t buf[keybytes]; + hashtype A, B; + int irep; - while ((irep = irepp++) < reps) - { - if(verbose) { - if(irep % (reps/10) == 0) printf("."); - } + while ((irep = irepp++) < reps) { + if (verbose) { + if (irep % (reps / 10) == 0) { printf("."); } + } - ExtBlob K(buf, &keys[keybytes * irep], keybytes); - hash(K, keybytes, seed, &A); + ExtBlob K( buf, &keys[keybytes * irep], keybytes ); + hash(K, keybytes, seed, &A); - uint32_t * cursor = &bins[0]; + uint32_t * cursor = &bins[0]; - for(int iBit = 0; iBit < keybits; iBit++) - { - K.flipbit(iBit); - hash(K, keybytes, seed, &B); - K.flipbit(iBit); + for (int iBit = 0; iBit < keybits; iBit++) { + K.flipbit(iBit); + hash(K, keybytes, seed, &B); + K.flipbit(iBit); - B ^= A; + B ^= A; #if defined(HAVE_AVX2) 
- for(int oWord = 0; oWord < (hashbytes/4); oWord++) { - // Get the next 32-bit chunk of the hash difference - uint32_t word; - memcpy(&word, ((const uint8_t *)&B) + 4*oWord, 4); - - // Expand it out into 4 sets of 8 32-bit integer words, with - // each integer being zero or one. - __m256i base = _mm256_set1_epi32(word); - __m256i incr1 =_mm256_min_epu32(_mm256_and_si256(base, MASK), ONE); - base = _mm256_srli_epi32(base, 8); - __m256i incr2 =_mm256_min_epu32(_mm256_and_si256(base, MASK), ONE); - base = _mm256_srli_epi32(base, 8); - __m256i incr3 =_mm256_min_epu32(_mm256_and_si256(base, MASK), ONE); - base = _mm256_srli_epi32(base, 8); - __m256i incr4 =_mm256_min_epu32(_mm256_and_si256(base, MASK), ONE); - - // Add these into the counts in bins[] - __m256i cnt1 = _mm256_loadu_si256((const __m256i *)cursor); - cnt1 = _mm256_add_epi32(cnt1, incr1); - _mm256_storeu_si256((__m256i *)cursor, cnt1); - cursor += 8; - __m256i cnt2 = _mm256_loadu_si256((const __m256i *)cursor); - cnt2 = _mm256_add_epi32(cnt2, incr2); - _mm256_storeu_si256((__m256i *)cursor, cnt2); - cursor += 8; - __m256i cnt3 = _mm256_loadu_si256((const __m256i *)cursor); - cnt3 = _mm256_add_epi32(cnt3, incr3); - _mm256_storeu_si256((__m256i *)cursor, cnt3); - cursor += 8; - __m256i cnt4 = _mm256_loadu_si256((const __m256i *)cursor); - cnt4 = _mm256_add_epi32(cnt4, incr4); - _mm256_storeu_si256((__m256i *)cursor, cnt4); - cursor += 8; - } + for (int oWord = 0; oWord < (hashbytes / 4); oWord++) { + // Get the next 32-bit chunk of the hash difference + uint32_t word; + memcpy(&word, ((const uint8_t *)&B) + 4 * oWord, 4); + + // Expand it out into 4 sets of 8 32-bit integer words, with + // each integer being zero or one. 
+ __m256i base = _mm256_set1_epi32(word); + __m256i incr1 = _mm256_min_epu32(_mm256_and_si256(base, MASK), ONE); + base = _mm256_srli_epi32(base, 8); + __m256i incr2 = _mm256_min_epu32(_mm256_and_si256(base, MASK), ONE); + base = _mm256_srli_epi32(base, 8); + __m256i incr3 = _mm256_min_epu32(_mm256_and_si256(base, MASK), ONE); + base = _mm256_srli_epi32(base, 8); + __m256i incr4 = _mm256_min_epu32(_mm256_and_si256(base, MASK), ONE); + + // Add these into the counts in bins[] + __m256i cnt1 = _mm256_loadu_si256((const __m256i *)cursor); + cnt1 = _mm256_add_epi32(cnt1, incr1); + _mm256_storeu_si256((__m256i *)cursor, cnt1); + cursor += 8; + __m256i cnt2 = _mm256_loadu_si256((const __m256i *)cursor); + cnt2 = _mm256_add_epi32(cnt2, incr2); + _mm256_storeu_si256((__m256i *)cursor, cnt2); + cursor += 8; + __m256i cnt3 = _mm256_loadu_si256((const __m256i *)cursor); + cnt3 = _mm256_add_epi32(cnt3, incr3); + _mm256_storeu_si256((__m256i *)cursor, cnt3); + cursor += 8; + __m256i cnt4 = _mm256_loadu_si256((const __m256i *)cursor); + cnt4 = _mm256_add_epi32(cnt4, incr4); + _mm256_storeu_si256((__m256i *)cursor, cnt4); + cursor += 8; + } #elif defined(HAVE_SSE_4_1) - for(int oWord = 0; oWord < (hashbytes/4); oWord++) { - // Get the next 32-bit chunk of the hash difference - uint32_t word; - memcpy(&word, ((const uint8_t *)&B) + 4*oWord, 4); - - // Expand it out into 8 sets of 4 32-bit integer words, with - // each integer being zero or one, and add them into the - // counts in bins[]. 
- __m128i base = _mm_set1_epi32(word); - for (int i = 0; i < 8; i++) { - __m128i incr = _mm_min_epu32(_mm_and_si128(base, MASK), ONE); - __m128i cnt = _mm_loadu_si128((const __m128i *)cursor); - cnt = _mm_add_epi32(cnt, incr); - _mm_storeu_si128((__m128i *)cursor, cnt); - base = _mm_srli_epi32(base, 4); - cursor += 4; - } - } + for (int oWord = 0; oWord < (hashbytes / 4); oWord++) { + // Get the next 32-bit chunk of the hash difference + uint32_t word; + memcpy(&word, ((const uint8_t *)&B) + 4 * oWord, 4); + + // Expand it out into 8 sets of 4 32-bit integer words, with + // each integer being zero or one, and add them into the + // counts in bins[]. + __m128i base = _mm_set1_epi32(word); + for (int i = 0; i < 8; i++) { + __m128i incr = _mm_min_epu32(_mm_and_si128(base, MASK), ONE); + __m128i cnt = _mm_loadu_si128((const __m128i *)cursor); + cnt = _mm_add_epi32(cnt, incr); + _mm_storeu_si128((__m128i *)cursor, cnt); + base = _mm_srli_epi32(base, 4); + cursor += 4; + } + } #else - for(int oByte = 0; oByte < hashbytes; oByte++) { - uint8_t byte = B[oByte]; - for(int oBit = 0; oBit < 8; oBit++) { - (*cursor++) += byte & 1; - byte >>= 1; - } - } + for (int oByte = 0; oByte < hashbytes; oByte++) { + uint8_t byte = B[oByte]; + for (int oBit = 0; oBit < 8; oBit++) { + (*cursor++) += byte & 1; + byte >>= 1; + } + } #endif + } } - } } //----------------------------------------------------------------------------- -template < typename hashtype > -static bool AvalancheImpl(HashFn hash, const seed_t seed, const int keybits, - const int reps, bool drawDiagram, bool drawdots) { - Rand r(48273); +template +static bool AvalancheImpl( HashFn hash, const seed_t seed, const int keybits, + const int reps, bool drawDiagram, bool drawdots ) { + Rand r( 48273 ); - assert((keybits & 7)==0); + assert((keybits & 7) == 0); - const int keybytes = keybits / 8; + const int keybytes = keybits / 8; - const int hashbytes = sizeof(hashtype); - const int hashbits = hashbytes * 8; + const int 
hashbytes = sizeof(hashtype); + const int hashbits = hashbytes * 8; - const int arraysize = keybits * hashbits; + const int arraysize = keybits * hashbits; - printf("Testing %4d-bit keys -> %3d-bit hashes, %6d reps", - keybits, hashbits, reps); - //---------- - std::vector keys(reps * keybytes); - for (int i = 0; i < reps; i++) - r.rand_p(&keys[i*keybytes],keybytes); - addVCodeInput(&keys[0], reps * keybytes); + printf("Testing %4d-bit keys -> %3d-bit hashes, %6d reps", keybits, hashbits, reps); + //---------- + std::vector keys( reps * keybytes ); + for (int i = 0; i < reps; i++) { + r.rand_p(&keys[i * keybytes], keybytes); + } + addVCodeInput(&keys[0], reps * keybytes); - a_int irep(0); + a_int irep( 0 ); - std::vector > bins(g_NCPU); - for (unsigned i = 0; i < g_NCPU; i++) { - bins[i].resize(arraysize); - } + std::vector> bins( g_NCPU ); + for (unsigned i = 0; i < g_NCPU; i++) { + bins[i].resize(arraysize); + } - if (g_NCPU == 1) { - calcBiasRange(hash,seed,bins[0],keybytes,&keys[0],irep,reps,drawdots); - } else { + if (g_NCPU == 1) { + calcBiasRange(hash, seed, bins[0], keybytes, &keys[0], irep, reps, drawdots); + } else { #if defined(HAVE_THREADS) - std::thread t[g_NCPU]; - for (int i=0; i < g_NCPU; i++) { - t[i] = std::thread {calcBiasRange,hash,seed,std::ref(bins[i]),keybytes,&keys[0],std::ref(irep),reps,drawdots}; - } - for (int i=0; i < g_NCPU; i++) { - t[i].join(); - } - for (int i=1; i < g_NCPU; i++) - for (int b=0; b < arraysize; b++) - bins[0][b] += bins[i][b]; + std::thread t[g_NCPU]; + for (int i = 0; i < g_NCPU; i++) { + t[i] = std::thread { + calcBiasRange, hash, seed, std::ref(bins[i]), + keybytes, &keys[0], std::ref(irep), reps, drawdots + }; + } + for (int i = 0; i < g_NCPU; i++) { + t[i].join(); + } + for (int i = 1; i < g_NCPU; i++) { + for (int b = 0; b < arraysize; b++) { + bins[0][b] += bins[i][b]; + } + } #endif - } + } - //---------- + //---------- - int bias = maxBias(&bins[0][0], arraysize, reps); - bool result = true; + int bias = 
maxBias(&bins[0][0], arraysize, reps); + bool result = true; - // Due to threading and memory complications, add the summed - // avalanche results instead of the hash values. Not ideal, but the - // "real" way is just too expensive. - addVCodeOutput(&bins[0][0], arraysize * sizeof(bins[0][0])); - addVCodeResult(bias); + // Due to threading and memory complications, add the summed + // avalanche results instead of the hash values. Not ideal, but the + // "real" way is just too expensive. + addVCodeOutput(&bins[0][0], arraysize * sizeof(bins[0][0])); + addVCodeResult(bias); - result &= ReportBias(bias, reps, arraysize, drawDiagram); + result &= ReportBias(bias, reps, arraysize, drawDiagram); - recordTestResult(result, "Avalanche", keybits); + recordTestResult(result, "Avalanche", keybits); - return result; + return result; } //----------------------------------------------------------------------------- -template < typename hashtype > -bool AvalancheTest(const HashInfo * hinfo, const bool verbose, const bool extra) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - bool result = true; - bool drawdots = true; //.......... progress dots +template +bool AvalancheTest( const HashInfo * hinfo, const bool verbose, const bool extra ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + bool result = true; + bool drawdots = true; // .......... 
progress dots printf("[[[ Avalanche Tests ]]]\n\n"); const seed_t seed = hinfo->Seed(g_seed, false, 2); - std::vector testBitsvec = - { 24, 32, 40, 48, 56, 64, 72, 80, 96, 112, 128, 160 }; + std::vector testBitsvec = { 24, 32, 40, 48, 56, 64, 72, 80, 96, 112, 128, 160 }; testBitsvec.reserve(50); // Workaround for GCC bug 100366 if (hinfo->bits <= 64) { testBitsvec.insert(testBitsvec.end(), { 512, 1024 }); } if (extra) { - testBitsvec.insert(testBitsvec.end(), { 192, 224, 256, 320, 384, 448, 512, 640, - 768, 896, 1024, 1280, 1536 }); + testBitsvec.insert(testBitsvec.end(), { + 192, 224, 256, 320, 384, 448, 512, 640, + 768, 896, 1024, 1280, 1536 + }); } std::sort(testBitsvec.begin(), testBitsvec.end()); testBitsvec.erase(std::unique(testBitsvec.begin(), testBitsvec.end()), testBitsvec.end()); - for (int testBits : testBitsvec) { - result &= AvalancheImpl(hash,seed,testBits,300000,verbose,drawdots); + for (int testBits: testBitsvec) { + result &= AvalancheImpl(hash, seed, testBits, 300000, verbose, drawdots); } printf("\n%s\n", result ? "" : g_failstr); diff --git a/tests/AvalancheTest.h b/tests/AvalancheTest.h index 8e4ceb82..a61f87f6 100644 --- a/tests/AvalancheTest.h +++ b/tests/AvalancheTest.h @@ -44,5 +44,5 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -template < typename hashtype > -bool AvalancheTest(const HashInfo * info, const bool verbose, const bool extra); +template +bool AvalancheTest( const HashInfo * info, const bool verbose, const bool extra ); diff --git a/tests/BadSeedsTest.cpp b/tests/BadSeedsTest.cpp index 0ca787b0..e054ca7a 100644 --- a/tests/BadSeedsTest.cpp +++ b/tests/BadSeedsTest.cpp @@ -56,27 +56,28 @@ #include "BadSeedsTest.h" #if defined(HAVE_THREADS) -#include -#include -#include + #include + #include + #include #endif //----------------------------------------------------------------------------- // Find bad seeds, and test against the known secrets/bad seeds. 
-template< typename hashtype > -static bool TestSeed(const HashInfo * hinfo, const seed_t seed) { +template +static bool TestSeed( const HashInfo * hinfo, const seed_t seed ) { const HashFn hash = hinfo->hashFn(g_hashEndian); - const std::vector testlens = {1,2,4,8,12,16,32,64,128}; - const std::vector testbytes = {0,32,'0',127,128,255}; - const unsigned numtestbytes = testbytes.size(); - const hashtype zero = {0}; - std::vector hashes(numtestbytes); - std::set dummy_collisions; + const std::vector testlens = { 1, 2, 4, 8, 12, 16, 32, 64, 128 }; + const std::vector testbytes = { 0, 32, '0', 127, 128, 255 }; + const unsigned numtestbytes = testbytes.size(); + const hashtype zero = { 0 }; + std::vector hashes( numtestbytes ); + std::set dummy_collisions; bool result = true; - if (hinfo->is32BitSeed() && (seed > UINT64_C(0xffffffff))) + if (hinfo->is32BitSeed() && (seed > UINT64_C(0xffffffff))) { return true; + } /* Premake all the test keys */ uint8_t keys[numtestbytes][128]; @@ -87,14 +88,13 @@ static bool TestSeed(const HashInfo * hinfo, const seed_t seed) { printf("0x%" PRIx64 "\n", seed); const seed_t hseed = hinfo->Seed(seed, true); - for (int len : testlens) { + for (int len: testlens) { memset(&hashes[0], 0, numtestbytes * sizeof(hashtype)); for (int i = 0; i < numtestbytes; i++) { hash(&keys[i][0], len, hseed, &hashes[i]); if (hashes[0] == zero) { - printf("Confirmed broken seed 0x%" PRIx64 " => hash of 0" \ - " with key[%d] of all 0x%02x\n", - seed, len, testbytes[i]); + printf("Confirmed broken seed 0x%" PRIx64 " => hash of 0" + " with key[%d] of all 0x%02x\n", seed, len, testbytes[i]); result = false; } } @@ -111,14 +111,14 @@ static bool TestSeed(const HashInfo * hinfo, const seed_t seed) { return result; } -template< typename hashtype > -static bool BadSeedsKnown(const HashInfo * hinfo) { +template +static bool BadSeedsKnown( const HashInfo * hinfo ) { bool result = true; const std::set & seeds = hinfo->badseeds; printf("Testing %" PRIu64 " known bad 
seeds:\n", seeds.size()); - for (seed_t seed : seeds) { + for (seed_t seed: seeds) { bool thisresult = true; thisresult &= TestSeed(hinfo, seed); if (!hinfo->is32BitSeed() && (seed <= 0xffffffff) && (seed != 0)) { @@ -145,38 +145,38 @@ static bool BadSeedsKnown(const HashInfo * hinfo) { #if defined(HAVE_THREADS) // For keeping track of progress printouts across threads static std::atomic seed_progress; -static std::mutex print_mutex; +static std::mutex print_mutex; #else static unsigned seed_progress; #endif // Process part of a 2^32 range, split into g_NCPU threads -template< typename hashtype > -static void TestSeedRangeThread(const HashInfo * hinfo, const uint64_t hi, - const uint32_t start, const uint32_t endlow, - bool &result, bool &newresult) { +template +static void TestSeedRangeThread( const HashInfo * hinfo, const uint64_t hi, const uint32_t start, + const uint32_t endlow, bool & result, bool & newresult ) { const std::set & seeds = hinfo->badseeds; - const HashFn hash = hinfo->hashFn(g_hashEndian); - const seed_t last = hi | endlow; - const hashtype zero = {0}; - //static_assert(testbytes[0] == 0, "Code assumes first test byte is 0"); - const std::vector testbytes = {0,32,127,255}; - const unsigned numtestbytes = testbytes.size(); - std::vector hashes(numtestbytes); - std::set collisions; + const HashFn hash = hinfo->hashFn(g_hashEndian); + const seed_t last = hi | endlow; + const hashtype zero = { 0 }; + // static_assert(testbytes[0] == 0, "Code assumes first test byte is 0"); + const std::vector testbytes = { 0, 32, 127, 255 }; + const unsigned numtestbytes = testbytes.size(); + std::vector hashes( numtestbytes ); + std::set collisions; const char * progress_fmt = - (last <= UINT64_C(0xffffffff)) ? - "%8" PRIx64 "%c" : "%16" PRIx64 "%c"; + (last <= UINT64_C(0xffffffff)) ? + "%8" PRIx64 "%c" : "%16" PRIx64 "%c"; const uint64_t progress_nl_every = - (last <= UINT64_C(0xffffffff)) ? 8 : 4; + (last <= UINT64_C(0xffffffff)) ? 
8 : 4; int fails = 0; + result = true; { #if defined(HAVE_THREADS) - std::lock_guard lock(print_mutex); + std::lock_guard lock( print_mutex ); #endif printf("Testing [0x%016" PRIx64 ", 0x%016" PRIx64 "] ... \n", hi | start, last); } @@ -197,11 +197,11 @@ static void TestSeedRangeThread(const HashInfo * hinfo, const uint64_t hi, */ if ((seed & UINT64_C(0x1ffffff)) == UINT64_C(0x1ffffff)) { #if defined(HAVE_THREADS) - std::lock_guard lock(print_mutex); + std::lock_guard lock( print_mutex ); #endif - unsigned count = ++seed_progress; + unsigned count = ++seed_progress; const char spacer = ((count % progress_nl_every) == 0) ? '\n' : ' '; - printf (progress_fmt, seed, spacer); + printf(progress_fmt, seed, spacer); } /* Test the next seed against 16 copies of each test byte */ @@ -216,7 +216,7 @@ static void TestSeedRangeThread(const HashInfo * hinfo, const uint64_t hi, bool known_seed = (std::find(seeds.begin(), seeds.end(), seed) != seeds.end()); { #if defined(HAVE_THREADS) - std::lock_guard lock(print_mutex); + std::lock_guard lock( print_mutex ); #endif if (known_seed) { printf("\nVerified broken seed 0x%" PRIx64 " => 0 with key[16] of all 0 bytes\n", seed); @@ -234,7 +234,7 @@ static void TestSeedRangeThread(const HashInfo * hinfo, const uint64_t hi, /* Report if any collisions were found */ if (FindCollisions(hashes, collisions, 1000, true) > 0) { #if defined(HAVE_THREADS) - std::lock_guard lock(print_mutex); + std::lock_guard lock( print_mutex ); #endif bool known_seed = (std::find(seeds.begin(), seeds.end(), seed) != seeds.end()); if (known_seed) { @@ -255,75 +255,79 @@ static void TestSeedRangeThread(const HashInfo * hinfo, const uint64_t hi, } collisions.clear(); result = false; - if (!known_seed) + if (!known_seed) { newresult = true; + } } } while (seed++ != last); - out: + out: return; } // Test a full 2**32 range [hi + 0, hi + 0xffffffff]. // If no new bad seed is found, then newresult must be left unchanged. 
-template< typename hashtype > -static bool TestManySeeds(const HashInfo * hinfo, const uint64_t hi, bool &newresult) { - bool result = true; - seed_progress = 0; - - if (g_NCPU == 1) { - TestSeedRangeThread(hinfo, hi, 0x0, 0xffffffff, result, newresult); - printf("\n"); - } else { +template +static bool TestManySeeds( const HashInfo * hinfo, const uint64_t hi, bool & newresult ) { + bool result = true; + + seed_progress = 0; + + if (g_NCPU == 1) { + TestSeedRangeThread(hinfo, hi, 0x0, 0xffffffff, result, newresult); + printf("\n"); + } else { #if defined(HAVE_THREADS) - // split into g_NCPU threads - std::thread t[g_NCPU]; - const uint64_t len = UINT64_C(0x100000000) / g_NCPU; - // Can't make VLAs in C++, so have to use vectors, but can't - // pass a ref of a bool in a vector to a thread... :-< - bool * results = new bool[g_NCPU](); - bool * newresults = new bool[g_NCPU](); - - printf("%d threads starting...\n", g_NCPU); - for (int i=0; i < g_NCPU; i++) { - const uint32_t start = i * len; - const uint32_t end = (i < (g_NCPU - 1)) ? start + (len - 1) : 0xffffffff; - t[i] = std::thread {TestSeedRangeThread, hinfo, hi, start, end, - std::ref(results[i]), std::ref(newresults[i])}; - } - - std::this_thread::sleep_for(std::chrono::seconds(1)); - - for (int i=0; i < g_NCPU; i++) { - t[i].join(); - } - - printf("All %d threads ended\n", g_NCPU); - - for (int i=0; i < g_NCPU; i++) { - result &= results[i]; - newresult |= newresults[i]; - } - - delete [] results; - delete [] newresults; + // split into g_NCPU threads + std::thread t[g_NCPU]; + const uint64_t len = UINT64_C(0x100000000) / g_NCPU; + // Can't make VLAs in C++, so have to use vectors, but can't + // pass a ref of a bool in a vector to a thread... :-< + bool * results = new bool[g_NCPU](); + bool * newresults = new bool[g_NCPU](); + + printf("%d threads starting...\n", g_NCPU); + for (int i = 0; i < g_NCPU; i++) { + const uint32_t start = i * len; + const uint32_t end = (i < (g_NCPU - 1)) ? 
start + (len - 1) : 0xffffffff; + t[i] = std::thread { + TestSeedRangeThread, hinfo, hi, start, end, + std::ref(results[i]), std::ref(newresults[i]) + }; + } + + std::this_thread::sleep_for(std::chrono::seconds(1)); + + for (int i = 0; i < g_NCPU; i++) { + t[i].join(); + } + + printf("All %d threads ended\n", g_NCPU); + + for (int i = 0; i < g_NCPU; i++) { + result &= results[i]; + newresult |= newresults[i]; + } + + delete [] results; + delete [] newresults; #endif - } + } - // Since this can be threaded, just use the test parameters for the - // VCode input data. - addVCodeInput(hi); // hi - addVCodeInput(0); // lo start - addVCodeInput(0xffffffff); // lo end - // Nothing to add to VCodeOutput - addVCodeResult(result); + // Since this can be threaded, just use the test parameters for the + // VCode input data. + addVCodeInput( hi); // hi + addVCodeInput( 0); // lo start + addVCodeInput(0xffffffff); // lo end + // Nothing to add to VCodeOutput + addVCodeResult(result); - return result; + return result; } -template< typename hashtype > -static bool BadSeedsFind(const HashInfo * hinfo) { - bool result = true; +template +static bool BadSeedsFind( const HashInfo * hinfo ) { + bool result = true; bool newresult = false; printf("Testing the first 2**32 seeds ...\n"); @@ -347,10 +351,10 @@ static bool BadSeedsFind(const HashInfo * hinfo) { } //----------------------------------------------------------------------------- -template < typename hashtype > -bool BadSeedsTest(const HashInfo * hinfo, bool find_new_seeds) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - bool result = true; +template +bool BadSeedsTest( const HashInfo * hinfo, bool find_new_seeds ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + bool result = true; // Never find new bad seeds for mock hashes, except for aesrng if (hinfo->isMock() && (strncmp(hinfo->name, "aesrng", 6) != 0)) { diff --git a/tests/BadSeedsTest.h b/tests/BadSeedsTest.h index d500b4ec..29f679f3 100644 --- 
a/tests/BadSeedsTest.h +++ b/tests/BadSeedsTest.h @@ -47,5 +47,5 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -template < typename hashtype > -bool BadSeedsTest(const HashInfo * info, const bool find_new_seeds); +template +bool BadSeedsTest( const HashInfo * info, const bool find_new_seeds ); diff --git a/tests/BitIndependenceTest.cpp b/tests/BitIndependenceTest.cpp index e2d66f4c..c140315a 100644 --- a/tests/BitIndependenceTest.cpp +++ b/tests/BitIndependenceTest.cpp @@ -63,139 +63,130 @@ // The choices for VCode inputs may seem strange here, but they were // chosen in anticipation of threading this test. -template< typename keytype, typename hashtype > -static bool BicTest3(HashFn hash, const seed_t seed, const int reps, bool verbose = false ) -{ - const int keybytes = sizeof(keytype); - const int keybits = keybytes * 8; - const int hashbytes = sizeof(hashtype); - const int hashbits = hashbytes * 8; - const int pagesize = hashbits*hashbits*4; +template +static bool BicTest3( HashFn hash, const seed_t seed, const int reps, bool verbose = false ) { + const int keybytes = sizeof(keytype); + const int keybits = keybytes * 8; + const int hashbytes = sizeof(hashtype); + const int hashbits = hashbytes * 8; + const int pagesize = hashbits * hashbits * 4; - Rand r(11938); + Rand r( 11938 ); - double maxBias = 0; - int maxK = 0; - int maxA = 0; - int maxB = 0; + double maxBias = 0; + int maxK = 0; + int maxA = 0; + int maxB = 0; - keytype key; - hashtype h1,h2; + keytype key; + hashtype h1, h2; - std::vector bins(keybits*pagesize,0); + std::vector bins( keybits * pagesize, 0 ); - for(int keybit = 0; keybit < keybits; keybit++) - { - if(keybit % (keybits/10) == 0) printf("."); + for (int keybit = 0; keybit < keybits; keybit++) { + if (keybit % (keybits / 10) == 0) { printf("."); } - int * page = &bins[keybit*pagesize]; + int * page = &bins[keybit * pagesize]; - for(int irep = 0; irep < reps; irep++) - { - r.rand_p(&key, keybytes); - addVCodeInput(&key, keybytes); - 
addVCodeInput(keybit); + for (int irep = 0; irep < reps; irep++) { + r.rand_p(&key, keybytes); + addVCodeInput(&key , keybytes); + addVCodeInput(keybit); - hash(&key, keybytes, seed, &h1); - key.flipbit(keybit); - hash(&key, keybytes, seed, &h2); + hash(&key, keybytes, seed, &h1); + key.flipbit(keybit); + hash(&key, keybytes, seed, &h2); - hashtype d = h1 ^ h2; + hashtype d = h1 ^ h2; - for(int out1 = 0; out1 < hashbits-1; out1++) - for(int out2 = out1+1; out2 < hashbits; out2++) - { - int * b = &page[(out1*hashbits+out2)*4]; + for (int out1 = 0; out1 < hashbits - 1; out1++) { + for (int out2 = out1 + 1; out2 < hashbits; out2++) { + int * b = &page[(out1 * hashbits + out2) * 4]; - uint32_t x = d.getbit(out1) | (d.getbit(out2) << 1); + uint32_t x = d.getbit(out1) | (d.getbit(out2) << 1); - b[x]++; - } + b[x]++; + } + } + } } - } - printf("\n"); + printf("\n"); - for(int out1 = 0; out1 < hashbits-1; out1++) - { - for(int out2 = out1+1; out2 < hashbits; out2++) - { - if(verbose) printf("(%3d,%3d) - ",out1,out2); + for (int out1 = 0; out1 < hashbits - 1; out1++) { + for (int out2 = out1 + 1; out2 < hashbits; out2++) { + if (verbose) { printf("(%3d,%3d) - ", out1, out2); } - for(int keybit = 0; keybit < keybits; keybit++) - { - int * page = &bins[keybit*pagesize]; - int * bins = &page[(out1*hashbits+out2)*4]; + for (int keybit = 0; keybit < keybits; keybit++) { + int * page = &bins[keybit * pagesize]; + int * bins = &page[(out1 * hashbits + out2) * 4 ]; - double bias = 0; + double bias = 0; - for(int b = 0; b < 4; b++) - { - double b2 = double(bins[b]) / double(reps / 2); - b2 = fabs(b2 * 2 - 1); + for (int b = 0; b < 4; b++) { + double b2 = double(bins[b]) / double(reps / 2); + b2 = fabs(b2 * 2 - 1); - if(b2 > bias) bias = b2; - } + if (b2 > bias) { bias = b2; } + } - if(bias > maxBias) - { - maxBias = bias; - maxK = keybit; - maxA = out1; - maxB = out2; - } + if (bias > maxBias) { + maxBias = bias; + maxK = keybit; + maxA = out1; + maxB = out2; + } - if(verbose) - { - 
if (bias < 0.01) printf("."); - else if(bias < 0.05) printf("o"); - else if(bias < 0.33) printf("O"); - else printf("X"); - } - } + if (verbose) { + if (bias < 0.01) { printf("."); } else if (bias < 0.05) { printf("o"); } else if (bias < 0.33) { + printf("O"); + } else { + printf("X"); + } + } + } - // Finished keybit - if(verbose) printf("\n"); - } + // Finished keybit + if (verbose) { printf("\n"); } + } - if(verbose) - { - for(int i = 0; i < keybits+12; i++) printf("-"); - printf("\n"); + if (verbose) { + for (int i = 0; i < keybits + 12; i++) { printf("-"); } + printf("\n"); + } } - } - addVCodeOutput(&bins[0], keybits*pagesize*sizeof(bins[0])); - addVCodeResult((uint32_t)(maxBias * 1000.0)); - addVCodeResult(maxK); - addVCodeResult(maxA); - addVCodeResult(maxB); + addVCodeOutput(&bins[0], keybits * pagesize * sizeof(bins[0])); + addVCodeResult((uint32_t)(maxBias * 1000.0)); + addVCodeResult(maxK); + addVCodeResult(maxA); + addVCodeResult(maxB); - printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB); + printf("Max bias %f - (%3d : %3d,%3d)\n", maxBias, maxK, maxA, maxB); - // Bit independence is harder to pass than avalanche, so we're a bit more lax here. - bool result = (maxBias < 0.05); - return result; + // Bit independence is harder to pass than avalanche, so we're a bit more lax here. + bool result = (maxBias < 0.05); + return result; } //----------------------------------------------------------------------------- -template < typename hashtype > -bool BicTest(const HashInfo * hinfo, const bool verbose) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - bool result = true; - bool fewerreps = (hinfo->bits > 64 || hinfo->isVerySlow()) ? true : false; +template +bool BicTest( const HashInfo * hinfo, const bool verbose ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + bool result = true; + bool fewerreps = (hinfo->bits > 64 || hinfo->isVerySlow()) ? 
true : false; printf("[[[ BIC 'Bit Independence Criteria' Tests ]]]\n\n"); const seed_t seed = hinfo->Seed(g_seed); if (fewerreps) { - result &= BicTest3,hashtype>(hash,seed,100000,verbose); + result &= BicTest3, hashtype>(hash, seed, 100000, verbose); } else { - const long reps = 64000000/hinfo->bits; - //result &= BicTest(hash,2000000); - result &= BicTest3,hashtype>(hash,seed,(int)reps,verbose); + const long reps = 64000000 / hinfo->bits; + // result &= BicTest(hash,2000000); + result &= BicTest3, hashtype>(hash, seed, (int)reps, verbose); } recordTestResult(result, "BIC", (const char *)NULL); @@ -215,206 +206,192 @@ INSTANTIATE(BicTest, HASHTYPELIST); // Tests the Bit Independence Criteron. Stricter than Avalanche, but slow and // not really all that useful. -template< typename keytype, typename hashtype > -void BicTest1 ( HashFn hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose ) -{ - Rand r(11938); +template +void BicTest1( HashFn hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose ) { + Rand r( 11938 ); - const int keybytes = sizeof(keytype); - const int hashbytes = sizeof(hashtype); - const int hashbits = hashbytes * 8; + const int keybytes = sizeof(keytype); + const int hashbytes = sizeof(hashtype); + const int hashbits = hashbytes * 8; - std::vector bins(hashbits*hashbits*4,0); + std::vector bins( hashbits * hashbits * 4, 0 ); - keytype key; - hashtype h1,h2; + keytype key; + hashtype h1, h2; - for(int irep = 0; irep < reps; irep++) - { - if(verbose) { - if(irep % (reps/10) == 0) printf("."); - } + for (int irep = 0; irep < reps; irep++) { + if (verbose) { + if (irep % (reps / 10) == 0) { printf("."); } + } - r.rand_p(&key,keybytes); - hash(&key,keybytes,g_seed,&h1); + r.rand_p(&key, keybytes); + hash(&key, keybytes, g_seed, &h1); - key.flipbit(keybit); - hash(&key,keybytes,g_seed,&h2); + key.flipbit(keybit); + hash(&key, keybytes, g_seed, &h2); - hashtype d = h1 ^ h2; 
+ hashtype d = h1 ^ h2; - for(int out1 = 0; out1 < hashbits; out1++) - for(int out2 = 0; out2 < hashbits; out2++) - { - if(out1 == out2) continue; + for (int out1 = 0; out1 < hashbits; out1++) { + for (int out2 = 0; out2 < hashbits; out2++) { + if (out1 == out2) { continue; } - uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1); + uint32_t b = getbit(d, out1) | (getbit(d, out2) << 1); - bins[(out1 * hashbits + out2) * 4 + b]++; - } - } - - if(verbose) printf("\n"); - - maxBias = 0; - - for(int out1 = 0; out1 < hashbits; out1++) - { - for(int out2 = 0; out2 < hashbits; out2++) - { - if(out1 == out2) - { - if(verbose) printf("\\"); - continue; - } - - double bias = 0; - - for(int b = 0; b < 4; b++) - { - double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2); - b2 = fabs(b2 * 2 - 1); - - if(b2 > bias) bias = b2; - } - - if(bias > maxBias) - { - maxBias = bias; - maxA = out1; - maxB = out2; - } - - if(verbose) - { - if (bias < 0.01) printf("."); - else if(bias < 0.05) printf("o"); - else if(bias < 0.33) printf("O"); - else printf("X"); - } + bins[(out1 * hashbits + out2) * 4 + b]++; + } + } } - if(verbose) printf("\n"); - } + if (verbose) { printf("\n"); } + + maxBias = 0; + + for (int out1 = 0; out1 < hashbits; out1++) { + for (int out2 = 0; out2 < hashbits; out2++) { + if (out1 == out2) { + if (verbose) { printf("\\"); } + continue; + } + + double bias = 0; + + for (int b = 0; b < 4; b++) { + double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2); + b2 = fabs(b2 * 2 - 1); + + if (b2 > bias) { bias = b2; } + } + + if (bias > maxBias) { + maxBias = bias; + maxA = out1; + maxB = out2; + } + + if (verbose) { + if (bias < 0.01) { printf("."); } else if (bias < 0.05) { printf("o"); } else if (bias < 0.33) { + printf("O"); + } else { + printf("X"); + } + } + } + + if (verbose) { printf("\n"); } + } } //---------- -template< typename keytype, typename hashtype > -bool BicTest1 ( HashFn hash, const int reps ) -{ - const int 
keybytes = sizeof(keytype); - const int keybits = keybytes * 8; +template +bool BicTest1( HashFn hash, const int reps ) { + const int keybytes = sizeof(keytype); + const int keybits = keybytes * 8; - double maxBias = 0; - int maxK = 0; - int maxA = 0; - int maxB = 0; + double maxBias = 0; + int maxK = 0; + int maxA = 0; + int maxB = 0; - for(int i = 0; i < keybits; i++) - { - if(i % (keybits/10) == 0) printf("."); + for (int i = 0; i < keybits; i++) { + if (i % (keybits / 10) == 0) { printf("."); } - double bias; - int a,b; + double bias; + int a, b; - BicTest1(hash,i,reps,bias,a,b,true); + BicTest1(hash, i, reps, bias, a, b, true); - if(bias > maxBias) - { - maxBias = bias; - maxK = i; - maxA = a; - maxB = b; + if (bias > maxBias) { + maxBias = bias; + maxK = i; + maxA = a; + maxB = b; + } } - } - printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB); + printf("Max bias %f - (%3d : %3d,%3d)\n", maxBias, maxK, maxA, maxB); - // Bit independence is harder to pass than avalanche, so we're a bit more lax here. + // Bit independence is harder to pass than avalanche, so we're a bit more lax here. - bool result = (maxBias < 0.05); + bool result = (maxBias < 0.05); - return result; + return result; } //----------------------------------------------------------------------------- // BIC test variant - iterate over output bits, then key bits. 
No temp storage, // but slooooow -template< typename keytype, typename hashtype > -void BicTest2 ( HashFn hash, const int reps, bool verbose = true ) -{ - const int keybytes = sizeof(keytype); - const int keybits = keybytes * 8; - const int hashbytes = sizeof(hashtype); - const int hashbits = hashbytes * 8; - - Rand r(11938); - - double maxBias = 0; - int maxK = 0; - int maxA = 0; - int maxB = 0; - - keytype key; - hashtype h1,h2; - - for(int out1 = 0; out1 < hashbits-1; out1++) - for(int out2 = out1+1; out2 < hashbits; out2++) - { - if(verbose) printf("(%3d,%3d) - ",out1,out2); - - for(int keybit = 0; keybit < keybits; keybit++) - { - int bins[4] = { 0, 0, 0, 0 }; - - for(int irep = 0; irep < reps; irep++) - { - r.rand_p(&key,keybytes); - hash(&key,keybytes,g_seed,&h1); - key.flipbit(keybit); - hash(&key,keybytes,g_seed,&h2); +template +void BicTest2( HashFn hash, const int reps, bool verbose = true ) { + const int keybytes = sizeof(keytype); + const int keybits = keybytes * 8; + const int hashbytes = sizeof(hashtype); + const int hashbits = hashbytes * 8; - hashtype d = h1 ^ h2; + Rand r( 11938 ); - uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1); + double maxBias = 0; + int maxK = 0; + int maxA = 0; + int maxB = 0; - bins[b]++; - } + keytype key; + hashtype h1, h2; - double bias = 0; + for (int out1 = 0; out1 < hashbits - 1; out1++) { + for (int out2 = out1 + 1; out2 < hashbits; out2++) { + if (verbose) { printf("(%3d,%3d) - ", out1, out2); } - for(int b = 0; b < 4; b++) - { - double b2 = double(bins[b]) / double(reps / 2); - b2 = fabs(b2 * 2 - 1); + for (int keybit = 0; keybit < keybits; keybit++) { + int bins[4] = { 0, 0, 0, 0 }; - if(b2 > bias) bias = b2; - } + for (int irep = 0; irep < reps; irep++) { + r.rand_p(&key, keybytes); + hash(&key, keybytes, g_seed, &h1); + key.flipbit(keybit); + hash(&key, keybytes, g_seed, &h2); - if(bias > maxBias) - { - maxBias = bias; - maxK = keybit; - maxA = out1; - maxB = out2; - } + hashtype d = h1 ^ h2; - if(verbose) 
- { - if (bias < 0.05) printf("."); - else if(bias < 0.10) printf("o"); - else if(bias < 0.50) printf("O"); - else printf("X"); - } - } + uint32_t b = getbit(d, out1) | (getbit(d, out2) << 1); - // Finished keybit + bins[b]++; + } - if(verbose) printf("\n"); - } + double bias = 0; - printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB); + for (int b = 0; b < 4; b++) { + double b2 = double(bins[b]) / double(reps / 2); + b2 = fabs(b2 * 2 - 1); + + if (b2 > bias) { bias = b2; } + } + + if (bias > maxBias) { + maxBias = bias; + maxK = keybit; + maxA = out1; + maxB = out2; + } + + if (verbose) { + if (bias < 0.05) { printf("."); } else if (bias < 0.10) { printf("o"); } else if (bias < 0.50) { + printf("O"); + } else { + printf("X"); + } + } + } + + // Finished keybit + + if (verbose) { printf("\n"); } + } + } + + printf("Max bias %f - (%3d : %3d,%3d)\n", maxBias, maxK, maxA, maxB); } + #endif /* 0 */ diff --git a/tests/BitIndependenceTest.h b/tests/BitIndependenceTest.h index a068729e..7cf6def5 100644 --- a/tests/BitIndependenceTest.h +++ b/tests/BitIndependenceTest.h @@ -45,5 +45,5 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -template < typename hashtype > -bool BicTest(const HashInfo * info, const bool verbose); +template +bool BicTest( const HashInfo * info, const bool verbose ); diff --git a/tests/CyclicKeysetTest.cpp b/tests/CyclicKeysetTest.cpp index 52b17bcf..0b245071 100644 --- a/tests/CyclicKeysetTest.cpp +++ b/tests/CyclicKeysetTest.cpp @@ -49,7 +49,7 @@ #include "Platform.h" #include "Hashinfo.h" #include "TestGlobals.h" -#include "Stats.h" // For EstimateNbCollisions +#include "Stats.h" // For EstimateNbCollisions #include "Random.h" #include "Analyze.h" #include "Instantiate.h" @@ -63,69 +63,68 @@ // // (This keyset type is designed to make MurmurHash2 fail) -static inline uint32_t f3mix ( uint32_t k ) -{ - k ^= k >> 16; - k *= 0x85ebca6b; - k ^= k >> 13; - k *= 0xc2b2ae35; - k ^= k >> 16; +static inline uint32_t f3mix( uint32_t k ) { + k ^= k >> 16; + k *= 0x85ebca6b; + k ^= k >> 13; + k *= 0xc2b2ae35; + k ^= k >> 16; - return k; + return k; } -template < typename hashtype > -static bool CyclicKeyImpl(HashFn hash, const seed_t seed, int cycleLen, int cycleReps, const int keycount, bool drawDiagram) { - printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n",cycleReps,cycleLen,keycount); +template +static bool CyclicKeyImpl( HashFn hash, const seed_t seed, int cycleLen, + int cycleReps, const int keycount, bool drawDiagram ) { + printf("Keyset 'Cyclic' - %d cycles of %d bytes - %d keys\n", cycleReps, cycleLen, keycount); - Rand r(483723); + Rand r( 483723 ); - std::vector hashes; - hashes.resize(keycount); + std::vector hashes; + hashes.resize(keycount); - int keyLen = cycleLen * cycleReps; + int keyLen = cycleLen * cycleReps; - uint8_t * cycle = new uint8_t[cycleLen + 16]; - uint8_t * key = new uint8_t[keyLen]; + uint8_t * cycle = new uint8_t[cycleLen + 16]; + uint8_t * key = new uint8_t[keyLen ]; - //---------- + //---------- - for(int i = 0; i < keycount; i++) - { - r.rand_p(cycle,cycleLen); + for (int i = 0; i < keycount; i++) { + r.rand_p(cycle, cycleLen); - 
*(uint32_t*)cycle = f3mix(i ^ 0x746a94f1); + *(uint32_t *)cycle = f3mix(i ^ 0x746a94f1); - for(int j = 0; j < keyLen; j++) - { - key[j] = cycle[j % cycleLen]; - } + for (int j = 0; j < keyLen; j++) { + key[j] = cycle[j % cycleLen]; + } - hash(key, keyLen, seed, &hashes[i]); - addVCodeInput(key, keyLen); - } + hash(key, keyLen, seed, &hashes[i]); + addVCodeInput(key, keyLen); + } - //---------- + //---------- - bool result = TestHashList(hashes,drawDiagram); - printf("\n"); + bool result = TestHashList(hashes, drawDiagram); + printf("\n"); - delete [] key; - delete [] cycle; + delete [] key; + delete [] cycle; - addVCodeResult(result); + addVCodeResult(result); - recordTestResult(result, "Cyclic", cycleLen); + recordTestResult(result, "Cyclic", cycleLen); - return result; + return result; } //----------------------------------------------------------------------------- -template < typename hashtype > -bool CyclicKeyTest(const HashInfo * hinfo, const bool verbose) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - bool result = true; +template +bool CyclicKeyTest( const HashInfo * hinfo, const bool verbose ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + bool result = true; + #if defined(DEBUG) const int reps = 2; #else @@ -136,12 +135,12 @@ bool CyclicKeyTest(const HashInfo * hinfo, const bool verbose) { const seed_t seed = hinfo->Seed(g_seed); - result &= CyclicKeyImpl(hash,seed,sizeof(hashtype)+0,8,reps,verbose); - result &= CyclicKeyImpl(hash,seed,sizeof(hashtype)+1,8,reps,verbose); - result &= CyclicKeyImpl(hash,seed,sizeof(hashtype)+2,8,reps,verbose); - result &= CyclicKeyImpl(hash,seed,sizeof(hashtype)+3,8,reps,verbose); - result &= CyclicKeyImpl(hash,seed,sizeof(hashtype)+4,8,reps,verbose); - result &= CyclicKeyImpl(hash,seed,sizeof(hashtype)+8,8,reps,verbose); + result &= CyclicKeyImpl(hash, seed, sizeof(hashtype) + 0, 8, reps, verbose); + result &= CyclicKeyImpl(hash, seed, sizeof(hashtype) + 1, 8, reps, verbose); + result &= CyclicKeyImpl(hash, 
seed, sizeof(hashtype) + 2, 8, reps, verbose); + result &= CyclicKeyImpl(hash, seed, sizeof(hashtype) + 3, 8, reps, verbose); + result &= CyclicKeyImpl(hash, seed, sizeof(hashtype) + 4, 8, reps, verbose); + result &= CyclicKeyImpl(hash, seed, sizeof(hashtype) + 8, 8, reps, verbose); printf("%s\n", result ? "" : g_failstr); diff --git a/tests/CyclicKeysetTest.h b/tests/CyclicKeysetTest.h index 54b72cb4..7119f1c4 100644 --- a/tests/CyclicKeysetTest.h +++ b/tests/CyclicKeysetTest.h @@ -47,5 +47,5 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -template < typename hashtype > -bool CyclicKeyTest(const HashInfo * info, const bool verbose); +template +bool CyclicKeyTest( const HashInfo * info, const bool verbose ); diff --git a/tests/DiffDistributionTest.cpp b/tests/DiffDistributionTest.cpp index d74efcf3..7feda675 100644 --- a/tests/DiffDistributionTest.cpp +++ b/tests/DiffDistributionTest.cpp @@ -59,61 +59,59 @@ // generate random key pairs and run full distribution/collision tests on the // hash differentials -template < typename keytype, typename hashtype > -static bool DiffDistTest2(HashFn hash, const seed_t seed, bool drawDiagram) { - Rand r(857374); +template +static bool DiffDistTest2( HashFn hash, const seed_t seed, bool drawDiagram ) { + Rand r( 857374 ); - int keybits = sizeof(keytype) * 8; - const int keycount = 256*256*32; - keytype k; + int keybits = sizeof(keytype) * 8; + const int keycount = 256 * 256 * 32; + keytype k; - std::vector hashes(keycount); - hashtype h1,h2; + std::vector hashes( keycount ); + hashtype h1, h2; - bool result = true; + bool result = true; - for(int keybit = 0; keybit < keybits; keybit++) - { - printf("Testing bit %d - %d keys\n",keybit, keycount); + for (int keybit = 0; keybit < keybits; keybit++) { + printf("Testing bit %d - %d keys\n", keybit, keycount); - for(int i = 0; i < keycount; i++) - { - r.rand_p(&k, sizeof(keytype)); - hash(&k, sizeof(keytype), seed, &h1); - addVCodeInput(&k, sizeof(keytype)); + for (int i = 0; i < keycount; 
i++) { + r.rand_p(&k, sizeof(keytype)); + hash(&k, sizeof(keytype), seed, &h1); + addVCodeInput(&k, sizeof(keytype)); - k.flipbit(keybit); - hash(&k, sizeof(keytype), seed, &h2); - addVCodeInput(&k, sizeof(keytype)); + k.flipbit(keybit); + hash(&k, sizeof(keytype), seed, &h2); + addVCodeInput(&k, sizeof(keytype)); - hashes[i] = h1 ^ h2; - } + hashes[i] = h1 ^ h2; + } - bool thisresult = TestHashList(hashes,drawDiagram,true,true); - printf("\n"); + bool thisresult = TestHashList(hashes, drawDiagram, true, true); + printf("\n"); - addVCodeResult(thisresult); + addVCodeResult(thisresult); - recordTestResult(thisresult, "DiffDist", keybit); + recordTestResult(thisresult, "DiffDist", keybit); - result &= thisresult; - } + result &= thisresult; + } - return result; + return result; } //---------------------------------------------------------------------------- -template < typename hashtype > -bool DiffDistTest(const HashInfo * hinfo, const bool verbose) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - bool result = true; +template +bool DiffDistTest( const HashInfo * hinfo, const bool verbose ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + bool result = true; printf("[[[ DiffDist 'Differential Distribution' Tests ]]]\n\n"); const seed_t seed = hinfo->Seed(g_seed); - result &= DiffDistTest2,hashtype>(hash, seed, verbose); + result &= DiffDistTest2, hashtype>(hash, seed, verbose); printf("%s\n", result ? "" : g_failstr); @@ -126,7 +124,7 @@ INSTANTIATE(DiffDistTest, HASHTYPELIST); // An old implementation; currently unused. 
#if 0 -#include "SparseKeysetTest.h" // for SparseKeygenRecurse + #include "SparseKeysetTest.h" // for SparseKeygenRecurse //----------------------------------------------------------------------------- // Differential distribution test - for each N-bit input differential, generate // a large set of differential key pairs, hash them, and test the output @@ -143,56 +141,53 @@ INSTANTIATE(DiffDistTest, HASHTYPELIST); // #TODO - put diagram drawing back on -template < typename keytype, typename hashtype > -void DiffDistTest ( HashFn hash, const int diffbits, int trials, double & worst, double & avg ) -{ - std::vector keys(trials); - std::vector A(trials),B(trials); +template +void DiffDistTest( HashFn hash, const int diffbits, int trials, double & worst, double & avg ) { + std::vector keys( trials ); + std::vector A( trials ), B(trials); - //FIXME seedHash(hash, g_seed); - for(int i = 0; i < trials; i++) - { - rand_p(&keys[i],sizeof(keytype)); + // FIXME seedHash(hash, g_seed); + for (int i = 0; i < trials; i++) { + rand_p(&keys[i], sizeof(keytype)); - hash(&keys[i],sizeof(keytype),g_seed,(uint32_t*)&A[i]); - } + hash(&keys[i], sizeof(keytype), g_seed, (uint32_t *)&A[i]); + } - //---------- + //---------- - std::vector diffs; + std::vector diffs; - keytype temp(0); + keytype temp( 0 ); - SparseKeygenRecurse(0,diffbits,true,temp,diffs); + SparseKeygenRecurse(0, diffbits, true, temp, diffs); - //---------- + //---------- - worst = 0; - avg = 0; + worst = 0; + avg = 0; - hashtype h2; + hashtype h2; - for(size_t j = 0; j < diffs.size(); j++) - { - keytype & d = diffs[j]; + for (size_t j = 0; j < diffs.size(); j++) { + keytype & d = diffs[j]; - for(int i = 0; i < trials; i++) - { - keytype k2 = keys[i] ^ d; + for (int i = 0; i < trials; i++) { + keytype k2 = keys[i] ^ d; - hash(&k2,sizeof(k2),g_seed,&h2); + hash(&k2, sizeof(k2), g_seed, &h2); - B[i] = A[i] ^ h2; - } + B[i] = A[i] ^ h2; + } - double dworst,davg; + double dworst, davg; - TestDistributionFast(B,dworst,davg); 
+ TestDistributionFast(B, dworst, davg); - avg += davg; - worst = (dworst > worst) ? dworst : worst; - } + avg += davg; + worst = (dworst > worst) ? dworst : worst; + } - avg /= double(diffs.size()); + avg /= double(diffs.size()); } + #endif /* 0 */ diff --git a/tests/DiffDistributionTest.h b/tests/DiffDistributionTest.h index 6ffb44be..3647d957 100644 --- a/tests/DiffDistributionTest.h +++ b/tests/DiffDistributionTest.h @@ -48,5 +48,5 @@ // Differential distribution tests - generate a bunch of random keys, // see what happens to the hash value when we flip a bit of the key. -template < typename hashtype > -bool DiffDistTest(const HashInfo * info, const bool verbose); +template +bool DiffDistTest( const HashInfo * info, const bool verbose ); diff --git a/tests/DifferentialTest.cpp b/tests/DifferentialTest.cpp index 032d885c..26ef2f2a 100644 --- a/tests/DifferentialTest.cpp +++ b/tests/DifferentialTest.cpp @@ -47,7 +47,7 @@ #include "Platform.h" #include "Hashinfo.h" #include "TestGlobals.h" -#include "Stats.h" // for chooseUpToK +#include "Stats.h" // for chooseUpToK #include "Random.h" #include "Analyze.h" #include "Instantiate.h" @@ -59,7 +59,7 @@ #include #if defined(HAVE_THREADS) -#include + #include typedef std::atomic a_int; #else typedef int a_int; @@ -70,47 +70,45 @@ typedef int a_int; // occured once (these could be false positives). If we find identical // hash counts of 3 or more (2+ collisions), the differential test fails. 
-template < class keytype > -static bool ProcessDifferentials ( std::map & diffcounts, int reps, bool dumpCollisions ) -{ - int totalcount = 0; - int ignore = 0; +template +static bool ProcessDifferentials( std::map & diffcounts, int reps, bool dumpCollisions ) { + int totalcount = 0; + int ignore = 0; - bool result = true; + bool result = true; - if (diffcounts.size()) { - for (std::pair dc : diffcounts) { - uint32_t count = dc.second; + if (diffcounts.size()) { + for (std::pair dc: diffcounts) { + uint32_t count = dc.second; - totalcount += count; + totalcount += count; - if (count == 1) { - ignore++; - } else { - result = false; + if (count == 1) { + ignore++; + } else { + result = false; - if(dumpCollisions) { - double pct = 100 * (double(count) / double(reps)); - dc.first.printbits(""); - printf(" - %4.2f%%\n", pct ); - } - } - } - } + if (dumpCollisions) { + double pct = 100 * (double(count) / double(reps)); + dc.first.printbits(""); + printf(" - %4.2f%%\n", pct); + } + } + } + } - printf("%d total collisions, of which %d single collisions were ignored", - totalcount,ignore); + printf("%d total collisions, of which %d single collisions were ignored", totalcount, ignore); - addVCodeResult(totalcount); - addVCodeResult(ignore); + addVCodeResult(totalcount); + addVCodeResult(ignore ); - if(result == false) { - printf(" !!!!!"); - } + if (result == false) { + printf(" !!!!!"); + } - printf("\n\n"); + printf("\n\n"); - return result; + return result; } //----------------------------------------------------------------------------- @@ -121,144 +119,149 @@ static bool ProcessDifferentials ( std::map & diffcounts, int // 2^32 tests, we'll probably see some spurious random collisions, so don't report // them. 
-template < bool recursemore, typename keytype, typename hashtype > -static void DiffTestRecurse(const HashFn hash, const seed_t seed, keytype & k1, keytype & k2, hashtype & h1, hashtype & h2, int start, int bitsleft, std::map & diffcounts ) -{ - const int bits = sizeof(keytype)*8; +template +static void DiffTestRecurse( const HashFn hash, const seed_t seed, keytype & k1, keytype & k2, hashtype & h1, + hashtype & h2, int start, int bitsleft, std::map & diffcounts ) { + const int bits = sizeof(keytype) * 8; - assume(start < bits); - for(int i = start; i < bits; i++) - { - keytype k2_prev = k2; + assume(start < bits); + for (int i = start; i < bits; i++) { + keytype k2_prev = k2; - k2.flipbit(i); + k2.flipbit(i); - bitsleft--; + bitsleft--; - hash(&k2, sizeof(k2), seed, &h2); + hash(&k2, sizeof(k2), seed, &h2); - if(h1 == h2) - { - ++diffcounts[k1 ^ k2]; - } + if (h1 == h2) { + ++diffcounts[k1 ^ k2]; + } - if(recursemore && likely((i+1) < bits)) - { - if (bitsleft > 1) - DiffTestRecurse(hash,seed,k1,k2,h1,h2,i+1,bitsleft,diffcounts); - else - DiffTestRecurse(hash,seed,k1,k2,h1,h2,i+1,bitsleft,diffcounts); - } + if (recursemore && likely((i + 1) < bits)) { + if (bitsleft > 1) { + DiffTestRecurse(hash, seed, k1, k2, h1, h2, i + 1, bitsleft, diffcounts); + } else { + DiffTestRecurse(hash, seed, k1, k2, h1, h2, i + 1, bitsleft, diffcounts); + } + } - //k2.flipbit(i); - k2 = k2_prev; - bitsleft++; - } + // k2.flipbit(i); + k2 = k2_prev; + bitsleft++; + } } //----------------------------------------------------------------------------- -template < typename keytype, typename hashtype > -static void DiffTestImplThread(const HashFn hash, const seed_t seed, std::map &diffcounts, const uint8_t * keys, int diffbits, a_int & irepp, const int reps) { - const int keybytes = sizeof(keytype); +template +static void DiffTestImplThread( const HashFn hash, const seed_t seed, std::map & diffcounts, + const uint8_t * keys, int diffbits, a_int & irepp, const int reps ) { + const int 
keybytes = sizeof(keytype); + + keytype k1, k2; + hashtype h1, h2; - keytype k1,k2; - hashtype h1,h2; - h1 = h2 = 0; + h1 = h2 = 0; - int irep; - while ((irep = irepp++) < reps) { - if ((reps >= 10) && (irep % (reps/10) == 0)) { printf("."); } + int irep; + while ((irep = irepp++) < reps) { + if ((reps >= 10) && (irep % (reps / 10) == 0)) { printf("."); } - memcpy(&k1, &keys[keybytes * irep], sizeof(k1)); - k2 = k1; + memcpy(&k1, &keys[keybytes * irep], sizeof(k1)); + k2 = k1; - hash(&k1, sizeof(k1), seed, (void*)&h1); + hash(&k1, sizeof(k1), seed, (void *)&h1); - DiffTestRecurse(hash,seed,k1,k2,h1,h2,0,diffbits,diffcounts); - } + DiffTestRecurse(hash, seed, k1, k2, h1, h2, 0, diffbits, diffcounts); + } } //----------------------------------------------------------------------------- -template < typename keytype, typename hashtype > -static bool DiffTestImpl(HashFn hash, const seed_t seed, int diffbits, int reps, bool dumpCollisions) { - const int keybytes = sizeof(keytype); - const int keybits = sizeof(keytype) * 8; - const int hashbits = sizeof(hashtype) * 8; +template +static bool DiffTestImpl( HashFn hash, const seed_t seed, int diffbits, int reps, bool dumpCollisions ) { + const int keybytes = sizeof(keytype); + const int keybits = sizeof(keytype ) * 8; + const int hashbits = sizeof(hashtype) * 8; - double diffcount = chooseUpToK(keybits,diffbits); - double testcount = (diffcount * double(reps)); - double expected = testcount / pow(2.0,double(hashbits)); + double diffcount = chooseUpToK(keybits, diffbits); + double testcount = (diffcount * double(reps)); + double expected = testcount / pow(2.0, double(hashbits)); - printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n", - diffcount,diffbits,keybits,hashbits); - printf("%d reps, %0.f total tests, expecting %2.2f random collisions", - reps,testcount,expected); + printf("Testing %0.f up-to-%d-bit differentials in %d-bit keys -> %d bit hashes.\n", + diffcount, diffbits, keybits, 
hashbits); + printf("%d reps, %0.f total tests, expecting %2.2f random collisions", reps, testcount, expected); - Rand r(100); - std::vector keys(reps * keybytes); + Rand r( 100 ); + std::vector keys( reps * keybytes ); - for (int i = 0; i < reps; i++) - r.rand_p(&keys[i*keybytes],keybytes); - addVCodeInput(&keys[0], reps * keybytes); + for (int i = 0; i < reps; i++) { + r.rand_p(&keys[i * keybytes], keybytes); + } + addVCodeInput(&keys[0], reps * keybytes); - a_int irep(0); + a_int irep( 0 ); - std::vector > diffcounts(g_NCPU); + std::vector> diffcounts( g_NCPU ); - if ((g_NCPU == 1) || (reps < 10)) { - DiffTestImplThread(hash,seed,diffcounts[0],&keys[0],diffbits,irep,reps); - } else { + if ((g_NCPU == 1) || (reps < 10)) { + DiffTestImplThread(hash, seed, diffcounts[0], &keys[0], diffbits, irep, reps); + } else { #if defined(HAVE_THREADS) - std::thread t[g_NCPU]; - for (int i=0; i < g_NCPU; i++) { - t[i] = std::thread {DiffTestImplThread,hash,seed,std::ref(diffcounts[i]),&keys[0],diffbits,std::ref(irep),reps}; - } - for (int i=0; i < g_NCPU; i++) { - t[i].join(); - } - for (int i=1; i < g_NCPU; i++) - for (std::pair dc : diffcounts[i]) - diffcounts[0][dc.first] += dc.second; + std::thread t[g_NCPU]; + for (int i = 0; i < g_NCPU; i++) { + t[i] = std::thread { + DiffTestImplThread, hash, seed, std::ref(diffcounts[i]), + &keys[0], diffbits, std::ref(irep), reps + }; + } + for (int i = 0; i < g_NCPU; i++) { + t[i].join(); + } + for (int i = 1; i < g_NCPU; i++) { + for (std::pair dc: diffcounts[i]) { + diffcounts[0][dc.first] += dc.second; + } + } #endif - } + } - for (std::pair dc : diffcounts[0]) { - addVCodeOutput(&dc.first, sizeof(keytype)); - addVCodeOutput(&dc.second, sizeof(uint32_t)); - } + for (std::pair dc: diffcounts[0]) { + addVCodeOutput(&dc.first , sizeof(keytype) ); + addVCodeOutput(&dc.second, sizeof(uint32_t)); + } - printf("\n"); + printf("\n"); - bool result = true; + bool result = true; - result &= 
ProcessDifferentials(diffcounts[0],reps,dumpCollisions); + result &= ProcessDifferentials(diffcounts[0], reps, dumpCollisions); - recordTestResult(result, "Differential", diffbits); + recordTestResult(result, "Differential", diffbits); - return result; + return result; } //---------------------------------------------------------------------------- -template < typename hashtype > -bool DiffTest(const HashInfo * hinfo, const bool verbose, const bool extra) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - bool dumpCollisions = verbose; - bool result = true; +template +bool DiffTest( const HashInfo * hinfo, const bool verbose, const bool extra ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + bool dumpCollisions = verbose; + bool result = true; // Do fewer reps with slow or very bad hashes bool slowhash = hinfo->bits > 128 || hinfo->isSlow(); - int reps = hinfo->isMock() ? 2 : ((slowhash && !extra) ? 100 : 1000); + int reps = hinfo->isMock() ? 2 : ((slowhash && !extra) ? 100 : 1000); printf("[[[ Diff 'Differential' Tests ]]]\n\n"); const seed_t seed = hinfo->Seed(g_seed); - result &= DiffTestImpl< Blob<64>, hashtype >(hash,seed,5,reps,dumpCollisions); - result &= DiffTestImpl< Blob<128>, hashtype >(hash,seed,4,reps,dumpCollisions); - result &= DiffTestImpl< Blob<256>, hashtype >(hash,seed,3,reps,dumpCollisions); + result &= DiffTestImpl, hashtype>(hash, seed, 5, reps, dumpCollisions); + result &= DiffTestImpl, hashtype >(hash, seed, 4, reps, dumpCollisions); + result &= DiffTestImpl, hashtype >(hash, seed, 3, reps, dumpCollisions); printf("%s\n", result ? "" : g_failstr); diff --git a/tests/DifferentialTest.h b/tests/DifferentialTest.h index c298dd6d..5efede43 100644 --- a/tests/DifferentialTest.h +++ b/tests/DifferentialTest.h @@ -47,5 +47,5 @@ // Differential collision tests - generate a bunch of random keys, // see what happens to the hash value when we flip a few bits of the key. 
-template < typename hashtype > -bool DiffTest(const HashInfo * info, const bool verbose, const bool extra); +template +bool DiffTest( const HashInfo * info, const bool verbose, const bool extra ); diff --git a/tests/HashMapTest.cpp b/tests/HashMapTest.cpp index 52145554..07f2a3b4 100644 --- a/tests/HashMapTest.cpp +++ b/tests/HashMapTest.cpp @@ -46,7 +46,7 @@ #include "Timing.h" #include "Hashinfo.h" #include "TestGlobals.h" -#include "Stats.h" // For FilterOutliers, CalcMean, CalcStdv +#include "Stats.h" // For FilterOutliers, CalcMean, CalcStdv #include "Random.h" #include "HashMapTest.h" @@ -67,216 +67,211 @@ using namespace std; typedef std::unordered_map> std_hashmap; + std::function> std_hashmap; typedef phmap::flat_hash_map> fast_hashmap; + std::function> fast_hashmap; //----------------------------------------------------------------------------- // This should be a realistic I-Cache test, when our hash is used inlined // in a hash table. There the size matters more than the bulk speed. 
-std::vector HashMapInit(bool verbose) { - std::vector wordvec; - std::string line; - unsigned sum = 0; - - const char * ptr = hashmap_words + 1; // Skip over initial newline - while (*ptr != '\0') - { - const char * end = (const char *)rawmemchr(ptr, '\n'); - std::string str (ptr, end - ptr); - wordvec.push_back(str); - std::transform(str.begin(), str.begin()+1, str.begin(), ::toupper); - wordvec.push_back(str); - std::transform(str.begin(), str.end(), str.begin(), ::toupper); - wordvec.push_back(str); - sum += end - ptr; - ptr = end + 1; - } - - if (verbose) { - printf ("Read %" PRId64 " words from internal list, ", wordvec.size()); - printf ("avg len: %0.3f\n\n", (sum+0.0)/wordvec.size()); - } - return wordvec; +std::vector HashMapInit( bool verbose ) { + std::vector wordvec; + std::string line; + unsigned sum = 0; + + const char * ptr = hashmap_words + 1; // Skip over initial newline + + while (*ptr != '\0') { + const char * end = (const char *)rawmemchr(ptr, '\n'); + std::string str( ptr, end - ptr ); + wordvec.push_back(str); + std::transform(str.begin(), str.begin() + 1, str.begin(), ::toupper); + wordvec.push_back(str); + std::transform(str.begin(), str.end(), str.begin(), ::toupper); + wordvec.push_back(str); + sum += end - ptr; + ptr = end + 1; + } + + if (verbose) { + printf("Read %" PRId64 " words from internal list, ", wordvec.size()); + printf("avg len: %0.3f\n\n", (sum + 0.0) / wordvec.size()); + } + return wordvec; } //----------------------------------------------------------------------------- -static double HashMapSpeedTest ( HashFn hash, const int hashbits, - std::vector words, - const seed_t seed, const int trials, bool verbose ) -{ - //using phmap::flat_node_hash_map; - Rand r(82762); - std_hashmap hashmap(words.size(), [=](const std::string &key) - { - // 256 needed for hasshe2, but only size_t used - static char out[256] = { 0 }; - hash(key.c_str(), key.length(), seed, &out); - return *(size_t*)out; - }); - fast_hashmap 
phashmap(words.size(), [=](const std::string &key) - { - static char out[256] = { 0 }; // 256 for hasshe2, but stripped to 64/32 - hash(key.c_str(), key.length(), seed, &out); - return *(size_t*)out; - }); - - std::vector::iterator it; - std::vector times; - double t1; - - printf("std::unordered_map\n"); - printf("Init std HashMapTest: "); - fflush(NULL); - times.reserve(trials); - if (0 /*need_minlen64_align16(pfhash)*/) { - for (it = words.begin(); it != words.end(); it++) { - // requires min len 64, and 16byte key alignment - (*it).resize(64); +static double HashMapSpeedTest( HashFn hash, const int hashbits, std::vector words, + const seed_t seed, const int trials, bool verbose ) { + // using phmap::flat_node_hash_map; + Rand r( 82762 ); + + std_hashmap hashmap( words.size(), [=]( const std::string & key ) { + // 256 needed for hasshe2, but only size_t used + static char out[256] = { 0 }; + hash(key.c_str(), key.length(), seed, &out); + return *(size_t *)out; + } ); + fast_hashmap phashmap( words.size(), [=]( const std::string & key ) { + // 256 for hasshe2, but stripped to 64/32 + static char out[256] = { 0 }; + hash(key.c_str(), key.length(), seed, &out); + return *(size_t *)out; + } ); + + std::vector::iterator it; + std::vector times; + double t1; + + printf("std::unordered_map\n" ); + printf("Init std HashMapTest: "); + fflush(NULL); + times.reserve(trials); + if (0 /*need_minlen64_align16(pfhash)*/) { + for (it = words.begin(); it != words.end(); it++) { + // requires min len 64, and 16byte key alignment + (*it).resize(64); + } } - } - { - // hash inserts plus 1% deletes - volatile int64_t begin, end; - int i = 0; - begin = timer_start(); - for (it = words.begin(); it != words.end(); it++, i++) { - std::string line = *it; - hashmap[line] = 1; - if (i % 100 == 0) - hashmap.erase(line); + { + // hash inserts plus 1% deletes + volatile int64_t begin, end; + int i = 0; + begin = timer_start(); + for (it = words.begin(); it != words.end(); it++, i++) { + 
std::string line = *it; + hashmap[line] = 1; + if (i % 100 == 0) { + hashmap.erase(line); + } + } + end = timer_end(); + t1 = (double)(end - begin) / (double)words.size(); + } + fflush(NULL); + printf("%0.3f cycles/op (%zu inserts, 1%% deletions)\n", t1, words.size()); + printf("Running std HashMapTest: "); + if (t1 > 10000.) { // e.g. multiply_shift 459271.700 + printf("SKIP"); + return 0.; } - end = timer_end(); - t1 = (double)(end - begin) / (double)words.size(); - } - fflush(NULL); - printf("%0.3f cycles/op (%zu inserts, 1%% deletions)\n", - t1, words.size()); - printf("Running std HashMapTest: "); - if (t1 > 10000.) { // e.g. multiply_shift 459271.700 - printf("SKIP"); - return 0.; - } - fflush(NULL); - - for(int itrial = 0; itrial < trials; itrial++) - { // hash query - volatile int64_t begin, end; - int i = 0, found = 0; - double t; - begin = timer_start(); - for ( it = words.begin(); it != words.end(); it++, i++ ) - { - std::string line = *it; - if (hashmap[line]) - found++; + fflush(NULL); + + for (int itrial = 0; itrial < trials; itrial++) { // hash query + volatile int64_t begin, end; + int i = 0, found = 0; + double t; + begin = timer_start(); + for (it = words.begin(); it != words.end(); it++, i++) { + std::string line = *it; + if (hashmap[line]) { + found++; + } } - end = timer_end(); - t = (double)(end - begin) / (double)words.size(); - if(found > 0 && t > 0) times.push_back(t); + end = timer_end(); + t = (double)(end - begin) / (double)words.size(); + if ((found > 0) && (t > 0)) { times.push_back(t); } } - hashmap.clear(); - - std::sort(times.begin(),times.end()); - FilterOutliers(times); - double mean = CalcMean(times); - double stdv = CalcStdv(times); - printf("%0.3f cycles/op", mean); - printf(" (%0.1f stdv)\n", stdv); - - times.clear(); - - printf("\ngreg7mdp/parallel-hashmap\n"); - printf("Init fast HashMapTest: "); - fflush(NULL); - times.reserve(trials); - { // hash inserts and 1% deletes - volatile int64_t begin, end; - int i = 0; - begin = 
timer_start(); - for (it = words.begin(); it != words.end(); it++, i++) { - std::string line = *it; - phashmap[line] = 1; - if (i % 100 == 0) - phashmap.erase(line); + hashmap.clear(); + + std::sort(times.begin(), times.end()); + FilterOutliers(times); + double mean = CalcMean(times); + double stdv = CalcStdv(times); + printf("%0.3f cycles/op", mean); + printf(" (%0.1f stdv)\n", stdv); + + times.clear(); + + printf("\ngreg7mdp/parallel-hashmap\n"); + printf("Init fast HashMapTest: " ); + fflush(NULL); + times.reserve(trials); + { // hash inserts and 1% deletes + volatile int64_t begin, end; + int i = 0; + begin = timer_start(); + for (it = words.begin(); it != words.end(); it++, i++) { + std::string line = *it; + phashmap[line] = 1; + if (i % 100 == 0) { + phashmap.erase(line); + } + } + end = timer_end(); + t1 = (double)(end - begin) / (double)words.size(); + } + fflush(NULL); + printf("%0.3f cycles/op (%zu inserts, 1%% deletions)\n", t1, words.size()); + printf("Running fast HashMapTest: "); + if (t1 > 10000.) { // e.g. multiply_shift 459271.700 + printf("SKIP"); + return 0.; } - end = timer_end(); - t1 = (double)(end - begin) / (double)words.size(); - } - fflush(NULL); - printf("%0.3f cycles/op (%zu inserts, 1%% deletions)\n", - t1, words.size()); - printf("Running fast HashMapTest: "); - if (t1 > 10000.) { // e.g. 
multiply_shift 459271.700 - printf("SKIP"); - return 0.; - } - fflush(NULL); - for(int itrial = 0; itrial < trials; itrial++) - { // hash query - volatile int64_t begin, end; - int i = 0, found = 0; - double t; - begin = timer_start(); - for ( it = words.begin(); it != words.end(); it++, i++ ) - { - std::string line = *it; - if (phashmap[line]) - found++; + fflush(NULL); + for (int itrial = 0; itrial < trials; itrial++) { // hash query + volatile int64_t begin, end; + int i = 0, found = 0; + double t; + begin = timer_start(); + for (it = words.begin(); it != words.end(); it++, i++) { + std::string line = *it; + if (phashmap[line]) { + found++; + } } - end = timer_end(); - t = (double)(end - begin) / (double)words.size(); - if(found > 0 && t > 0) times.push_back(t); + end = timer_end(); + t = (double)(end - begin) / (double)words.size(); + if ((found > 0) && (t > 0)) { times.push_back(t); } } - phashmap.clear(); - fflush(NULL); - - std::sort(times.begin(),times.end()); - FilterOutliers(times); - double mean1 = CalcMean(times); - double stdv1 = CalcStdv(times); - printf("%0.3f cycles/op", mean1); - printf(" (%0.1f stdv) ", stdv1); - fflush(NULL); - - return mean; + phashmap.clear(); + fflush(NULL); + + std::sort(times.begin(), times.end()); + FilterOutliers(times); + double mean1 = CalcMean(times); + double stdv1 = CalcStdv(times); + printf("%0.3f cycles/op", mean1); + printf(" (%0.1f stdv) " , stdv1); + fflush(NULL); + + return mean; } //----------------------------------------------------------------------------- -static bool HashMapImpl ( HashFn hash, - const int hashbits, std::vector words, - const seed_t seed, const int trials, bool verbose ) -{ - double mean = 0.0; - try { - mean = HashMapSpeedTest( hash, hashbits, words, seed, trials, verbose); - } - catch (...) { - printf(" aborted !!!!\n"); - } - // if faster than ~sha1 - if (mean > 5. && mean < 1500.) - printf(" ....... PASS\n"); - else - printf(" ....... 
FAIL\n"); - return true; +static bool HashMapImpl( HashFn hash, const int hashbits, std::vector words, + const seed_t seed, const int trials, bool verbose ) { + double mean = 0.0; + + try { + mean = HashMapSpeedTest(hash, hashbits, words, seed, trials, verbose); + } catch (...) { + printf(" aborted !!!!\n"); + } + // if faster than ~sha1 + if ((mean > 5.) && (mean < 1500.)) { + printf(" ....... PASS\n"); + } else { + printf(" ....... FAIL\n"); + } + return true; } //----------------------------------------------------------------------------- -bool HashMapTest(const HashInfo * hinfo, const bool verbose, const bool extra) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - const int trials = (hinfo->isVerySlow() && !extra) ? 5 : 50; - bool result = true; +bool HashMapTest( const HashInfo * hinfo, const bool verbose, const bool extra ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + const int trials = (hinfo->isVerySlow() && !extra) ? 5 : 50; + bool result = true; printf("[[[ 'Hashmap' Speed Tests ]]]\n\n"); if (hinfo->isMock()) { - printf("Skipping Hashmap test; it is designed for true hashes\n\n"); - return result; + printf("Skipping Hashmap test; it is designed for true hashes\n\n"); + return result; } std::vector words = HashMapInit(verbose); @@ -285,9 +280,9 @@ bool HashMapTest(const HashInfo * hinfo, const bool verbose, const bool extra) { return result; } - Rand r(477537); + Rand r( 477537 ); const seed_t seed = hinfo->Seed(g_seed ^ r.rand_u64()); - result &= HashMapImpl(hash,hinfo->bits,words,seed,trials,verbose); + result &= HashMapImpl(hash, hinfo->bits, words, seed, trials, verbose); printf("\n%s\n", result ? "" : g_failstr); diff --git a/tests/HashMapTest.h b/tests/HashMapTest.h index 3ae36444..482c7ff0 100644 --- a/tests/HashMapTest.h +++ b/tests/HashMapTest.h @@ -43,6 +43,6 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -std::vector HashMapInit(bool verbose); +std::vector HashMapInit( bool verbose ); -bool HashMapTest(const HashInfo * info, const bool verbose, const bool extra); +bool HashMapTest( const HashInfo * info, const bool verbose, const bool extra ); diff --git a/tests/PRNGTest.cpp b/tests/PRNGTest.cpp index 5036d884..364e340f 100644 --- a/tests/PRNGTest.cpp +++ b/tests/PRNGTest.cpp @@ -60,38 +60,38 @@ //----------------------------------------------------------------------------- // Keyset 'Prng' -template< typename hashtype > -static void Prn_gen(int nbRn, HashFn hash, const seed_t seed, std::vector & hashes) { - assert(nbRn > 0); - - printf("Generating random numbers by hashing previous output - %d keys\n", nbRn); - - // Since hash() inputs depend upon previous outputs, we can't use - // that to verify cross-system consistency across hashes, so just - // use the test parameters for the input VCode. - addVCodeInput(nbRn); - addVCodeInput(sizeof(hashtype)); - - hashtype hcopy; - memset(&hcopy, 0, sizeof(hcopy)); - - // a generated random number becomes the input for the next one - for (int i=0; i< nbRn; i++) { - hashtype h; - hash(&hcopy, sizeof(hcopy), seed, &h); - hashes.push_back(h); - memcpy(&hcopy, &h, sizeof(h)); - } +template +static void Prn_gen( int nbRn, HashFn hash, const seed_t seed, std::vector & hashes ) { + assert(nbRn > 0); + + printf("Generating random numbers by hashing previous output - %d keys\n", nbRn); + + // Since hash() inputs depend upon previous outputs, we can't use + // that to verify cross-system consistency across hashes, so just + // use the test parameters for the input VCode. 
+ addVCodeInput(nbRn); + addVCodeInput(sizeof(hashtype)); + + hashtype hcopy; + memset(&hcopy, 0, sizeof(hcopy)); + + // a generated random number becomes the input for the next one + for (int i = 0; i < nbRn; i++) { + hashtype h; + hash(&hcopy, sizeof(hcopy), seed, &h); + hashes.push_back(h); + memcpy(&hcopy, &h, sizeof(h)); + } } //----------------------------------------------------------------------------- -template < typename hashtype > -bool PRNGTest(const HashInfo * hinfo, const bool verbose, const bool extra) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - bool result = true; - bool testCollision = true; - bool testDistribution = extra; +template +bool PRNGTest( const HashInfo * hinfo, const bool verbose, const bool extra ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + bool result = true; + bool testCollision = true; + bool testDistribution = extra; std::vector hashes; printf("[[[ Prng Tests ]]]\n\n"); diff --git a/tests/PRNGTest.h b/tests/PRNGTest.h index 2a8f66b6..8f304336 100644 --- a/tests/PRNGTest.h +++ b/tests/PRNGTest.h @@ -47,5 +47,5 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -template < typename hashtype > -bool PRNGTest(const HashInfo * info, const bool verbose, const bool extra); +template +bool PRNGTest( const HashInfo * info, const bool verbose, const bool extra ); diff --git a/tests/PerlinNoiseTest.cpp b/tests/PerlinNoiseTest.cpp index 62268298..fb0cc4c8 100644 --- a/tests/PerlinNoiseTest.cpp +++ b/tests/PerlinNoiseTest.cpp @@ -60,59 +60,58 @@ //----------------------------------------------------------------------------- // Keyset 'Perlin Noise' - X,Y coordinates on input & seed -template< typename hashtype > -static bool PerlinNoise (int Xbits, int Ybits, int inputLen, int step, - const HashInfo * hinfo, bool testColl, bool testDist, bool drawDiagram) -{ - assert(0 < Ybits && Ybits < 31); - assert(0 < Xbits && Xbits < 31); - assert(Xbits + Ybits < 31); - assert(inputLen*8 > Xbits); // enough space to run the test - - std::vector hashes; - int const xMax = (1 << Xbits); - int const yMax = (1 << Ybits); - const HashFn hash = hinfo->hashFn(g_hashEndian); +template +static bool PerlinNoise( int Xbits, int Ybits, int inputLen, int step, const HashInfo * hinfo, + bool testColl, bool testDist, bool drawDiagram ) { + assert( 0 < Ybits && Ybits < 31); + assert( 0 < Xbits && Xbits < 31); + assert( Xbits + Ybits < 31 ); + assert(inputLen * 8 > Xbits ); // enough space to run the test + + std::vector hashes; + int const xMax = (1 << Xbits); + int const yMax = (1 << Ybits); + const HashFn hash = hinfo->hashFn(g_hashEndian); #define INPUT_LEN_MAX 256 - assert(inputLen <= INPUT_LEN_MAX); - uint8_t key[INPUT_LEN_MAX] = {0}; - - printf("Generating coordinates from %3i-byte keys - %d keys\n", inputLen, xMax * yMax); - - addVCodeInput(yMax); - // Since seeding can be expensive, loop over the seed-dependent - // variable first. 
- for (uint64_t y = 0; y < yMax; y++) { - const seed_t seed = hinfo->Seed(y, true); - for (uint64_t x = 0; x < xMax; x++) { - // Put x in little-endian order - uint64_t xin = COND_BSWAP(x, isBE()); - memcpy(key, &xin, sizeof(xin)); - - hashtype h; - hash(key, inputLen, seed, &h); - addVCodeInput(key, inputLen); - hashes.push_back(h); - } - } - - bool result = TestHashList(hashes,drawDiagram,testColl,testDist); - printf("\n"); - - recordTestResult(result, "PerlinNoise", inputLen); - - addVCodeResult(result); - - return result; + assert(inputLen <= INPUT_LEN_MAX ); + uint8_t key[INPUT_LEN_MAX] = { 0 }; + + printf("Generating coordinates from %3i-byte keys - %d keys\n", inputLen, xMax * yMax); + + addVCodeInput(yMax); + // Since seeding can be expensive, loop over the seed-dependent + // variable first. + for (uint64_t y = 0; y < yMax; y++) { + const seed_t seed = hinfo->Seed(y, true); + for (uint64_t x = 0; x < xMax; x++) { + // Put x in little-endian order + uint64_t xin = COND_BSWAP(x, isBE()); + memcpy(key, &xin, sizeof(xin)); + + hashtype h; + hash(key, inputLen, seed, &h); + addVCodeInput(key, inputLen); + hashes.push_back(h); + } + } + + bool result = TestHashList(hashes, drawDiagram, testColl, testDist); + printf("\n"); + + recordTestResult(result, "PerlinNoise", inputLen); + + addVCodeResult(result); + + return result; } //----------------------------------------------------------------------------- -template< typename hashtype > -bool PerlinNoiseTest (const HashInfo * hinfo, const bool verbose, const bool extra) { - bool result = true; - bool testCollision = true; +template +bool PerlinNoiseTest( const HashInfo * hinfo, const bool verbose, const bool extra ) { + bool result = true; + bool testCollision = true; bool testDistribution = extra; printf("[[[ Keyset 'PerlinNoise' Tests ]]]\n\n"); diff --git a/tests/PerlinNoiseTest.h b/tests/PerlinNoiseTest.h index 866e6061..25483937 100644 --- a/tests/PerlinNoiseTest.h +++ b/tests/PerlinNoiseTest.h @@ -47,5 +47,5 
@@ * OTHER DEALINGS IN THE SOFTWARE. */ -template< typename hashtype > -bool PerlinNoiseTest (const HashInfo * info, const bool verbose, const bool extra); +template +bool PerlinNoiseTest( const HashInfo * info, const bool verbose, const bool extra ); diff --git a/tests/PermutationKeysetTest.cpp b/tests/PermutationKeysetTest.cpp index ce55928c..bbe97074 100644 --- a/tests/PermutationKeysetTest.cpp +++ b/tests/PermutationKeysetTest.cpp @@ -60,182 +60,239 @@ //----------------------------------------------------------------------------- // Keyset 'Combination' - all possible combinations of input blocks -template< typename hashtype > -static void CombinationKeygenRecurse(uint8_t * key, int len, int maxlen, - const uint8_t * blocks, uint32_t blockcount, uint32_t blocksz, - HashFn hash, const seed_t seed, std::vector & hashes) { - if(len == maxlen) return; // end recursion +template +static void CombinationKeygenRecurse( uint8_t * key, int len, int maxlen, const uint8_t * blocks, uint32_t blockcount, + uint32_t blocksz, HashFn hash, const seed_t seed, std::vector & hashes ) { + if (len == maxlen) { return; } // end recursion - for(int i = 0; i < blockcount; i++) - { - memcpy(&key[len * blocksz], &blocks[i * blocksz], blocksz); + for (int i = 0; i < blockcount; i++) { + memcpy(&key[len * blocksz], &blocks[i * blocksz], blocksz); - hashtype h; - hash(key, (len+1) * blocksz, seed, &h); - addVCodeInput(key, (len+1) * blocksz); - hashes.push_back(h); + hashtype h; + hash(key, (len + 1) * blocksz, seed, &h); + addVCodeInput(key, (len + 1) * blocksz); + hashes.push_back(h); - CombinationKeygenRecurse(key,len+1,maxlen,blocks,blockcount,blocksz,hash,seed,hashes); - } + CombinationKeygenRecurse(key, len + 1, maxlen, blocks, blockcount, blocksz, hash, seed, hashes); + } } -template< typename hashtype > -static bool CombinationKeyTest( HashFn hash, const seed_t seed, int maxlen, - const uint8_t * blocks, uint32_t blockcount, uint32_t blocksz, const char * testdesc, - bool 
testColl, bool testDist, bool drawDiagram) { - printf("Keyset 'Combination %s' - up to %d blocks from a set of %d - ",testdesc,maxlen,blockcount); +template +static bool CombinationKeyTest( HashFn hash, const seed_t seed, int maxlen, const uint8_t * blocks, uint32_t blockcount, + uint32_t blocksz, const char * testdesc, bool testColl, bool testDist, bool drawDiagram ) { + printf("Keyset 'Combination %s' - up to %d blocks from a set of %d - ", testdesc, maxlen, blockcount); - //---------- + //---------- - std::vector hashes; + std::vector hashes; - uint8_t * key = new uint8_t[maxlen*blocksz]; + uint8_t * key = new uint8_t[maxlen * blocksz]; - CombinationKeygenRecurse(key,0,maxlen,blocks,blockcount,blocksz,hash,seed,hashes); + CombinationKeygenRecurse(key, 0, maxlen, blocks, blockcount, blocksz, hash, seed, hashes); - delete [] key; + delete [] key; - printf("%d keys\n",(int)hashes.size()); + printf("%d keys\n", (int)hashes.size()); - //---------- + //---------- - bool result = TestHashList(hashes,drawDiagram,testColl,testDist); - printf("\n"); + bool result = TestHashList(hashes, drawDiagram, testColl, testDist); + printf("\n" ); - return result; + return result; } //----------------------------------------------------------------------------- const struct { - const char * desc; - const int maxlen; - const uint32_t nrBlocks; - const uint32_t szBlock; // Verify nrBlocks * szBlock == blocks.size() - const std::vector blocks; + const char * desc; + const int maxlen; + const uint32_t nrBlocks; + const uint32_t szBlock; // Verify nrBlocks * szBlock == blocks.size() + const std::vector blocks; } keytests[] = { // This one breaks lookup3, surprisingly - { "4-bytes [3 low bits]", 7, 8, 4, - { 0, 0, 0, 0, - 1, 0, 0, 0, - 2, 0, 0, 0, - 3, 0, 0, 0, - 4, 0, 0, 0, - 5, 0, 0, 0, - 6, 0, 0, 0, - 7, 0, 0, 0 } }, - { "4-bytes [3 high bits]", 7, 8, 4, - { 0, 0, 0, 0, - 0, 0, 0, 32, - 0, 0, 0, 64, - 0, 0, 0, 96, - 0, 0, 0, 128, - 0, 0, 0, 160, - 0, 0, 0, 192, - 0, 0, 0, 224 } }, - { 
"4-bytes [3 high+low bits]", 6, 15, 4, - { 0, 0, 0, 0, - 1, 0, 0, 0, - 2, 0, 0, 0, - 3, 0, 0, 0, - 4, 0, 0, 0, - 5, 0, 0, 0, - 6, 0, 0, 0, - 7, 0, 0, 0, - 0, 0, 0, 32, - 0, 0, 0, 64, - 0, 0, 0, 96, - 0, 0, 0, 128, - 0, 0, 0, 160, - 0, 0, 0, 192, - 0, 0, 0, 224 } }, - { "4-bytes [0, low bit]", 0, 2, 4, - { 0, 0, 0, 0, - 1, 0, 0, 0 } }, - { "4-bytes [0, high bit]", 0, 2, 4, - { 0, 0, 0, 0, - 0, 0, 0, 128 } }, - { "8-bytes [0, low bit]", 0, 2, 8, - { 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, } }, - { "8-bytes [0, high bit]", 0, 2, 8, - { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 128, } }, - { "16-bytes [0, low bit]", 0, 2, 16, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } }, - { "16-bytes [0, high bit]", 0, 2, 16, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, } }, - { "32-bytes [0, low bit]", 0, 2, 32, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } }, - { "32-bytes [0, high bit]", 0, 2, 32, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, } }, - { "64-bytes [0, low bit]", 0, 2, 64, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } }, - { "64-bytes [0, high bit]", 0, 2, 64, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, } }, - { "128-bytes [0, low bit]", 0, 2, 128, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, } }, - { "128-bytes [0, high bit]", 0, 2, 128, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, } }, + { + 
"4-bytes [3 low bits]", 7, 8, 4, + { + 0, 0, 0, 0, + 1, 0, 0, 0, + 2, 0, 0, 0, + 3, 0, 0, 0, + 4, 0, 0, 0, + 5, 0, 0, 0, + 6, 0, 0, 0, + 7, 0, 0, 0 + } + }, + { + "4-bytes [3 high bits]", 7, 8, 4, + { + 0, 0, 0, 0, + 0, 0, 0, 32, + 0, 0, 0, 64, + 0, 0, 0, 96, + 0, 0, 0, 128, + 0, 0, 0, 160, + 0, 0, 0, 192, + 0, 0, 0, 224 + } + }, + { + "4-bytes [3 high+low bits]", 6, 15, 4, + { + 0, 0, 0, 0, + 1, 0, 0, 0, + 2, 0, 0, 0, + 3, 0, 0, 0, + 4, 0, 0, 0, + 5, 0, 0, 0, + 6, 0, 0, 0, + 7, 0, 0, 0, + 0, 0, 0, 32, + 0, 0, 0, 64, + 0, 0, 0, 96, + 0, 0, 0, 128, + 0, 0, 0, 160, + 0, 0, 0, 192, + 0, 0, 0, 224 + } + }, + { + "4-bytes [0, low bit]", 0, 2, 4, + { + 0, 0, 0, 0, + 1, 0, 0, 0 + } + }, + { + "4-bytes [0, high bit]", 0, 2, 4, + { + 0, 0, 0, 0, + 0, 0, 0, 128 + } + }, + { + "8-bytes [0, low bit]", 0, 2, 8, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, + } + }, + { + "8-bytes [0, high bit]", 0, 2, 8, + { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 128, + } + }, + { + "16-bytes [0, low bit]", 0, 2, 16, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + } + }, + { + "16-bytes [0, high bit]", 0, 2, 16, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, + } + }, + { + "32-bytes [0, low bit]", 0, 2, 32, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + } + }, + { + "32-bytes [0, high bit]", 0, 2, 32, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, + } + }, + { + "64-bytes [0, low bit]", 0, 2, 64, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + } + }, + { + "64-bytes [0, high bit]", 0, 2, 64, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, + } + }, + { + "128-bytes [0, low bit]", 0, 2, 128, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + } + }, + { + "128-bytes [0, high bit]", 0, 2, 128, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, + } + }, }; -template < typename hashtype > -bool PermutedKeyTest(const HashInfo * hinfo, const bool verbose, const bool extra) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - const int default_maxlen = extra ? 23 : (hinfo->bits >= 128) ? 17 : 22; - bool result = true; +template +bool PermutedKeyTest( const HashInfo * hinfo, const bool verbose, const bool extra ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + const int default_maxlen = extra ? 23 : (hinfo->bits >= 128) ? 17 : 22; + bool result = true; printf("[[[ Keyset 'Permutation' Tests ]]]\n\n"); @@ -243,12 +300,11 @@ bool PermutedKeyTest(const HashInfo * hinfo, const bool verbose, const bool extr for (auto test: keytests) { bool curresult = true; - int maxlen = test.maxlen > 0 ? test.maxlen : default_maxlen; + int maxlen = test.maxlen > 0 ? test.maxlen : default_maxlen; assert(test.blocks.size() == test.nrBlocks * test.szBlock); - curresult &= CombinationKeyTest(hash, seed, maxlen, - &(test.blocks[0]), test.nrBlocks, test.szBlock, test.desc, - true, true, verbose); + curresult &= CombinationKeyTest(hash, seed, maxlen, &(test.blocks[0]), + test.nrBlocks, test.szBlock, test.desc, true, true, verbose); recordTestResult(curresult, "Permutation", test.desc); diff --git a/tests/PermutationKeysetTest.h b/tests/PermutationKeysetTest.h index 6d1a3b42..90506ddd 100644 --- a/tests/PermutationKeysetTest.h +++ b/tests/PermutationKeysetTest.h @@ -47,5 +47,5 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -template < typename hashtype > -bool PermutedKeyTest(const HashInfo * info, const bool verbose, const bool extra); +template +bool PermutedKeyTest( const HashInfo * info, const bool verbose, const bool extra ); diff --git a/tests/PopcountTest.cpp b/tests/PopcountTest.cpp index 3f907bc6..8fa8407d 100644 --- a/tests/PopcountTest.cpp +++ b/tests/PopcountTest.cpp @@ -78,258 +78,261 @@ typedef uint32_t popcnt_hist[65]; // Copy the results into g_NCPU ranges of 2^32 -static void PopcountThread(const HashInfo * hinfo, const seed_t seed, const int inputSize, - const unsigned start, const unsigned end, const unsigned step, - popcnt_hist &hist1, popcnt_hist &hist2) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - long double const n = (end-(start+1)) / step; - uint64_t previous = 0; +static void PopcountThread( const HashInfo * hinfo, const seed_t seed, const int inputSize, const unsigned start, + const unsigned end, const unsigned step, popcnt_hist & hist1, popcnt_hist & hist2 ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + long double const n = (end - (start + 1)) / step; + uint64_t previous = 0; + #define INPUT_SIZE_MAX 256 - assert(inputSize <= INPUT_SIZE_MAX); - char key[INPUT_SIZE_MAX] = {0}; + assert(inputSize <= INPUT_SIZE_MAX ); + char key[INPUT_SIZE_MAX] = { 0 }; #define HASH_SIZE_MAX 64 - char hbuff[HASH_SIZE_MAX] = {0}; - const int hbits = std::min(hinfo->bits, 64U); // limited due to popcount8 - - assert(sizeof(unsigned) <= inputSize); - assert(start < end); - //assert(step > 0); + char hbuff[HASH_SIZE_MAX] = { 0 }; + const int hbits = std::min(hinfo->bits, 64U); // limited due to popcount8 - uint64_t i = start - step; - memcpy(key, &i, sizeof(i)); - hash(key, inputSize, seed, hbuff); - memcpy(&previous, hbuff, 8); + assert(sizeof(unsigned) <= inputSize); + assert(start < end); + // assert(step > 0); - for (uint64_t i=start; i<=end; i+=step) { + uint64_t i = start - step; memcpy(key, &i, sizeof(i)); hash(key, inputSize, seed, hbuff); + 
memcpy(&previous, hbuff, 8); + + for (uint64_t i = start; i <= end; i += step) { + memcpy(key, &i, sizeof(i)); + hash(key, inputSize, seed, hbuff); - // popcount8 assumed to work on 64-bit - // note : ideally, one should rather popcount the whole hash - uint64_t h; - memcpy(&h, hbuff, 8); + // popcount8 assumed to work on 64-bit + // note : ideally, one should rather popcount the whole hash + uint64_t h; + memcpy(&h, hbuff, 8); - uint64_t setbits = popcount8(h); - hist1[setbits]++; + uint64_t setbits = popcount8(h); + hist1[setbits]++; - // derivative - setbits = popcount8(h ^ previous); - hist2[setbits]++; - previous = h; - } + // derivative + setbits = popcount8(h ^ previous); + hist2[setbits]++; + previous = h; + } } -static bool PopcountResults ( long double srefh, long double srefl, - long double b1h, long double b1l, - long double b0h, long double b0l ) -{ - double worst; - { - double chi2 = (b1h-srefh) * (b1h-srefh) / (b1l+srefl); - printf("From counting 1s : %9.2Lf, %9.2Lf - moment chisq %10.4f\n", - b1h, b1l, chi2); - worst = chi2; - } - { - double chi2 = (b0h-srefh) * (b0h-srefh) / (b0l+srefl); - printf("From counting 0s : %9.2Lf, %9.2Lf - moment chisq %10.4f\n", - b0h, b0l, chi2); - worst = std::max(worst, chi2); - } - - // note : previous threshold : 3.84145882069413 - int const rank = (worst < 500.) + (worst < 50.) 
+ (worst < 5.); - assert(0 <= rank && rank <= 3); - - const char* rankstr[4] = { "FAIL !!!!", "pass", "Good", "Great" }; - printf("Test result: %s\n", rankstr[rank]); - - addVCodeResult((uint32_t)(worst * 1000.0)); - - return (rank > 0); +static bool PopcountResults( long double srefh, long double srefl, long double b1h, + long double b1l, long double b0h, long double b0l ) { + double worst; + { + double chi2 = (b1h - srefh) * (b1h - srefh) / (b1l + srefl); + printf("From counting 1s : %9.2Lf, %9.2Lf - moment chisq %10.4f\n", b1h, b1l, chi2); + worst = chi2; + } + { + double chi2 = (b0h - srefh) * (b0h - srefh) / (b0l + srefl); + printf("From counting 0s : %9.2Lf, %9.2Lf - moment chisq %10.4f\n", b0h, b0l, chi2); + worst = std::max(worst, chi2); + } + + // note : previous threshold : 3.84145882069413 + int const rank = (worst < 500.) + (worst < 50.) + (worst < 5.); + + assert(0 <= rank && rank <= 3); + + const char * rankstr[4] = { "FAIL !!!!", "pass", "Good", "Great" }; + printf("Test result: %s\n", rankstr[rank]); + + addVCodeResult((uint32_t)(worst * 1000.0)); + + return rank > 0; } -static bool PopcountTestImpl(const HashInfo * hinfo, int inputSize, int step) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - const unsigned mx = 0xffffffff; - const long double n = UINT64_C(0x100000000) / step; - const int hbits = std::min(hinfo->bits, 64U); // limited due to popcount8 - - assert(hbits <= HASH_SIZE_MAX*8); - assert(inputSize >= 4); - - printf("\nGenerating hashes from a linear sequence of %i-bit numbers " - "with a step size of %d ... \n", inputSize*8, step); - - /* Notes on the ranking system. - * Ideally, this test should report and sum all popcount values - * and compare the resulting distribution to an ideal distribution. - * - * What happens here is quite simplified : - * the test gives "points" for each popcount, and sum them all. - * The metric (using N^5) is heavily influenced by the largest outliers. 
- * For example, a 64-bit hash should have a popcount close to 32. - * But a popcount==40 will tilt the metric upward - * more than popcount==24 will tilt the metric downward. - * In reality, both situations should be ranked similarly. - * - * To compensate, we measure both popcount1 and popcount0, - * and compare to some pre-calculated "optimal" sums for the hash size. - * - * Another limitation of this test is that it only popcounts the first 64-bit. - * For large hashes, bits beyond this limit are ignored. - * - * Derivative hash testing: - * In this scenario, 2 consecutive hashes are xored, - * and the outcome of this xor operation is then popcount controlled. - * Obviously, the _order_ in which the hash values are generated becomes critical. - * - * This scenario comes from the prng world, - * where derivative of the generated suite of random numbers is analyzed - * to ensure the suite is truly "random". - * - * However, in almost all prng, the seed of next random number is the previous random number. - * - * This scenario is quite different: it introduces a fixed distance between 2 consecutive "seeds". - * This is especially detrimental to algorithms relying on linear operations, such as multiplications. - * - * This scenario is relevant if the hash is used as a prng and generates values from a linearly increasing counter as a seed. - * It is not relevant for scenarios employing the hash as a prng - * with the more classical method of using the previous random number as a seed for the next one. - * This scenario has no relevance for classical usages of hash algorithms, - * such as hash tables, bloom filters and such, were only the raw values are ever used. 
- */ - - long double srefh, srefl; - switch (hbits/8) { - case 8: - srefh = 38918200.; - if (step == 2) - srefl = 273633.333333; - else if (step == 6) - srefl = 820900.0; - else - abort(); - break; - case 4: - srefh = 1391290.; - if (step == 2) - srefl = 686.6666667; - else if (step == 6) - srefl = 2060.0; - else - abort(); - break; - default: - printf("hash size not covered \n"); - abort(); - } - - // Because of threading, the actual inputs can't be hashed into the - // main thread's state, so just hash the parameters of the input data. - addVCodeInput(0); // start - addVCodeInput(0xffffffff); // end - addVCodeInput(step); // step - addVCodeInput(inputSize); // size - - popcnt_hist rawhash[g_NCPU]; - popcnt_hist xorhash[g_NCPU]; - memset(rawhash, 0, sizeof(rawhash)); - memset(xorhash, 0, sizeof(xorhash)); - - const seed_t seed = hinfo->Seed(g_seed, false, 1); - - if (g_NCPU == 1) { - PopcountThread(hinfo, seed, inputSize, 0, 0xffffffff, step, rawhash[0], xorhash[0]); - } else { +static bool PopcountTestImpl( const HashInfo * hinfo, int inputSize, int step ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + const unsigned mx = 0xffffffff; + const long double n = UINT64_C(0x100000000) / step; + const int hbits = std::min(hinfo->bits, 64U); // limited due to popcount8 + + assert(hbits <= HASH_SIZE_MAX * 8); + assert(inputSize >= 4); + + printf("\nGenerating hashes from a linear sequence of %i-bit numbers " + "with a step size of %d ... \n", inputSize * 8, step); + + /* + * Notes on the ranking system. + * Ideally, this test should report and sum all popcount values + * and compare the resulting distribution to an ideal distribution. + * + * What happens here is quite simplified : + * the test gives "points" for each popcount, and sum them all. + * The metric (using N^5) is heavily influenced by the largest outliers. + * For example, a 64-bit hash should have a popcount close to 32. 
+ * But a popcount==40 will tilt the metric upward + * more than popcount==24 will tilt the metric downward. + * In reality, both situations should be ranked similarly. + * + * To compensate, we measure both popcount1 and popcount0, + * and compare to some pre-calculated "optimal" sums for the hash size. + * + * Another limitation of this test is that it only popcounts the first 64-bit. + * For large hashes, bits beyond this limit are ignored. + * + * Derivative hash testing: + * In this scenario, 2 consecutive hashes are xored, + * and the outcome of this xor operation is then popcount controlled. + * Obviously, the _order_ in which the hash values are generated becomes critical. + * + * This scenario comes from the prng world, + * where derivative of the generated suite of random numbers is analyzed + * to ensure the suite is truly "random". + * + * However, in almost all prng, the seed of next random number is the previous random number. + * + * This scenario is quite different: it introduces a fixed distance between 2 consecutive "seeds". + * This is especially detrimental to algorithms relying on linear operations, such as multiplications. + * + * This scenario is relevant if the hash is used as a prng and generates values from a linearly increasing counter + *as a seed. + * It is not relevant for scenarios employing the hash as a prng + * with the more classical method of using the previous random number as a seed for the next one. + * This scenario has no relevance for classical usages of hash algorithms, + * such as hash tables, bloom filters and such, were only the raw values are ever used. 
+ */ + + long double srefh, srefl; + switch (hbits / 8) { + case 8: + srefh = 38918200.; + if (step == 2) { + srefl = 273633.333333; + } else if (step == 6) { + srefl = 820900.0; + } else { + abort(); + } + break; + case 4: + srefh = 1391290.; + if (step == 2) { + srefl = 686.6666667; + } else if (step == 6) { + srefl = 2060.0; + } else { + abort(); + } + break; + default: + printf("hash size not covered \n"); + abort(); + } + + // Because of threading, the actual inputs can't be hashed into the + // main thread's state, so just hash the parameters of the input data. + addVCodeInput( 0); // start + addVCodeInput(0xffffffff); // end + addVCodeInput( step); // step + addVCodeInput( inputSize); // size + + popcnt_hist rawhash[g_NCPU]; + popcnt_hist xorhash[g_NCPU]; + memset(rawhash, 0, sizeof(rawhash)); + memset(xorhash, 0, sizeof(xorhash)); + + const seed_t seed = hinfo->Seed(g_seed, false, 1); + + if (g_NCPU == 1) { + PopcountThread(hinfo, seed, inputSize, 0, 0xffffffff, step, rawhash[0], xorhash[0]); + } else { #if defined(HAVE_THREADS) - // split into g_NCPU threads - std::thread t[g_NCPU]; - printf("%d threads starting... ", g_NCPU); - - const uint64_t len = UINT64_C(0x100000000) / (step * g_NCPU); - for (int i=0; i < g_NCPU; i++) { - const uint32_t start = i * len * step; - const uint32_t end = (i < (g_NCPU - 1)) ? start + (len * step - 1) : 0xffffffff; - //printf("thread[%d]: %d, 0x%x - 0x%x %d\n", i, inputSize, start, end, step); - t[i] = std::thread {PopcountThread, hinfo, seed, inputSize, start, end, step, std::ref(rawhash[i]), std::ref(xorhash[i]) }; - } - - std::this_thread::sleep_for(std::chrono::seconds(1)); - - for (int i=0; i < g_NCPU; i++) { - t[i].join(); - } - - printf(" done\n"); - for (int i = 1; i < g_NCPU; i++) { - for (int j = 0; j <= hbits; j++) { - rawhash[0][j] += rawhash[i][j]; - xorhash[0][j] += xorhash[i][j]; - } - } + // split into g_NCPU threads + std::thread t[g_NCPU]; + printf("%d threads starting... 
", g_NCPU); + + const uint64_t len = UINT64_C(0x100000000) / (step * g_NCPU); + for (int i = 0; i < g_NCPU; i++) { + const uint32_t start = i * len * step; + const uint32_t end = (i < (g_NCPU - 1)) ? start + (len * step - 1) : 0xffffffff; + // printf("thread[%d]: %d, 0x%x - 0x%x %d\n", i, inputSize, start, end, step); + t[i] = std::thread { + PopcountThread, hinfo, seed, inputSize, start, end, step, std::ref(rawhash[i]), std::ref(xorhash[i]) + }; + } + + std::this_thread::sleep_for(std::chrono::seconds(1)); + + for (int i = 0; i < g_NCPU; i++) { + t[i].join(); + } + + printf(" done\n"); + for (int i = 1; i < g_NCPU; i++) { + for (int j = 0; j <= hbits; j++) { + rawhash[0][j] += rawhash[i][j]; + xorhash[0][j] += xorhash[i][j]; + } + } #endif - } - - long double b0h = 0, b0l = 0, db0h = 0, db0l = 0; - long double b1h = 0, b1l = 0, db1h = 0, db1l = 0; - // b1h = SUM[ 1-bits**5 ] - // b0h = SUM[ 0-bits**5 ] - // b1l = SUM[ 1-bits**10 ] - // b0l = SUM[ 0-bits**10 ] + } - for (uint64_t j = 0; j <= hbits; j++) { - long double mult1 = j * j * j * j * j; - long double mult0 = (hbits - j) * (hbits - j) * (hbits - j) * (hbits - j) * (hbits - j); - b1h += mult1 * (long double)rawhash[0][j]; - b0h += mult0 * (long double)rawhash[0][j]; - db1h += mult1 * (long double)xorhash[0][j]; - db0h += mult0 * (long double)xorhash[0][j]; - b1l += mult1 * mult1 * (long double)rawhash[0][j]; - b0l += mult0 * mult0 * (long double)rawhash[0][j]; - db1l += mult1 * mult1 * (long double)xorhash[0][j]; - db0l += mult0 * mult0 * (long double)xorhash[0][j]; - } + long double b0h = 0, b0l = 0, db0h = 0, db0l = 0; + long double b1h = 0, b1l = 0, db1h = 0, db1l = 0; + // b1h = SUM[ 1-bits**5 ] + // b0h = SUM[ 0-bits**5 ] + // b1l = SUM[ 1-bits**10 ] + // b0l = SUM[ 0-bits**10 ] + + for (uint64_t j = 0; j <= hbits; j++) { + long double mult1 = j * j * j * j * j; + long double mult0 = (hbits - j) * (hbits - j) * (hbits - j) * (hbits - j) * (hbits - j); + b1h += mult1 * (long double)rawhash[0][j]; + b0h 
+= mult0 * (long double)rawhash[0][j]; + db1h += mult1 * (long double)xorhash[0][j]; + db0h += mult0 * (long double)xorhash[0][j]; + b1l += mult1 * mult1 * (long double)rawhash[0][j]; + b0l += mult0 * mult0 * (long double)rawhash[0][j]; + db1l += mult1 * mult1 * (long double)xorhash[0][j]; + db0l += mult0 * mult0 * (long double)xorhash[0][j]; + } - b1h /= n; b1l = (b1l/n - b1h*b1h) / n; - db1h /= n; db1l = (db1l/n - db1h*db1h) / n; - b0h /= n; b0l = (b0l/n - b0h*b0h) / n; - db0h /= n; db0l = (db0l/n - db0h*db0h) / n; + b1h /= n; b1l = (b1l / n - b1h * b1h ) / n; + db1h /= n; db1l = (db1l / n - db1h * db1h) / n; + b0h /= n; b0l = (b0l / n - b0h * b0h ) / n; + db0h /= n; db0l = (db0l / n - db0h * db0h) / n; - bool result = true; + bool result = true; - printf("Ideal results : %9.2Lf, %9.2Lf\n", srefh, srefl); + printf("Ideal results : %9.2Lf, %9.2Lf\n", srefh, srefl); - printf("\nResults from literal hashes :\n"); - result &= PopcountResults(srefh, srefl, b1h, b1l, b0h, b0l); + printf("\nResults from literal hashes :\n" ); + result &= PopcountResults(srefh, srefl, b1h, b1l, b0h, b0l); - printf("\nResults from derivative hashes (XOR of 2 consecutive values) :\n"); - result &= PopcountResults(srefh, srefl, db1h, db1l, db0h, db0l); + printf("\nResults from derivative hashes (XOR of 2 consecutive values) :\n"); + result &= PopcountResults(srefh, srefl, db1h, db1l, db0h, db0l); - printf("\n"); + printf("\n"); - // Similar threading problems for the outputs, so just hash in the - // summary data. - addVCodeOutput(&rawhash[0][0], 65 * sizeof(rawhash[0][0])); - addVCodeOutput(&xorhash[0][0], 65 * sizeof(xorhash[0][0])); + // Similar threading problems for the outputs, so just hash in the + // summary data. 
+ addVCodeOutput(&rawhash[0][0], 65 * sizeof(rawhash[0][0])); + addVCodeOutput(&xorhash[0][0], 65 * sizeof(xorhash[0][0])); - recordTestResult(result, "Popcount", inputSize); + recordTestResult(result, "Popcount", inputSize); - return result; + return result; } //----------------------------------------------------------------------------- -template < typename hashtype > -bool PopcountTest(const HashInfo * hinfo, const bool extra) { - const int step = ((hinfo->isVerySlow() || hinfo->bits > 128) && extra) ? 6 : 2; - bool result = true; +template +bool PopcountTest( const HashInfo * hinfo, const bool extra ) { + const int step = ((hinfo->isVerySlow() || hinfo->bits > 128) && extra) ? 6 : 2; + bool result = true; printf("[[[ Popcount Tests ]]]\n"); result &= PopcountTestImpl(hinfo, 4, step); if (extra) { - result &= PopcountTestImpl(hinfo, 8, step); + result &= PopcountTestImpl(hinfo, 8, step); result &= PopcountTestImpl(hinfo, 16, step); } diff --git a/tests/PopcountTest.h b/tests/PopcountTest.h index 33af51c6..623361c7 100644 --- a/tests/PopcountTest.h +++ b/tests/PopcountTest.h @@ -61,5 +61,5 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -template < typename hashtype > -bool PopcountTest(const HashInfo * info, const bool extra); +template +bool PopcountTest( const HashInfo * info, const bool extra ); diff --git a/tests/SanityTest.cpp b/tests/SanityTest.cpp index 6ed17400..01a2e860 100644 --- a/tests/SanityTest.cpp +++ b/tests/SanityTest.cpp @@ -58,24 +58,23 @@ // These sentinel bytes MUST be different values static const uint8_t sentinel1 = 0x5c; static const uint8_t sentinel2 = 0x36; -static_assert(sentinel1 != sentinel2, - "valid sentinel bytes in SanityTest"); +static_assert(sentinel1 != sentinel2, "valid sentinel bytes in SanityTest"); //---------------------------------------------------------------------------- // Helper for printing out the right number of progress dots -static void progressdots(int cur, int min, int max, int totaldots) { +static void progressdots( int cur, int min, int max, int totaldots ) { // cur goes from [min, max]. When cur is max, totaldots should // have been printed. Print out enough dots, assuming either we // were called for cur-1, or that we are being called for the // first time with cur==min. assert(totaldots > 0); - assert(min < max); - assert(cur >= min); - assert(cur <= max); + assert(min < max ); + assert(cur >= min ); + assert(cur <= max ); int count = 0; - int span = max - min + 1; + int span = max - min + 1; if (span > totaldots) { // Possibly zero dots per call. // Always print out one dot the first time through. @@ -91,7 +90,7 @@ static void progressdots(int cur, int min, int max, int totaldots) { } if (count == 0) { int expect = (cur - min + 1) * totaldots / span; - int sofar = (cur - min ) * totaldots / span; + int sofar = (cur - min ) * totaldots / span; count = expect - sofar; } @@ -115,7 +114,7 @@ static void progressdots(int cur, int min, int max, int totaldots) { #define maybeprintf(...) 
if (verbose) { printf(__VA_ARGS__); } -static bool verify_sentinel(const uint8_t * buf, size_t len, const uint8_t sentinel, bool verbose) { +static bool verify_sentinel( const uint8_t * buf, size_t len, const uint8_t sentinel, bool verbose ) { for (size_t i = 0; i < len; i++) { if (buf[i] != sentinel) { maybeprintf(" %" PRIu64 ": 0x%02X != 0x%02X: ", i, buf[i], sentinel); @@ -125,8 +124,8 @@ static bool verify_sentinel(const uint8_t * buf, size_t len, const uint8_t senti return true; } -template < bool checksentinels > -static bool verify_hashmatch(const uint8_t * buf1, const uint8_t * buf2, size_t len, bool verbose) { +template +static bool verify_hashmatch( const uint8_t * buf1, const uint8_t * buf2, size_t len, bool verbose ) { if (likely(memcmp(buf1, buf2, len) == 0)) { return true; } @@ -136,8 +135,7 @@ static bool verify_hashmatch(const uint8_t * buf1, const uint8_t * buf2, size_t (buf1[i] == sentinel1) && (buf2[i] == sentinel2)) { maybeprintf(" output byte %" PRIu64 " not altered:", i); } else { - maybeprintf(" output byte %" PRIu64 " inconsistent (0x%02X != 0x%02X):", - i, buf1[i], buf2[i]); + maybeprintf(" output byte %" PRIu64 " inconsistent (0x%02X != 0x%02X):", i, buf1[i], buf2[i]); } break; } @@ -149,32 +147,32 @@ static bool verify_hashmatch(const uint8_t * buf1, const uint8_t * buf2, size_t // that hashing the same thing gives the same result. // // This test can halt early, so don't add input bytes to the VCode. 
-bool SanityTest1(const HashInfo * hinfo, const seed_t seed, bool verbose) { - Rand r(883743); - bool result = true; - bool danger = false; - - const HashFn hash = hinfo->hashFn(g_hashEndian); - const int hashbytes = hinfo->bits / 8; - const int reps = 10; - const int keymax = 256; - const int pad = 16*3; - const int buflen = keymax + pad; - - uint8_t * buffer1 = new uint8_t[buflen]; - uint8_t * buffer2 = new uint8_t[buflen]; - uint8_t * hash1 = new uint8_t[buflen]; - uint8_t * hash2 = new uint8_t[buflen]; +bool SanityTest1( const HashInfo * hinfo, const seed_t seed, bool verbose ) { + Rand r( 883743 ); + bool result = true; + bool danger = false; + + const HashFn hash = hinfo->hashFn(g_hashEndian); + const int hashbytes = hinfo->bits / 8; + const int reps = 10; + const int keymax = 256; + const int pad = 16 * 3; + const int buflen = keymax + pad; + + uint8_t * buffer1 = new uint8_t[buflen]; + uint8_t * buffer2 = new uint8_t[buflen]; + uint8_t * hash1 = new uint8_t[buflen]; + uint8_t * hash2 = new uint8_t[buflen]; maybeprintf("Running sanity check 1 "); memset(hash1, sentinel1, buflen); memset(hash2, sentinel2, buflen); - for(int irep = 0; irep < reps; irep++) { - if (irep % (reps/10) == 0) maybeprintf("."); + for (int irep = 0; irep < reps; irep++) { + if (irep % (reps / 10) == 0) { maybeprintf("."); } - for(int len = 0; len <= keymax; len++) { + for (int len = 0; len <= keymax; len++) { // Make 2 copies of some random input data, and hash one // of them. 
r.rand_p(buffer1, buflen); @@ -191,8 +189,7 @@ bool SanityTest1(const HashInfo * hinfo, const seed_t seed, bool verbose) { } // See if the hash overflowed its output buffer - if (!verify_sentinel(hash1 + hashbytes, buflen - hashbytes, - sentinel1, verbose)) { + if (!verify_sentinel(hash1 + hashbytes, buflen - hashbytes, sentinel1, verbose)) { maybeprintf(" hash overflowed output buffer (pass 1):"); result = false; danger = true; @@ -203,8 +200,7 @@ bool SanityTest1(const HashInfo * hinfo, const seed_t seed, bool verbose) { hash(buffer1, len, seed, hash2); // See if the hash overflowed output buffer this time - if (!verify_sentinel(hash2 + hashbytes, buflen - hashbytes, - sentinel2, verbose)) { + if (!verify_sentinel(hash2 + hashbytes, buflen - hashbytes, sentinel2, verbose)) { maybeprintf(" hash overflowed output buffer (pass 2):"); result = false; danger = true; @@ -219,8 +215,8 @@ bool SanityTest1(const HashInfo * hinfo, const seed_t seed, bool verbose) { } } - end_sanity: - if(result == false) { + end_sanity: + if (result == false) { printf("%s", verbose ? " FAIL !!!!!\n" : " FAIL"); } else { printf("%s", verbose ? " PASS\n" : " pass"); @@ -254,42 +250,41 @@ bool SanityTest1(const HashInfo * hinfo, const seed_t seed, bool verbose) { // This test is expensive, so only run 1 rep. // // This test can halt early, so don't add input bytes to the VCode. 
-bool SanityTest2(const HashInfo * hinfo, const seed_t seed, bool verbose) { - Rand r(883744); - bool result = true; +bool SanityTest2( const HashInfo * hinfo, const seed_t seed, bool verbose ) { + Rand r( 883744 ); + bool result = true; - const HashFn hash = hinfo->hashFn(g_hashEndian); - const int hashbytes = hinfo->bits / 8; - const int reps = 5; - const int keymax = 128; - const int pad = 16; // Max alignment offset tested - const int buflen = keymax + pad*3; + const HashFn hash = hinfo->hashFn(g_hashEndian); + const int hashbytes = hinfo->bits / 8; + const int reps = 5; + const int keymax = 128; + const int pad = 16;// Max alignment offset tested + const int buflen = keymax + pad * 3; // XXX Check alignment!?! - uint8_t * buffer1 = new uint8_t[buflen]; - uint8_t * buffer2 = new uint8_t[buflen]; - uint8_t * hash1 = new uint8_t[hashbytes]; - uint8_t * hash2 = new uint8_t[hashbytes]; - uint8_t * hash3 = new uint8_t[hashbytes]; + uint8_t * buffer1 = new uint8_t[buflen ]; + uint8_t * buffer2 = new uint8_t[buflen ]; + uint8_t * hash1 = new uint8_t[hashbytes]; + uint8_t * hash2 = new uint8_t[hashbytes]; + uint8_t * hash3 = new uint8_t[hashbytes]; maybeprintf("Running sanity check 2 "); for (int irep = 0; irep < reps; irep++) { - - for(int len = 1; len <= keymax; len++) { - ExtBlob key1(&buffer1[pad], len); + for (int len = 1; len <= keymax; len++) { + ExtBlob key1( &buffer1[pad], len ); // Fill the first buffer with random data r.rand_p(buffer1, buflen); - if (verbose) { progressdots(len + irep*keymax, 1, reps*keymax, 10); } + if (verbose) { progressdots(len + irep * keymax, 1, reps * keymax, 10); } // Record the hash of key1. hash1 becomes the correct // answer that the rest of the loop will test against. 
hash(key1, len, seed, hash1); addVCodeOutput(hash1, hashbytes); // See if the hash behaves sanely using only key1 - for(int bit = 0; bit < (len * 8); bit++) { + for (int bit = 0; bit < (len * 8); bit++) { // Flip a bit, hash the key -> we should get a different result. key1.flipbit(bit); hash(key1, len, seed, hash2); @@ -311,9 +306,9 @@ bool SanityTest2(const HashInfo * hinfo, const seed_t seed, bool verbose) { } } - for(int offset = pad; offset < pad*2; offset++) { + for (int offset = pad; offset < pad * 2; offset++) { // Make key2 have alignment independent of key1 - ExtBlob key2(&buffer2[offset], len); + ExtBlob key2( &buffer2[offset], len ); // Fill the second buffer with different random data r.rand_p(buffer2, buflen); @@ -343,7 +338,7 @@ bool SanityTest2(const HashInfo * hinfo, const seed_t seed, bool verbose) { memcpy(buffer2 + offset - pad, buffer1, len + 2 * pad); uint8_t * const key2_start = buffer2 + offset; uint8_t * const key2_end = buffer2 + offset + len; - for(uint8_t * ptr = key2_start - pad; ptr < key2_end + pad; ptr++) { + for (uint8_t * ptr = key2_start - pad; ptr < key2_end + pad; ptr++) { if ((ptr >= key2_start) && (ptr < key2_end)) { continue; } *ptr ^= 0xFF; hash(key2, len, seed, hash3); @@ -362,8 +357,8 @@ bool SanityTest2(const HashInfo * hinfo, const seed_t seed, bool verbose) { } } - end_sanity: - if(result == false) { + end_sanity: + if (result == false) { printf("%s", verbose ? " FAIL !!!!!\n" : " ... FAIL"); } else { printf("%s", verbose ? " PASS\n" : " ... pass"); @@ -387,18 +382,18 @@ bool SanityTest2(const HashInfo * hinfo, const seed_t seed, bool verbose) { // Seed() is first called once in the main process, and 2) when Seed() // is called per-hash inside each thread. 
-static void hashthings(const HashInfo * hinfo, seed_t seed, - uint32_t reps, uint32_t order, bool reseed, bool verbose, - std::vector &keys, std::vector &hashes) { - const HashFn hash = hinfo->hashFn(g_hashEndian); +static void hashthings( const HashInfo * hinfo, seed_t seed, uint32_t reps, uint32_t order, bool reseed, + bool verbose, std::vector & keys, std::vector & hashes ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); const uint32_t hashbytes = hinfo->bits / 8; // Each thread should hash the keys in a different, random order - std::vector idxs(reps); + std::vector idxs( reps ); + if (order != 0) { - Rand r(46742 + order); + Rand r( 46742 + order ); for (int i = 0; i < reps; i++) { idxs[i] = i; } - for(int i = reps - 1; i > 0; i--) { + for (int i = reps - 1; i > 0; i--) { std::swap(idxs[i], idxs[r.rand_range(i + 1)]); } } @@ -412,19 +407,19 @@ static void hashthings(const HashInfo * hinfo, seed_t seed, if (reseed) { seed = hinfo->Seed(idx * UINT64_C(0xa5), true, 1); } hash(&keys[idx * reps], idx + 1, seed, &hashes[idx * hashbytes]); if (verbose && (order < 2)) { progressdots(i, 0, reps - 1, 4); } - if (order == 0) { addVCodeInput(&keys[idx * reps], idx + 1);} + if (order == 0) { addVCodeInput(&keys[idx * reps], idx + 1); } } } -static bool ThreadingTest (const HashInfo * hinfo, bool seedthread, bool verbose) { - Rand r(609163); +static bool ThreadingTest( const HashInfo * hinfo, bool seedthread, bool verbose ) { + Rand r( 609163 ); - const uint32_t hashbytes = hinfo->bits / 8; - const uint32_t reps = 1024*16; - const uint32_t keybytes = (reps * reps); - std::vector keys(keybytes); - std::vector mainhashes(reps * hashbytes); - const seed_t seed = seedthread ? 0 : hinfo->Seed(0x12345, true, 1); + const uint32_t hashbytes = hinfo->bits / 8; + const uint32_t reps = 1024 * 16; + const uint32_t keybytes = (reps * reps); + std::vector keys( keybytes ); + std::vector mainhashes( reps * hashbytes ); + const seed_t seed = seedthread ? 
0 : hinfo->Seed(0x12345, true, 1); bool result = true; maybeprintf("Running thread-safety test %d ", seedthread ? 2 : 1); @@ -447,10 +442,12 @@ static bool ThreadingTest (const HashInfo * hinfo, bool seedthread, bool verbose if (g_NCPU > 1) { #if defined(HAVE_THREADS) // Compute all the hashes in different random orders in threads - std::vector > threadhashes(g_NCPU, std::vector(reps * hashbytes)); + std::vector> threadhashes( g_NCPU, std::vector(reps * hashbytes)); std::thread t[g_NCPU]; for (int i = 0; i < g_NCPU; i++) { - t[i] = std::thread {hashthings,hinfo,seed,reps,i+1,seedthread,verbose,std::ref(keys),std::ref(threadhashes[i])}; + t[i] = std::thread { + hashthings, hinfo, seed, reps, i + 1, seedthread, verbose, std::ref(keys), std::ref(threadhashes[i]) + }; } for (int i = 0; i < g_NCPU; i++) { t[i].join(); @@ -468,7 +465,7 @@ static bool ThreadingTest (const HashInfo * hinfo, bool seedthread, bool verbose for (int j = 0; j < reps; j++) { if (memcmp(&mainhashes[j * hashbytes], &threadhashes[i][j * hashbytes], hashbytes) != 0) { maybeprintf("\nMismatch between main process and thread #%d at index %d\n", i, j); - if (verbose) { ExtBlob(&mainhashes[j * hashbytes] , hashbytes).printhex(" main :"); } + if (verbose) { ExtBlob(&mainhashes[j * hashbytes], hashbytes).printhex(" main :"); } if (verbose) { ExtBlob(&threadhashes[i][j * hashbytes], hashbytes).printhex(" thread :"); } result = false; break; // Only breaks out of j loop @@ -476,7 +473,7 @@ static bool ThreadingTest (const HashInfo * hinfo, bool seedthread, bool verbose } } - if(result == false) { + if (result == false) { printf("%s", verbose ? " FAIL !!!!!\n\n" : " ... FAIL"); } else { printf("%s", verbose ? " PASS\n" : " ... 
pass"); @@ -500,142 +497,136 @@ static bool ThreadingTest (const HashInfo * hinfo, bool seedthread, bool verbose //---------------------------------------------------------------------------- // Appending zero bytes to a key should always cause it to produce a different // hash value -bool AppendedZeroesTest (const HashInfo * hinfo, const seed_t seed, bool verbose) { - Rand r(173994); +bool AppendedZeroesTest( const HashInfo * hinfo, const seed_t seed, bool verbose ) { + Rand r( 173994 ); - const HashFn hash = hinfo->hashFn(g_hashEndian); - const int hashbytes = hinfo->bits / 8; - bool result = true; + const HashFn hash = hinfo->hashFn(g_hashEndian); + const int hashbytes = hinfo->bits / 8; + bool result = true; - maybeprintf("Running append zeroes test "); + maybeprintf("Running append zeroes test "); - for(int rep = 0; rep < 100; rep++) - { - if(rep % 10 == 0) maybeprintf("."); + for (int rep = 0; rep < 100; rep++) { + if (rep % 10 == 0) { maybeprintf("."); } - unsigned char key[256]; - memset(key,0,sizeof(key)); + unsigned char key[256]; + memset(key, 0, sizeof(key)); - r.rand_p(key,32); - // This test can halt early, so don't add input bytes to the VCode. + r.rand_p(key, 32); + // This test can halt early, so don't add input bytes to the VCode. 
- std::vector> hashes; + std::vector> hashes; - for(int i = 0; i < 32; i++) { - std::vector h(hashbytes); - hash(key,32+i,seed,&h[0]); - hashes.push_back(h); - addVCodeOutput(&h[0], hashbytes); - } + for (int i = 0; i < 32; i++) { + std::vector h( hashbytes ); + hash(key, 32 + i, seed, &h[0]); + hashes.push_back(h); + addVCodeOutput(&h[0], hashbytes); + } - // Sort in little-endian order, for human friendliness - std::sort(hashes.begin(), hashes.end(), - [](const std::vector& a, const std::vector& b) { + // Sort in little-endian order, for human friendliness + std::sort(hashes.begin(), hashes.end(), []( const std::vector & a, const std::vector & b ) { for (int i = a.size() - 1; i >= 0; i--) { if (a[i] != b[i]) { return a[i] < b[i]; } } return false; - } ); + }); - for(int i = 1; i < 32; i++) { - if (memcmp(&hashes[i][0], &hashes[i-1][0], hashbytes) == 0) { - result = false; - goto done; + for (int i = 1; i < 32; i++) { + if (memcmp(&hashes[i][0], &hashes[i - 1][0], hashbytes) == 0) { + result = false; + goto done; + } } } - } - done: - if(result == false) { - printf("%s", verbose ? " FAIL !!!!!\n" : " ... FAIL"); - } else { - printf("%s", verbose ? " PASS\n" : " ... pass"); - } + done: + if (result == false) { + printf("%s", verbose ? " FAIL !!!!!\n" : " ... FAIL"); + } else { + printf("%s", verbose ? " PASS\n" : " ... 
pass"); + } - recordTestResult(result, "Sanity", "Append zeroes"); + recordTestResult(result, "Sanity", "Append zeroes"); - addVCodeResult(result); + addVCodeResult(result); - return result; + return result; } //---------------------------------------------------------------------------- // Prepending zero bytes to a key should also always cause it to // produce a different hash value -bool PrependedZeroesTest (const HashInfo * hinfo, const seed_t seed, bool verbose) { - Rand r(534281); +bool PrependedZeroesTest( const HashInfo * hinfo, const seed_t seed, bool verbose ) { + Rand r( 534281 ); - const HashFn hash = hinfo->hashFn(g_hashEndian); - const int hashbytes = hinfo->bits / 8; - bool result = true; + const HashFn hash = hinfo->hashFn(g_hashEndian); + const int hashbytes = hinfo->bits / 8; + bool result = true; - maybeprintf("Running prepend zeroes test "); + maybeprintf("Running prepend zeroes test "); - for(int rep = 0; rep < 100; rep++) - { - if(rep % 10 == 0) maybeprintf("."); + for (int rep = 0; rep < 100; rep++) { + if (rep % 10 == 0) { maybeprintf("."); } - unsigned char key[256]; - memset(key,0,sizeof(key)); + unsigned char key[256]; + memset(key, 0, sizeof(key)); - r.rand_p(key+32,32); - // This test can halt early, so don't add input bytes to the VCode. + r.rand_p(key + 32, 32); + // This test can halt early, so don't add input bytes to the VCode. 
- std::vector> hashes; + std::vector> hashes; - for(int i = 0; i < 32; i++) { - std::vector h(hashbytes); - hash(key+32-i,32+i,seed,&h[0]); - hashes.push_back(h); - addVCodeOutput(&h[0], hashbytes); - } + for (int i = 0; i < 32; i++) { + std::vector h( hashbytes ); + hash(key + 32 - i, 32 + i, seed, &h[0]); + hashes.push_back(h); + addVCodeOutput(&h[0], hashbytes); + } - // Sort in little-endian order, for human friendliness - std::sort(hashes.begin(), hashes.end(), - [](const std::vector& a, const std::vector& b) { + // Sort in little-endian order, for human friendliness + std::sort(hashes.begin(), hashes.end(), []( const std::vector & a, const std::vector & b ) { for (int i = a.size() - 1; i >= 0; i--) { if (a[i] != b[i]) { return a[i] < b[i]; } } return false; - } ); + }); - for(int i = 1; i < 32; i++) { - if (memcmp(&hashes[i][0], &hashes[i-1][0], hashbytes) == 0) { - result = false; - goto done; + for (int i = 1; i < 32; i++) { + if (memcmp(&hashes[i][0], &hashes[i - 1][0], hashbytes) == 0) { + result = false; + goto done; + } } } - } - done: - if(result == false) { - printf("%s", verbose ? " FAIL !!!!!\n" : " ... FAIL"); - } else { - printf("%s", verbose ? " PASS\n" : " ... pass"); - } + done: + if (result == false) { + printf("%s", verbose ? " FAIL !!!!!\n" : " ... FAIL"); + } else { + printf("%s", verbose ? " PASS\n" : " ... 
pass"); + } - recordTestResult(result, "Sanity", "Prepend zeroes"); + recordTestResult(result, "Sanity", "Prepend zeroes"); - addVCodeResult(result); + addVCodeResult(result); - return result; + return result; } -void SanityTestHeader(void) { - printf("%-25s %13s %13s %13s\n", - "Name", " Sanity 1+2 ", " Zeroes ", " Thread-safe "); - printf("%-25s %13s %13s %13s\n", - "-------------------------", "-------------", - "-------------", "-------------"); +void SanityTestHeader( void ) { + printf("%-25s %13s %13s %13s\n", "Name", " Sanity 1+2 ", " Zeroes ", " Thread-safe "); + printf("%-25s %13s %13s %13s\n", "-------------------------", + "-------------", "-------------", "-------------"); } -bool SanityTest(const HashInfo * hinfo, bool oneline) { - bool verbose = !oneline; - bool result = true; +bool SanityTest( const HashInfo * hinfo, bool oneline ) { + bool verbose = !oneline; + bool result = true; bool threadresult = true; if (oneline) { printf("%-25s ", hinfo->name); } @@ -650,7 +641,7 @@ bool SanityTest(const HashInfo * hinfo, bool oneline) { // These should be last, as they re-seed threadresult &= ThreadingTest(hinfo, false, verbose); - threadresult &= ThreadingTest(hinfo, true, verbose); + threadresult &= ThreadingTest(hinfo, true , verbose); // If threading test cannot give meaningful results, then don't // bother printing them out. :) But still run them above so the @@ -671,7 +662,7 @@ bool SanityTest(const HashInfo * hinfo, bool oneline) { printf("%sSANITY_FAILS unset, but hash failed", oneline ? "\t" : ""); } - out: + out: if (oneline) { printf("\n"); } diff --git a/tests/SanityTest.h b/tests/SanityTest.h index fb9f5b1f..7347f374 100644 --- a/tests/SanityTest.h +++ b/tests/SanityTest.h @@ -46,5 +46,5 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ -bool SanityTest(const HashInfo * hinfo, bool oneline = false); -void SanityTestHeader(void); +bool SanityTest( const HashInfo * hinfo, bool oneline = false ); +void SanityTestHeader( void ); diff --git a/tests/SeedTest.cpp b/tests/SeedTest.cpp index 283e2fd0..2af3c57c 100644 --- a/tests/SeedTest.cpp +++ b/tests/SeedTest.cpp @@ -49,7 +49,7 @@ #include "Platform.h" #include "Hashinfo.h" #include "TestGlobals.h" -#include "Stats.h" // For chooseUpToK +#include "Stats.h" // For chooseUpToK #include "Analyze.h" #include "Instantiate.h" #include "VCode.h" @@ -61,122 +61,122 @@ //----------------------------------------------------------------------------- // Keyset 'Seed' - hash "the quick brown fox..." using different seeds -template < typename hashtype, uint32_t seedbits, bool bigseed > -static bool SeedTestImpl(const HashInfo * hinfo, bool drawDiagram) { - assert(seedbits <= 31); - const HashFn hash = hinfo->hashFn(g_hashEndian); - const int totalkeys = 1 << seedbits; - const int hibits = seedbits >> 1; - const int lobits = seedbits - hibits; - const int shiftbits = bigseed ? (64 - hibits) : (32 - hibits); +template +static bool SeedTestImpl( const HashInfo * hinfo, bool drawDiagram ) { + assert(seedbits <= 31); + const HashFn hash = hinfo->hashFn(g_hashEndian); + const int totalkeys = 1 << seedbits; + const int hibits = seedbits >> 1; + const int lobits = seedbits - hibits; + const int shiftbits = bigseed ? 
(64 - hibits) : (32 - hibits); - printf("Keyset 'Seed' - %d keys\n", totalkeys); + printf("Keyset 'Seed' - %d keys\n", totalkeys); - const char text[64] = "The quick brown fox jumps over the lazy dog"; - const int len = (int)strlen(text); + const char text[64] = "The quick brown fox jumps over the lazy dog"; + const int len = (int)strlen(text); - addVCodeInput(text, len); - addVCodeInput(totalkeys); + addVCodeInput(text , len); + addVCodeInput(totalkeys); - //---------- + //---------- - std::vector hashes; + std::vector hashes; - hashes.resize(totalkeys); + hashes.resize(totalkeys); - for(seed_t i = 0; i < (1 << hibits); i++) { - for(seed_t j = 0; j < (1 << lobits); j++) { - const seed_t seed = (i << shiftbits) + j; - const seed_t hseed = hinfo->Seed(seed, true); - hash(text, len, hseed, &hashes[(i << lobits) + j]); - } - } + for (seed_t i = 0; i < (1 << hibits); i++) { + for (seed_t j = 0; j < (1 << lobits); j++) { + const seed_t seed = (i << shiftbits) + j; + const seed_t hseed = hinfo->Seed(seed, true); + hash(text, len, hseed, &hashes[(i << lobits) + j]); + } + } - bool result = TestHashList(hashes,drawDiagram); - printf("\n"); + bool result = TestHashList(hashes, drawDiagram); + printf("\n"); - recordTestResult(result, "Seed", "Seq"); + recordTestResult(result, "Seed", "Seq"); - addVCodeResult(result); + addVCodeResult(result); - return result; + return result; } //----------------------------------------------------------------------------- // Keyset 'SparseSeed' - hash "sphinx of black quartz..." using seeds with few // bits set/cleared -template < typename hashtype, bool bigseed > -static bool SparseSeedTestImpl(const HashInfo * hinfo, uint32_t maxbits, bool drawDiagram) { - assert(maxbits < 16); - const HashFn hash = hinfo->hashFn(g_hashEndian); - uint64_t totalkeys = 2 + 2*chooseUpToK(bigseed ? 
64 : 32, maxbits); - uint64_t cnt = 0; +template +static bool SparseSeedTestImpl( const HashInfo * hinfo, uint32_t maxbits, bool drawDiagram ) { + assert(maxbits < 16); + const HashFn hash = hinfo->hashFn(g_hashEndian); + uint64_t totalkeys = 2 + 2 * chooseUpToK(bigseed ? 64 : 32, maxbits); + uint64_t cnt = 0; - printf("Keyset 'SparseSeed' - %" PRId64 " keys\n", totalkeys); + printf("Keyset 'SparseSeed' - %" PRId64 " keys\n", totalkeys); - const char text[64] = "Sphinx of black quartz, judge my vow"; - const int len = (int)strlen(text); + const char text[64] = "Sphinx of black quartz, judge my vow"; + const int len = (int)strlen(text); - addVCodeInput(text, len); - addVCodeInput(totalkeys); + addVCodeInput(text , len); + addVCodeInput(totalkeys); - //---------- + //---------- - std::vector hashes; - hashes.resize(totalkeys); + std::vector hashes; + hashes.resize(totalkeys); - seed_t seed; + seed_t seed; - seed = hinfo->Seed(0, true); - hash(text, len, seed, &hashes[cnt++]); + seed = hinfo->Seed(0, true); + hash(text, len, seed, &hashes[cnt++]); - seed = hinfo->Seed(~0, true); - hash(text, len, seed, &hashes[cnt++]); + seed = hinfo->Seed(~0, true); + hash(text, len, seed, &hashes[cnt++]); - for(seed_t i = 1; i <= maxbits; i++) { - uint64_t seed = (UINT64_C(1) << i) - 1; - bool done; + for (seed_t i = 1; i <= maxbits; i++) { + uint64_t seed = (UINT64_C(1) << i) - 1; + bool done; - do { - seed_t hseed; - hseed = hinfo->Seed(seed, true); - hash(text, len, hseed, &hashes[cnt++]); + do { + seed_t hseed; + hseed = hinfo->Seed(seed, true); + hash(text, len, hseed, &hashes[cnt++]); - hseed = hinfo->Seed(~seed, true); - hash(text, len, hseed, &hashes[cnt++]); + hseed = hinfo->Seed(~seed, true); + hash(text, len, hseed, &hashes[cnt++]); - /* Next lexicographic bit pattern, from "Bit Twiddling Hacks" */ - uint64_t t = (seed | (seed - 1)) + 1; - seed = t | ((((t & -t) / (seed & -seed)) >> 1) - 1); - done = bigseed ? 
(seed == ~0) : ((seed >> 32) != 0); - } while (!done); - } + /* Next lexicographic bit pattern, from "Bit Twiddling Hacks" */ + uint64_t t = (seed | (seed - 1)) + 1; + seed = t | ((((t & -t) / (seed & -seed)) >> 1) - 1); + done = bigseed ? (seed == ~0) : ((seed >> 32) != 0); + } while (!done); + } - bool result = TestHashList(hashes,drawDiagram); - printf("\n"); + bool result = TestHashList(hashes, drawDiagram); + printf("\n"); - recordTestResult(result, "Seed", "Sparse"); + recordTestResult(result, "Seed", "Sparse"); - addVCodeResult(result); + addVCodeResult(result); - return result; + return result; } //----------------------------------------------------------------------------- -template < typename hashtype > -bool SeedTest(const HashInfo * hinfo, const bool verbose) { +template +bool SeedTest( const HashInfo * hinfo, const bool verbose ) { bool result = true; printf("[[[ Keyset 'Seed' Tests ]]]\n\n"); if (hinfo->is32BitSeed()) { - result &= SeedTestImpl( hinfo, verbose ); - result &= SparseSeedTestImpl( hinfo, 7, verbose ); + result &= SeedTestImpl (hinfo , verbose); + result &= SparseSeedTestImpl(hinfo, 7, verbose); } else { - result &= SeedTestImpl( hinfo, verbose ); - result &= SparseSeedTestImpl( hinfo, 5, verbose ); + result &= SeedTestImpl (hinfo , verbose); + result &= SparseSeedTestImpl(hinfo, 5, verbose); } printf("%s\n", result ? "" : g_failstr); diff --git a/tests/SeedTest.h b/tests/SeedTest.h index 93a62572..6d8546ad 100644 --- a/tests/SeedTest.h +++ b/tests/SeedTest.h @@ -47,5 +47,5 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -template < typename hashtype > -bool SeedTest(const HashInfo * info, const bool verbose); +template +bool SeedTest( const HashInfo * info, const bool verbose ); diff --git a/tests/SparseKeysetTest.cpp b/tests/SparseKeysetTest.cpp index a4d64d16..fa63a9b7 100644 --- a/tests/SparseKeysetTest.cpp +++ b/tests/SparseKeysetTest.cpp @@ -58,12 +58,11 @@ //----------------------------------------------------------------------------- // Keyset 'Sparse' - generate all possible N-bit keys with up to K bits set -template < typename keytype, typename hashtype > -static void SparseKeygenRecurse(HashFn hash, const seed_t seed, - int start, int bitsleft, bool inclusive, - keytype & k, std::vector & hashes) { +template +static void SparseKeygenRecurse( HashFn hash, const seed_t seed, int start, int bitsleft, + bool inclusive, keytype & k, std::vector & hashes ) { const int nbytes = sizeof(keytype); - const int nbits = nbytes * 8; + const int nbits = nbytes * 8; hashtype h; @@ -77,7 +76,7 @@ static void SparseKeygenRecurse(HashFn hash, const seed_t seed, } if (bitsleft > 1) { - SparseKeygenRecurse(hash, seed, i+1, bitsleft-1, inclusive, k, hashes); + SparseKeygenRecurse(hash, seed, i + 1, bitsleft - 1, inclusive, k, hashes); } k.flipbit(i); @@ -85,98 +84,96 @@ static void SparseKeygenRecurse(HashFn hash, const seed_t seed, } //---------- -template < int keybits, typename hashtype > -static bool SparseKeyImpl(HashFn hash, const seed_t seed, - const int setbits, bool inclusive, - bool testColl, bool testDist, bool drawDiagram) { - printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ",keybits, - inclusive ? "up to" : "exactly", setbits); +template +static bool SparseKeyImpl( HashFn hash, const seed_t seed, const int setbits, bool inclusive, + bool testColl, bool testDist, bool drawDiagram ) { + printf("Keyset 'Sparse' - %d-bit keys with %s %d bits set - ", keybits, inclusive ? 
"up to" : "exactly", setbits); - typedef Blob keytype; + typedef Blob keytype; - std::vector hashes; + std::vector hashes; - keytype k; - memset(&k,0,sizeof(k)); + keytype k; + memset(&k, 0, sizeof(k)); - if (inclusive) { - hashes.resize(1); - hash(&k, sizeof(keytype), seed, &hashes[0]); - } + if (inclusive) { + hashes.resize(1); + hash(&k, sizeof(keytype), seed, &hashes[0]); + } - SparseKeygenRecurse(hash,seed,0,setbits,inclusive,k,hashes); + SparseKeygenRecurse(hash, seed, 0, setbits, inclusive, k, hashes); - printf("%d keys\n",(int)hashes.size()); + printf("%d keys\n", (int)hashes.size()); - bool result = TestHashList(hashes,drawDiagram,testColl,testDist); - printf("\n"); + bool result = TestHashList(hashes, drawDiagram, testColl, testDist); + printf("\n" ); - recordTestResult(result, "Sparse", keybits); + recordTestResult(result, "Sparse", keybits); - addVCodeResult(result); + addVCodeResult(result); - return result; + return result; } //----------------------------------------------------------------------------- -template < typename hashtype > -bool SparseKeyTest(const HashInfo * hinfo, const bool verbose, const bool extra) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - bool result = true; +template +bool SparseKeyTest( const HashInfo * hinfo, const bool verbose, const bool extra ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + bool result = true; printf("[[[ Keyset 'Sparse' Tests ]]]\n\n"); const seed_t seed = hinfo->Seed(g_seed); - result &= SparseKeyImpl< 16,hashtype>(hash,seed,9,true,true,true,verbose); - result &= SparseKeyImpl< 24,hashtype>(hash,seed,8,true,true,true,verbose); - result &= SparseKeyImpl< 32,hashtype>(hash,seed,7,true,true,true,verbose); - result &= SparseKeyImpl< 40,hashtype>(hash,seed,6,true,true,true,verbose); - result &= SparseKeyImpl< 48,hashtype>(hash,seed,6,true,true,true,verbose); - result &= SparseKeyImpl< 56,hashtype>(hash,seed,5,true,true,true,verbose); - result &= SparseKeyImpl< 
64,hashtype>(hash,seed,5,true,true,true,verbose); - result &= SparseKeyImpl< 72,hashtype>(hash,seed,5,true,true,true,verbose); - result &= SparseKeyImpl< 96,hashtype>(hash,seed,4,true,true,true,verbose); + result &= SparseKeyImpl<16, hashtype>(hash, seed, 9, true, true, true, verbose); + result &= SparseKeyImpl<24, hashtype>(hash, seed, 8, true, true, true, verbose); + result &= SparseKeyImpl<32, hashtype>(hash, seed, 7, true, true, true, verbose); + result &= SparseKeyImpl<40, hashtype>(hash, seed, 6, true, true, true, verbose); + result &= SparseKeyImpl<48, hashtype>(hash, seed, 6, true, true, true, verbose); + result &= SparseKeyImpl<56, hashtype>(hash, seed, 5, true, true, true, verbose); + result &= SparseKeyImpl<64, hashtype>(hash, seed, 5, true, true, true, verbose); + result &= SparseKeyImpl<72, hashtype>(hash, seed, 5, true, true, true, verbose); + result &= SparseKeyImpl<96, hashtype>(hash, seed, 4, true, true, true, verbose); if (extra) { - result &= SparseKeyImpl< 112,hashtype>(hash,seed,4,true,true,true,verbose); - result &= SparseKeyImpl< 128,hashtype>(hash,seed,4,true,true,true,verbose); - result &= SparseKeyImpl< 144,hashtype>(hash,seed,4,true,true,true,verbose); + result &= SparseKeyImpl<112, hashtype>(hash, seed, 4, true, true, true, verbose); + result &= SparseKeyImpl<128, hashtype>(hash, seed, 4, true, true, true, verbose); + result &= SparseKeyImpl<144, hashtype>(hash, seed, 4, true, true, true, verbose); } - result &= SparseKeyImpl< 160,hashtype>(hash,seed,4,true,true,true,verbose); + result &= SparseKeyImpl<160, hashtype>(hash, seed, 4, true, true, true, verbose); if (extra) { - result &= SparseKeyImpl< 192,hashtype>(hash,seed,4,true,true,true,verbose); + result &= SparseKeyImpl<192, hashtype>(hash, seed, 4, true, true, true, verbose); } - result &= SparseKeyImpl< 256,hashtype>(hash,seed,3,true,true,true,verbose); + result &= SparseKeyImpl<256, hashtype>(hash, seed, 3, true, true, true, verbose); if (extra) { - result &= SparseKeyImpl< 
288,hashtype>(hash,seed,3,true,true,true,verbose); - result &= SparseKeyImpl< 320,hashtype>(hash,seed,3,true,true,true,verbose); - result &= SparseKeyImpl< 384,hashtype>(hash,seed,3,true,true,true,verbose); - result &= SparseKeyImpl< 448,hashtype>(hash,seed,3,true,true,true,verbose); + result &= SparseKeyImpl<288, hashtype>(hash, seed, 3, true, true, true, verbose); + result &= SparseKeyImpl<320, hashtype>(hash, seed, 3, true, true, true, verbose); + result &= SparseKeyImpl<384, hashtype>(hash, seed, 3, true, true, true, verbose); + result &= SparseKeyImpl<448, hashtype>(hash, seed, 3, true, true, true, verbose); } else if (hinfo->bits > 64) { goto END_Sparse; } - result &= SparseKeyImpl< 512,hashtype>(hash,seed,3,true,true,true,verbose); + result &= SparseKeyImpl<512, hashtype>(hash, seed, 3, true, true, true, verbose); if (extra) { - result &= SparseKeyImpl< 640,hashtype>(hash,seed,3,true,true,true,verbose); - result &= SparseKeyImpl< 768,hashtype>(hash,seed,3,true,true,true,verbose); - result &= SparseKeyImpl< 896,hashtype>(hash,seed,2,true,true,true,verbose); + result &= SparseKeyImpl<640, hashtype>(hash, seed, 3, true, true, true, verbose); + result &= SparseKeyImpl<768, hashtype>(hash, seed, 3, true, true, true, verbose); + result &= SparseKeyImpl<896, hashtype>(hash, seed, 2, true, true, true, verbose); } - result &= SparseKeyImpl<1024,hashtype>(hash,seed,2,true,true,true,verbose); + result &= SparseKeyImpl<1024, hashtype>(hash, seed, 2, true, true, true, verbose); if (extra) { - result &= SparseKeyImpl<1280,hashtype>(hash,seed,2,true,true,true,verbose); - result &= SparseKeyImpl<1536,hashtype>(hash,seed,2,true,true,true,verbose); + result &= SparseKeyImpl<1280, hashtype>(hash, seed, 2, true, true, true, verbose); + result &= SparseKeyImpl<1536, hashtype>(hash, seed, 2, true, true, true, verbose); } - result &= SparseKeyImpl<2048,hashtype>(hash,seed,2,true,true,true,verbose); + result &= SparseKeyImpl<2048, hashtype>(hash, seed, 2, true, true, true, 
verbose); if (extra) { - result &= SparseKeyImpl<3072,hashtype>(hash,seed,2,true,true,true,verbose); - result &= SparseKeyImpl<4096,hashtype>(hash,seed,2,true,true,true,verbose); - result &= SparseKeyImpl<6144,hashtype>(hash,seed,2,true,true,true,verbose); - result &= SparseKeyImpl<8192,hashtype>(hash,seed,2,true,true,true,verbose); - result &= SparseKeyImpl<9992,hashtype>(hash,seed,2,true,true,true,verbose); + result &= SparseKeyImpl<3072, hashtype>(hash, seed, 2, true, true, true, verbose); + result &= SparseKeyImpl<4096, hashtype>(hash, seed, 2, true, true, true, verbose); + result &= SparseKeyImpl<6144, hashtype>(hash, seed, 2, true, true, true, verbose); + result &= SparseKeyImpl<8192, hashtype>(hash, seed, 2, true, true, true, verbose); + result &= SparseKeyImpl<9992, hashtype>(hash, seed, 2, true, true, true, verbose); } - END_Sparse: + END_Sparse: printf("%s\n", result ? "" : g_failstr); return result; diff --git a/tests/SparseKeysetTest.h b/tests/SparseKeysetTest.h index c3064ee6..676d2ca9 100644 --- a/tests/SparseKeysetTest.h +++ b/tests/SparseKeysetTest.h @@ -47,5 +47,5 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -template < typename hashtype > -bool SparseKeyTest(const HashInfo * info, const bool verbose, const bool extra); +template +bool SparseKeyTest( const HashInfo * info, const bool verbose, const bool extra ); diff --git a/tests/SpeedTest.cpp b/tests/SpeedTest.cpp index b2cf4d20..07bd185e 100644 --- a/tests/SpeedTest.cpp +++ b/tests/SpeedTest.cpp @@ -50,7 +50,7 @@ #include "Timing.h" #include "Hashinfo.h" #include "TestGlobals.h" -#include "Stats.h" // For FilterOutliers, CalcMean, CalcStdv +#include "Stats.h" // For FilterOutliers, CalcMean, CalcStdv #include "Random.h" #include "SpeedTest.h" @@ -61,7 +61,7 @@ constexpr int BULK_TRIALS = 2999; // Timings per hash for large (>=128b) keys constexpr int TINY_TRIALS = 200; // Timings per hash for small (<128b) keys -constexpr int TINY_SAMPLES = 15000;// Samples per timing run for small sizes +constexpr int TINY_SAMPLES = 15000; // Samples per timing run for small sizes //----------------------------------------------------------------------------- // This is functionally a speed test, and so will not inform VCodes, @@ -72,18 +72,17 @@ constexpr int TINY_SAMPLES = 15000;// Samples per timing run for small sizes // as possible, but that's hard to do portably. We'll try and get as close as // possible by marking the function as NEVER_INLINE (to keep the optimizer from // moving it) and marking the timing variables as "volatile register". 
-NEVER_INLINE static int64_t timehash(HashFn hash, const seed_t seed, - const void * const key, int len) { - volatile int64_t begin, end; - uint32_t temp[16]; +NEVER_INLINE static int64_t timehash( HashFn hash, const seed_t seed, const void * const key, int len ) { + volatile int64_t begin, end; + uint32_t temp[16]; - begin = timer_start(); + begin = timer_start(); - hash(key,len,seed,temp); + hash(key, len, seed, temp); - end = timer_end(); + end = timer_end(); - return end - begin; + return end - begin; } //----------------------------------------------------------------------------- @@ -109,177 +108,177 @@ NEVER_INLINE static int64_t timehash(HashFn hash, const seed_t seed, // x64 platforms, which leads to unfairly inflated cycle counts. // // WARNING: This assumes that at least 4 bytes can be written to key! -NEVER_INLINE static uint64_t timehash_small(HashFn hash, const seed_t seed, - uint8_t * const key, int len) { - const uint64_t incr = 0x1000001; - uint64_t maxi = incr * TINY_SAMPLES; - volatile unsigned long long int begin, end; - uint32_t hash_temp[16] = {0}; - - begin = timer_start(); - - for (uint64_t i = 0; i < maxi; i += incr) { - hash(key, len, seed, hash_temp); - // It's possible that even with this loop data dependency that - // hash invocations still would not be fully serialized. Another - // option is to add lfence instruction to enforce serialization - // at the CPU level. It's hard to say which one is the most - // realistic and sensible approach. 
- uint32_t j = i ^ hash_temp[0]; - memcpy(key, &j, 4); - } - - end = timer_end(); - - return end - begin; +NEVER_INLINE static uint64_t timehash_small( HashFn hash, const seed_t seed, uint8_t * const key, int len ) { + const uint64_t incr = 0x1000001; + uint64_t maxi = incr * TINY_SAMPLES; + volatile unsigned long long int begin, end; + uint32_t hash_temp[16] = { 0 }; + + begin = timer_start(); + + for (uint64_t i = 0; i < maxi; i += incr) { + hash(key, len, seed, hash_temp); + // It's possible that even with this loop data dependency that + // hash invocations still would not be fully serialized. Another + // option is to add lfence instruction to enforce serialization + // at the CPU level. It's hard to say which one is the most + // realistic and sensible approach. + uint32_t j = i ^ hash_temp[0]; + memcpy(key, &j, 4); + } + + end = timer_end(); + + return end - begin; } //----------------------------------------------------------------------------- double stddev; -static double SpeedTest(HashFn hash, seed_t seed, const int trials, - const int blocksize, const int align, - const int varysize, const int varyalign) { - Rand r(seed); - uint8_t *buf = new uint8_t[blocksize + 512]; // assumes (align + varyalign) <= 257 - uintptr_t t1 = reinterpret_cast(buf); - - t1 = (t1 + 255) & UINT64_C(0xFFFFFFFFFFFFFF00); - t1 += align; - - uint8_t * block = reinterpret_cast(t1); - - std::vector sizes; - if (varysize > 0) - { - sizes.reserve(trials); - for(int i = 0; i < trials; i++) - sizes.push_back(blocksize - varysize + (i % (varysize + 1))); - for(int i = trials - 1; i > 0; i--) - std::swap(sizes[i], sizes[r.rand_range(i + 1)]); - } - - std::vector alignments; - if (varyalign > 0) - { - alignments.reserve(trials); - for(int i = 0; i < trials; i++) - alignments.push_back((i + 1) % (varyalign + 1)); - for(int i = trials - 1; i > 0; i--) - std::swap(alignments[i], alignments[r.rand_range(i + 1)]); - } - - //---------- - - std::vector times; - times.reserve(trials); - - int 
testsize = blocksize; - for(int itrial = 0; itrial < trials; itrial++) - { - if (varysize > 0) - testsize = sizes[itrial]; - if (varyalign > 0) - block = reinterpret_cast(t1 + alignments[itrial]); - - r.rand_p(block,testsize); - - double t; - if (testsize < 128) { - t = (double)timehash_small(hash,seed,block,testsize)/(double)TINY_SAMPLES; - } else { - t = (double)timehash(hash,seed,block,testsize); +static double SpeedTest( HashFn hash, seed_t seed, const int trials, const int blocksize, + const int align, const int varysize, const int varyalign ) { + Rand r( seed ); + uint8_t * buf = new uint8_t[blocksize + 512]; // assumes (align + varyalign) <= 257 + uintptr_t t1 = reinterpret_cast(buf); + + t1 = (t1 + 255) & UINT64_C(0xFFFFFFFFFFFFFF00); + t1 += align; + + uint8_t * block = reinterpret_cast(t1); + + std::vector sizes; + if (varysize > 0) { + sizes.reserve(trials); + for (int i = 0; i < trials; i++) { + sizes.push_back(blocksize - varysize + (i % (varysize + 1))); + } + for (int i = trials - 1; i > 0; i--) { + std::swap(sizes[i], sizes[r.rand_range(i + 1)]); + } } - if(t > 0) times.push_back(t); - } + std::vector alignments; + if (varyalign > 0) { + alignments.reserve(trials); + for (int i = 0; i < trials; i++) { + alignments.push_back((i + 1) % (varyalign + 1)); + } + for (int i = trials - 1; i > 0; i--) { + std::swap(alignments[i], alignments[r.rand_range(i + 1)]); + } + } + + //---------- - delete [] buf; + std::vector times; + times.reserve(trials); - //---------- + int testsize = blocksize; + for (int itrial = 0; itrial < trials; itrial++) { + if (varysize > 0) { + testsize = sizes[itrial]; + } + if (varyalign > 0) { + block = reinterpret_cast(t1 + alignments[itrial]); + } - std::sort(times.begin(),times.end()); + r.rand_p(block, testsize); - FilterOutliers(times); - stddev = CalcStdv(times); + double t; + if (testsize < 128) { + t = (double)timehash_small(hash, seed, block, testsize) / (double)TINY_SAMPLES; + } else { + t = (double)timehash(hash , seed, 
block, testsize); + } - return CalcMean(times); + if (t > 0) { times.push_back(t); } + } + + delete [] buf; + + //---------- + + std::sort(times.begin(), times.end()); + + FilterOutliers(times); + stddev = CalcStdv(times); + + return CalcMean(times); } //----------------------------------------------------------------------------- // 256k blocks seem to give the best results. -static void BulkSpeedTest ( HashFn hash, seed_t seed, bool vary_align, bool vary_size) -{ - const int blocksize = 256 * 1024; - const int maxvary = vary_size ? 127 : 0; +static void BulkSpeedTest( HashFn hash, seed_t seed, bool vary_align, bool vary_size ) { + const int blocksize = 256 * 1024; + const int maxvary = vary_size ? 127 : 0; - if (vary_size) - printf("Bulk speed test - [%d, %d]-byte keys\n",blocksize - maxvary, blocksize); - else - printf("Bulk speed test - %d-byte keys\n",blocksize); - double sumbpc = 0.0; + if (vary_size) { + printf("Bulk speed test - [%d, %d]-byte keys\n", blocksize - maxvary, blocksize); + } else { + printf("Bulk speed test - %d-byte keys\n", blocksize); + } + double sumbpc = 0.0; - volatile double warmup_cycles = SpeedTest(hash,seed,BULK_TRIALS,blocksize,0,0,0); + volatile double warmup_cycles = SpeedTest(hash, seed, BULK_TRIALS, blocksize, 0, 0, 0); - for(int align = 7; align >= 0; align--) - { - double cycles = SpeedTest(hash,seed,BULK_TRIALS,blocksize,align,maxvary,0); + for (int align = 7; align >= 0; align--) { + double cycles = SpeedTest(hash, seed, BULK_TRIALS, blocksize, align, maxvary, 0); - double bestbpc = ((double)blocksize - ((double)maxvary / 2)) / cycles; + double bestbpc = ((double)blocksize - ((double)maxvary / 2)) / cycles; - double bestbps = (bestbpc * 3000000000.0 / 1048576.0); - printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps); - sumbpc += bestbpc; - } - if (vary_align) - { - double cycles = SpeedTest(hash,seed,BULK_TRIALS,blocksize,0,maxvary,7); + double bestbps = (bestbpc * 3000000000.0 / 
1048576.0); + printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n", align, bestbpc, bestbps); + sumbpc += bestbpc; + } + if (vary_align) { + double cycles = SpeedTest(hash, seed, BULK_TRIALS, blocksize, 0, maxvary, 7); - double bestbpc = ((double)blocksize - ((double)maxvary / 2)) / cycles; + double bestbpc = ((double)blocksize - ((double)maxvary / 2)) / cycles; - double bestbps = (bestbpc * 3000000000.0 / 1048576.0); - printf("Alignment rnd - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",bestbpc,bestbps); - // Deliberately not counted in the Average stat, so the two can be directly compared - } + double bestbps = (bestbpc * 3000000000.0 / 1048576.0); + printf("Alignment rnd - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n", bestbpc, bestbps); + // Deliberately not counted in the Average stat, so the two can be directly compared + } - sumbpc = sumbpc / 8.0; - printf("Average - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",sumbpc,(sumbpc * 3000000000.0 / 1048576.0)); - fflush(NULL); + sumbpc = sumbpc / 8.0; + printf("Average - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n", sumbpc, (sumbpc * 3000000000.0 / 1048576.0)); + fflush(NULL); } //----------------------------------------------------------------------------- -static double TinySpeedTest ( HashFn hash, int maxkeysize, seed_t seed, bool verbose, bool include_vary ) -{ - double sum = 0.0; - - printf("Small key speed test - [1, %2d]-byte keys\n",maxkeysize); - - for(int i = 1; i <= maxkeysize; i++) - { - volatile int j = i; - double cycles = SpeedTest(hash,seed,TINY_TRIALS,j,0,0,0); - if(verbose) printf(" %2d-byte keys - %8.2f cycles/hash (%8.6f stdv%8.4f%%)\n",j,cycles,stddev,100.0*stddev/cycles); - sum += cycles; - } - if (include_vary) { - double cycles = SpeedTest(hash,seed,TINY_TRIALS*8,maxkeysize,0,maxkeysize-1,0); - if(verbose) printf(" rnd-byte keys - %8.2f cycles/hash (%8.6f stdv)\n", cycles,stddev); - // Deliberately not counted in the Average stat, so the two can be directly compared - } - - 
sum = sum / (double)maxkeysize; - printf("Average - %8.2f cycles/hash\n",sum); - - return sum; +static double TinySpeedTest( HashFn hash, int maxkeysize, seed_t seed, bool verbose, bool include_vary ) { + double sum = 0.0; + + printf("Small key speed test - [1, %2d]-byte keys\n", maxkeysize); + + for (int i = 1; i <= maxkeysize; i++) { + volatile int j = i; + double cycles = SpeedTest(hash, seed, TINY_TRIALS, j, 0, 0, 0); + if (verbose) { + printf(" %2d-byte keys - %8.2f cycles/hash (%8.6f stdv%8.4f%%)\n", + j, cycles, stddev, 100.0 * stddev / cycles); + } + sum += cycles; + } + if (include_vary) { + double cycles = SpeedTest(hash, seed, TINY_TRIALS * 8, maxkeysize, 0, maxkeysize - 1, 0); + if (verbose) { printf(" rnd-byte keys - %8.2f cycles/hash (%8.6f stdv)\n", cycles, stddev); } + // Deliberately not counted in the Average stat, so the two can be directly compared + } + + sum = sum / (double)maxkeysize; + printf("Average - %8.2f cycles/hash\n", sum); + + return sum; } //----------------------------------------------------------------------------- -bool SpeedTest(const HashInfo * hinfo) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - bool result = true; - Rand r(633692); +bool SpeedTest( const HashInfo * hinfo ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + bool result = true; + Rand r( 633692 ); printf("[[[ Speed Tests ]]]\n\n"); @@ -300,23 +299,20 @@ bool SpeedTest(const HashInfo * hinfo) { //----------------------------------------------------------------------------- // Does 5 different speed tests to try to summarize hash performance -void ShortSpeedTestHeader(void) { +void ShortSpeedTestHeader( void ) { printf("Bulk results are in bytes/cycle, short results are in cycles/hash\n\n"); - printf("%-25s %11s %18s %18s %18s %18s \n", - "Name", " Bulk ", " 1-8 bytes ", "9-16 bytes", - "17-24 bytes", "25-32 bytes"); - printf("%-25s %11s %18s %18s %18s %18s \n", - "-------------------------", "-----------", - "------------------", 
"------------------", - "------------------", "------------------"); + printf("%-25s %11s %18s %18s %18s %18s \n", "Name", " Bulk ", + " 1-8 bytes ", "9-16 bytes", "17-24 bytes", "25-32 bytes"); + printf("%-25s %11s %18s %18s %18s %18s \n", "-------------------------", "-----------", "------------------", + "------------------", "------------------", "------------------"); } -void ShortSpeedTest(const HashInfo * hinfo) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - bool result = true; - Rand r(321321); +void ShortSpeedTest( const HashInfo * hinfo ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + bool result = true; + Rand r( 321321 ); - const int maxvaryalign = 7; + const int maxvaryalign = 7; const int basealignoffset = 0; printf("%-25s", hinfo->name); @@ -324,16 +320,15 @@ void ShortSpeedTest(const HashInfo * hinfo) { const seed_t seed = hinfo->Seed(g_seed ^ r.rand_u64()); { - const int baselen = 256 * 1024; + const int baselen = 256 * 1024; const int maxvarylen = 127; // Do a warmup to get things into cache volatile double warmup_cycles = - SpeedTest(hash,seed,BULK_TRIALS,baselen,0,0,0); + SpeedTest(hash, seed, BULK_TRIALS, baselen, 0, 0, 0); // Do a bulk speed test, varying precise block size and alignment - double cycles = SpeedTest(hash, seed, BULK_TRIALS, - baselen, basealignoffset, maxvarylen, maxvaryalign); + double cycles = SpeedTest(hash, seed, BULK_TRIALS, baselen, basealignoffset, maxvarylen, maxvaryalign); double curbpc = ((double)baselen - ((double)maxvarylen / 2)) / cycles; printf(" %8.2f ", curbpc); } @@ -342,18 +337,18 @@ void ShortSpeedTest(const HashInfo * hinfo) { // group of 8 byte lengths (1-8, 9-16, 17-24, 25-31), varying the // alignment during each test. 
for (int i = 1; i <= 4; i++) { - const int baselen = i * 8; - double cycles = 0.0; - double worstdevpct = 0.0; + const int baselen = i * 8; + double cycles = 0.0; + double worstdevpct = 0.0; for (int j = 0; j < 8; j++) { - double curcyc = SpeedTest(hash, seed, TINY_TRIALS, - baselen + j, basealignoffset, 0, maxvaryalign); - double devpct = 100.0*stddev/curcyc; + double curcyc = SpeedTest(hash, seed, TINY_TRIALS, baselen + j, basealignoffset, 0, maxvaryalign); + double devpct = 100.0 * stddev / curcyc; cycles += curcyc; - if (worstdevpct < devpct) + if (worstdevpct < devpct) { worstdevpct = devpct; + } } - printf(" %7.2f [%5.3f] ", cycles/8.0, worstdevpct); + printf(" %7.2f [%5.3f] ", cycles / 8.0, worstdevpct); } printf("\n"); diff --git a/tests/SpeedTest.h b/tests/SpeedTest.h index 44062814..f6551c7d 100644 --- a/tests/SpeedTest.h +++ b/tests/SpeedTest.h @@ -43,6 +43,6 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -bool SpeedTest(const HashInfo * info); -void ShortSpeedTest(const HashInfo * hinfo); -void ShortSpeedTestHeader(void); +bool SpeedTest( const HashInfo * info ); +void ShortSpeedTest( const HashInfo * hinfo ); +void ShortSpeedTestHeader( void ); diff --git a/tests/TextKeysetTest.cpp b/tests/TextKeysetTest.cpp index 7e7f676d..37e3e05f 100644 --- a/tests/TextKeysetTest.cpp +++ b/tests/TextKeysetTest.cpp @@ -66,173 +66,171 @@ // where "core" consists of all possible combinations of the given character // set of length N. 
-template < typename hashtype > -static bool TextKeyImpl(HashFn hash, const seed_t seed, const char * prefix, const char * coreset, const int corelen, const char * suffix, bool drawDiagram ) -{ - const int prefixlen = (int)strlen(prefix); - const int suffixlen = (int)strlen(suffix); - const int corecount = (int)strlen(coreset); - - const int keybytes = prefixlen + corelen + suffixlen; - long keycount = (long)pow(double(corecount),double(corelen)); - if (keycount > INT32_MAX / 8) - keycount = INT32_MAX / 8; - - uint8_t * key = new uint8_t[std::min(keybytes+1, 64)]; - memcpy(key,prefix,prefixlen); - memset(key+prefixlen, 'X', corelen); - memcpy(key+prefixlen+corelen,suffix,suffixlen); - key[keybytes] = 0; - - printf("Keyset 'Text' - keys of form \"%s\" - %ld keys\n", key, keycount); - - //---------- - - std::vector hashes; - hashes.resize(keycount); - - for(int i = 0; i < (int)keycount; i++) - { - int t = i; - - for(int j = 0; j < corelen; j++) - { - key[prefixlen+j] = coreset[t % corecount]; t /= corecount; +template +static bool TextKeyImpl( HashFn hash, const seed_t seed, const char * prefix, const char * coreset, + const int corelen, const char * suffix, bool drawDiagram ) { + const int prefixlen = (int)strlen(prefix); + const int suffixlen = (int)strlen(suffix); + const int corecount = (int)strlen(coreset); + + const int keybytes = prefixlen + corelen + suffixlen; + long keycount = (long)pow(double(corecount), double(corelen)); + + if (keycount > INT32_MAX / 8) { + keycount = INT32_MAX / 8; } - hash(key,keybytes,seed,&hashes[i]); - addVCodeInput(key, keybytes); - } + uint8_t * key = new uint8_t[std::min(keybytes + 1, 64)]; + memcpy(key, prefix, prefixlen); + memset(key + prefixlen, 'X', corelen); + memcpy(key + prefixlen + corelen, suffix, suffixlen); + key[keybytes] = 0; - //---------- - bool result = TestHashList(hashes,drawDiagram); - printf("\n"); + printf("Keyset 'Text' - keys of form \"%s\" - %ld keys\n", key, keycount); - memset(key+prefixlen, 'X', 
corelen); - recordTestResult(result, "Text", (const char *)key); + //---------- - addVCodeResult(result); + std::vector hashes; + hashes.resize(keycount); - delete [] key; + for (int i = 0; i < (int)keycount; i++) { + int t = i; - return result; + for (int j = 0; j < corelen; j++) { + key[prefixlen + j] = coreset[t % corecount]; t /= corecount; + } + + hash(key, keybytes, seed, &hashes[i]); + addVCodeInput(key, keybytes); + } + + //---------- + bool result = TestHashList(hashes, drawDiagram); + printf("\n"); + + memset(key + prefixlen, 'X', corelen); + recordTestResult(result, "Text", (const char *)key); + + addVCodeResult(result); + + delete [] key; + + return result; } //----------------------------------------------------------------------------- // Keyset 'Words' - pick random chars from coreset (alnum or password chars) -template < typename hashtype > -static bool WordsKeyImpl(HashFn hash, const seed_t seed, - const long keycount, const int minlen, const int maxlen, - const char * coreset, const char* name, bool drawDiagram) { - const int corecount = (int)strlen(coreset); - printf("Keyset 'Words' - %d-%d random chars from %s charset - %ld keys\n", minlen, maxlen, name, keycount); - assert (minlen >= 0); - assert (maxlen > minlen); - - std::unordered_set words; // need to be unique, otherwise we report collisions - std::vector hashes; - hashes.resize(keycount); - Rand r(483723); - - char* key = new char[std::min(maxlen+1, 64)]; - std::string key_str; - - for(long i = 0; i < keycount; i++) - { - const int len = minlen + (r.rand_u32() % (maxlen - minlen)); - key[len] = 0; - for(int j = 0; j < len; j++) - { - key[j] = coreset[r.rand_u32() % corecount]; - } - key_str = key; - if (words.count(key_str) > 0) { // not unique - i--; - continue; - } - words.insert(key_str); - - hash(key, len, seed, &hashes[i]); - addVCodeInput(key, len); +template +static bool WordsKeyImpl( HashFn hash, const seed_t seed, const long keycount, const int minlen, + const int maxlen, const 
char * coreset, const char * name, bool drawDiagram ) { + const int corecount = (int)strlen(coreset); + + printf("Keyset 'Words' - %d-%d random chars from %s charset - %ld keys\n", minlen, maxlen, name, keycount); + assert(minlen >= 0 ); + assert(maxlen > minlen); + + std::unordered_set words; // need to be unique, otherwise we report collisions + std::vector hashes; + hashes.resize(keycount); + Rand r( 483723 ); + + char * key = new char[std::min(maxlen + 1, 64)]; + std::string key_str; + + for (long i = 0; i < keycount; i++) { + const int len = minlen + (r.rand_u32() % (maxlen - minlen)); + key[len] = 0; + for (int j = 0; j < len; j++) { + key[j] = coreset[r.rand_u32() % corecount]; + } + key_str = key; + if (words.count(key_str) > 0) { // not unique + i--; + continue; + } + words.insert(key_str); + + hash(key, len, seed, &hashes[i]); + addVCodeInput(key, len); #if 0 && defined DEBUG - uint64_t h; - memcpy(&h, &hashes[i], std::max(sizeof(hashtype),8)); - printf("%d %s %lx\n", i, (char*)key, h); + uint64_t h; + memcpy(&h, &hashes[i], std::max(sizeof(hashtype), 8)); + printf("%d %s %lx\n", i, (char *)key, h); #endif - } - delete [] key; + } + delete [] key; - //---------- - bool result = TestHashList(hashes,drawDiagram); - printf("\n"); + //---------- + bool result = TestHashList(hashes, drawDiagram); + printf("\n"); - recordTestResult(result, "Text", name); + recordTestResult(result, "Text", name); - addVCodeResult(result); + addVCodeResult(result); - return result; + return result; } -template < typename hashtype > -static bool WordsStringImpl(HashFn hash, const seed_t seed, - std::vector & words, bool drawDiagram) { - long wordscount = words.size(); - printf("Keyset 'Words' - dictionary words - %ld keys\n", wordscount); - - std::unordered_set wordset; // need to be unique, otherwise we report collisions - std::vector hashes; - hashes.resize(wordscount); - Rand r(483723); - - for(int i = 0; i < (int)wordscount; i++) { - if (wordset.count(words[i]) > 0) { // not 
unique - i--; - continue; +template +static bool WordsStringImpl( HashFn hash, const seed_t seed, std::vector & words, bool drawDiagram ) { + long wordscount = words.size(); + + printf("Keyset 'Words' - dictionary words - %ld keys\n", wordscount); + + std::unordered_set wordset; // need to be unique, otherwise we report collisions + std::vector hashes; + hashes.resize(wordscount); + Rand r( 483723 ); + + for (int i = 0; i < (int)wordscount; i++) { + if (wordset.count(words[i]) > 0) { // not unique + i--; + continue; + } + wordset.insert(words[i]); + const int len = words[i].length(); + const char * key = words[i].c_str(); + hash(key, len, seed, &hashes[i]); + addVCodeInput(key, len); } - wordset.insert(words[i]); - const int len = words[i].length(); - const char *key = words[i].c_str(); - hash(key, len, seed, &hashes[i]); - addVCodeInput(key, len); - } - //---------- - bool result = TestHashList(hashes,drawDiagram); - printf("\n"); + //---------- + bool result = TestHashList(hashes, drawDiagram); + printf("\n"); - recordTestResult(result, "Text", "dictionary"); + recordTestResult(result, "Text", "dictionary"); - addVCodeResult(result); + addVCodeResult(result); - return result; + return result; } //----------------------------------------------------------------------------- -template < typename hashtype > -bool TextKeyTest(const HashInfo * hinfo, const bool verbose) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - const char * alnum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; +template +bool TextKeyTest( const HashInfo * hinfo, const bool verbose ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + const char * alnum = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; const char * passwordchars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" - ".,!?:;-+=()<>/|\"'@#$%&*_^"; + ".,!?:;-+=()<>/|\"'@#$%&*_^"; bool result = true; printf("[[[ Keyset 'Text' Tests ]]]\n\n"); const seed_t seed = 
hinfo->Seed(g_seed); - result &= TextKeyImpl(hash, seed, "Foo", alnum, 4, "Bar", verbose ); - result &= TextKeyImpl(hash, seed, "FooBar", alnum, 4, "", verbose ); - result &= TextKeyImpl(hash, seed, "", alnum, 4, "FooBar", verbose ); + result &= TextKeyImpl(hash, seed, "Foo" , alnum, 4, "Bar" , verbose); + result &= TextKeyImpl(hash, seed, "FooBar", alnum, 4, "" , verbose); + result &= TextKeyImpl(hash, seed, "" , alnum, 4, "FooBar", verbose); // maybe use random-len vector of strings here, from len 6-16 - result &= WordsKeyImpl(hash, seed, 4000000, 6, 16, alnum, "alnum", verbose ); - result &= WordsKeyImpl(hash, seed, 4000000, 6, 16, passwordchars, "password", verbose ); + result &= WordsKeyImpl (hash, seed, 4000000, 6, 16, alnum , "alnum", verbose); + result &= WordsKeyImpl (hash, seed, 4000000, 6, 16, passwordchars, "password", verbose); std::vector words = HashMapInit(verbose); - result &= WordsStringImpl(hash, seed, words, verbose ); + result &= WordsStringImpl(hash, seed, words, verbose); printf("%s\n", result ? "" : g_failstr); diff --git a/tests/TextKeysetTest.h b/tests/TextKeysetTest.h index c97e255f..870261dc 100644 --- a/tests/TextKeysetTest.h +++ b/tests/TextKeysetTest.h @@ -47,5 +47,5 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -template < typename hashtype > -bool TextKeyTest(const HashInfo * info, const bool verbose); +template +bool TextKeyTest( const HashInfo * info, const bool verbose ); diff --git a/tests/TwoBytesKeysetTest.cpp b/tests/TwoBytesKeysetTest.cpp index 4458598e..808f64f3 100644 --- a/tests/TwoBytesKeysetTest.cpp +++ b/tests/TwoBytesKeysetTest.cpp @@ -49,7 +49,7 @@ #include "Platform.h" #include "Hashinfo.h" #include "TestGlobals.h" -#include "Stats.h" // for chooseK +#include "Stats.h" // for chooseK #include "Analyze.h" #include "Instantiate.h" #include "VCode.h" @@ -59,18 +59,18 @@ //----------------------------------------------------------------------------- // Keyset 'TwoBytes' - generate all keys up to length N with two non-zero bytes -template< typename hashtype > -static void TwoBytesKeygen(HashFn hash, const seed_t seed, - int maxlen, std::vector & hashes) { +template +static void TwoBytesKeygen( HashFn hash, const seed_t seed, int maxlen, std::vector & hashes ) { //---------- // Compute # of keys int keycount = 0; + for (int i = 2; i <= maxlen; i++) { - keycount += (int)chooseK(i,2); + keycount += (int)chooseK(i, 2); } - keycount *= 255*255; + keycount *= 255 * 255; for (int i = 2; i <= maxlen; i++) { - keycount += i*255; + keycount += i * 255; } printf("Keyset 'TwoBytes' - up-to-%d-byte keys - %d keys\n", maxlen, keycount); @@ -81,7 +81,7 @@ static void TwoBytesKeygen(HashFn hash, const seed_t seed, memset(key, 0, 256); for (int keylen = 2; keylen <= maxlen; keylen++) { - for (int byteA = 0; byteA < keylen; byteA++){ + for (int byteA = 0; byteA < keylen; byteA++) { for (int valA = 1; valA <= 255; valA++) { hashtype h; key[byteA] = (uint8_t)valA; @@ -96,8 +96,8 @@ static void TwoBytesKeygen(HashFn hash, const seed_t seed, //---------- // Add all keys with two non-zero bytes for (int keylen = 2; keylen <= maxlen; keylen++) { - for (int byteA = 0; byteA < keylen-1; byteA++) { - for (int byteB = byteA+1; byteB < keylen; byteB++) { + for (int byteA = 0; byteA < 
keylen - 1; byteA++) { + for (int byteB = byteA + 1; byteB < keylen; byteB++) { for (int valA = 1; valA <= 255; valA++) { key[byteA] = (uint8_t)valA; for (int valB = 1; valB <= 255; valB++) { @@ -115,28 +115,29 @@ static void TwoBytesKeygen(HashFn hash, const seed_t seed, } } -template < typename hashtype > -static bool TwoBytesTest2(HashFn hash, const seed_t seed, int maxlen, bool drawDiagram) { - std::vector hashes; +template +static bool TwoBytesTest2( HashFn hash, const seed_t seed, int maxlen, bool drawDiagram ) { + std::vector hashes; - TwoBytesKeygen(hash, seed, maxlen, hashes); + TwoBytesKeygen(hash, seed, maxlen, hashes); - bool result = TestHashList(hashes,drawDiagram); - printf("\n"); + bool result = TestHashList(hashes, drawDiagram); + printf("\n"); - recordTestResult(result, "TwoBytes", maxlen); + recordTestResult(result, "TwoBytes", maxlen); - addVCodeResult(result); + addVCodeResult(result); - return result; + return result; } //----------------------------------------------------------------------------- -template < typename hashtype > -bool TwoBytesKeyTest(const HashInfo * hinfo, const bool verbose, const bool extra) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - bool result = true; - int maxlen; +template +bool TwoBytesKeyTest( const HashInfo * hinfo, const bool verbose, const bool extra ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + bool result = true; + int maxlen; + if (extra) { maxlen = 24; } else if (hinfo->isVerySlow()) { diff --git a/tests/TwoBytesKeysetTest.h b/tests/TwoBytesKeysetTest.h index df21eac6..c5789be2 100644 --- a/tests/TwoBytesKeysetTest.h +++ b/tests/TwoBytesKeysetTest.h @@ -47,5 +47,5 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -template < typename hashtype > -bool TwoBytesKeyTest(const HashInfo * info, const bool verbose, const bool extra); +template +bool TwoBytesKeyTest( const HashInfo * info, const bool verbose, const bool extra ); diff --git a/tests/WindowedKeysetTest.cpp b/tests/WindowedKeysetTest.cpp index c92b5a6e..adf3594d 100644 --- a/tests/WindowedKeysetTest.cpp +++ b/tests/WindowedKeysetTest.cpp @@ -49,7 +49,7 @@ #include "Platform.h" #include "Hashinfo.h" #include "TestGlobals.h" -#include "Stats.h" // For EstimateNbCollisions +#include "Stats.h" // For EstimateNbCollisions #include "Analyze.h" #include "Instantiate.h" #include "VCode.h" @@ -62,84 +62,84 @@ // Keyset 'Window' - for all possible N-bit windows of a K-bit key, generate // all possible keys with bits set in that window -template < typename keytype, typename hashtype > -static bool WindowedKeyImpl(HashFn hash, const seed_t seed, int windowbits, - bool testCollision, bool testDistribution, bool drawDiagram) { - const int keybits = sizeof(keytype) * 8; - const int hashbits = sizeof(hashtype) * 8; - // calc keycount to expect min. 0.5 collisions: EstimateNbCollisions, except for 64++bit. 
- // there limit to 2^25 = 33554432 keys - int keycount = 1 << windowbits; - while (EstimateNbCollisions(keycount, hashbits) < 0.5 && windowbits < 25) { - if ((int)log2(2.0 * keycount) < 0) // overflow - break; - keycount *= 2; - windowbits = (int)log2(1.0 * keycount); - //printf (" enlarge windowbits to %d (%d keys)\n", windowbits, keycount); - //fflush (NULL); - } - - std::vector hashes; - hashes.resize(keycount); - - bool result = true; - int testcount = keybits; - - printf("Keyset 'Window' - %3d-bit key, %3d-bit window - %d tests - %d keys\n", - keybits,windowbits,testcount,keycount); - - for(int j = 0; j < testcount; j++) - { - int minbit = j; - keytype key; - - for(int i = 0; i < keycount; i++) - { - key = i; - key.lrot(minbit); - hash(&key, sizeof(keytype), seed, &hashes[i]); - addVCodeInput(&key, sizeof(keytype)); +template +static bool WindowedKeyImpl( HashFn hash, const seed_t seed, int windowbits, + bool testCollision, bool testDistribution, bool drawDiagram ) { + const int keybits = sizeof(keytype ) * 8; + const int hashbits = sizeof(hashtype) * 8; + // calc keycount to expect min. 0.5 collisions: EstimateNbCollisions, except for 64++bit. 
+ // there limit to 2^25 = 33554432 keys + int keycount = 1 << windowbits; + + while (EstimateNbCollisions(keycount, hashbits) < 0.5 && windowbits < 25) { + if ((int)log2(2.0 * keycount) < 0) { // overflow + break; + } + keycount *= 2; + windowbits = (int)log2(1.0 * keycount); + // printf (" enlarge windowbits to %d (%d keys)\n", windowbits, keycount); + // fflush (NULL); } - printf("Window at bit %3d\n",j); + std::vector hashes; + hashes.resize(keycount); - bool thisresult = TestHashList(hashes, drawDiagram, testCollision, testDistribution, - /* do not test high/low bits (to not clobber the screen) */ - false, false, true); + bool result = true; + int testcount = keybits; - recordTestResult(thisresult, "Windowed", j); + printf("Keyset 'Window' - %3d-bit key, %3d-bit window - %d tests - %d keys\n", + keybits, windowbits, testcount, keycount); - addVCodeResult(thisresult); + for (int j = 0; j < testcount; j++) { + int minbit = j; + keytype key; - result &= thisresult; - } + for (int i = 0; i < keycount; i++) { + key = i; + key.lrot(minbit); + hash(&key, sizeof(keytype), seed, &hashes[i]); + addVCodeInput(&key, sizeof(keytype)); + } - return result; + printf("Window at bit %3d\n", j); + + bool thisresult = TestHashList(hashes, drawDiagram, testCollision, testDistribution, + /* do not test high/low bits (to not clobber the screen) */ + false, false, true); + + recordTestResult(thisresult, "Windowed", j); + + addVCodeResult(thisresult); + + result &= thisresult; + } + + return result; } //----------------------------------------------------------------------------- -template < typename hashtype > -bool WindowedKeyTest(const HashInfo * hinfo, const bool verbose, const bool extra) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - bool result = true; - bool testCollision = true; +template +bool WindowedKeyTest( const HashInfo * hinfo, const bool verbose, const bool extra ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + bool result = true; + bool testCollision 
= true; // Skip distribution test for these - they're too easy to // distribute well, and it generates a _lot_ of testing. bool testDistribution = extra; // This value is now adjusted to generate at least 0.5 collisions per window, // except for 64++bit where it unrealistic. There use smaller but more keys, // to get a higher collision percentage. - int windowbits = 20; + int windowbits = 20; constexpr int hashbits = sizeof(hashtype) * 8; - constexpr int keybits = (hashbits >= 64) ? 32 : 72; + constexpr int keybits = (hashbits >= 64) ? 32 : 72; printf("[[[ Keyset 'Window' Tests ]]]\n\n"); const seed_t seed = hinfo->Seed(g_seed); - result &= WindowedKeyImpl< Blob, hashtype >(hash, seed, - windowbits, testCollision, testDistribution, verbose); + result &= + WindowedKeyImpl, hashtype>(hash, seed, windowbits, testCollision, testDistribution, verbose); printf("\n%s\n", result ? "" : g_failstr); diff --git a/tests/WindowedKeysetTest.h b/tests/WindowedKeysetTest.h index 433efb72..cc545848 100644 --- a/tests/WindowedKeysetTest.h +++ b/tests/WindowedKeysetTest.h @@ -47,5 +47,5 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -template < typename hashtype > -bool WindowedKeyTest(const HashInfo * info, const bool verbose, const bool extra); +template +bool WindowedKeyTest( const HashInfo * info, const bool verbose, const bool extra ); diff --git a/tests/ZeroesKeysetTest.cpp b/tests/ZeroesKeysetTest.cpp index 8f7738ad..cb525675 100644 --- a/tests/ZeroesKeysetTest.cpp +++ b/tests/ZeroesKeysetTest.cpp @@ -59,44 +59,44 @@ // Keyset 'Zeroes' - keys consisting of all zeroes, differing only in length // We reuse one block of empty bytes, otherwise the RAM cost is enormous. 
-template < typename hashtype > -static bool ZeroKeyImpl(HashFn hash, const seed_t seed, bool drawDiagram) { - int keycount = 200*1024; +template +static bool ZeroKeyImpl( HashFn hash, const seed_t seed, bool drawDiagram ) { + int keycount = 200 * 1024; - printf("Keyset 'Zeroes' - %d keys\n",keycount); + printf("Keyset 'Zeroes' - %d keys\n", keycount); - uint8_t * nullblock = new uint8_t[keycount]; - memset(nullblock,0,keycount); + uint8_t * nullblock = new uint8_t[keycount]; + memset(nullblock, 0, keycount); - addVCodeInput(nullblock, keycount); + addVCodeInput(nullblock, keycount); - //---------- - std::vector hashes; + //---------- + std::vector hashes; - hashes.resize(keycount); + hashes.resize(keycount); - for(int i = 0; i < keycount; i++) { - hash(nullblock, i, seed, &hashes[i]); - } + for (int i = 0; i < keycount; i++) { + hash(nullblock, i, seed, &hashes[i]); + } - bool result = TestHashList(hashes,drawDiagram); - printf("\n"); + bool result = TestHashList(hashes, drawDiagram); + printf("\n"); - delete [] nullblock; + delete [] nullblock; - recordTestResult(result, "Zeroes", (const char *)NULL); + recordTestResult(result, "Zeroes", (const char *)NULL); - addVCodeResult(result); + addVCodeResult(result); - return result; + return result; } //----------------------------------------------------------------------------- -template < typename hashtype > -bool ZeroKeyTest(const HashInfo * hinfo, const bool verbose) { - const HashFn hash = hinfo->hashFn(g_hashEndian); - bool result = true; +template +bool ZeroKeyTest( const HashInfo * hinfo, const bool verbose ) { + const HashFn hash = hinfo->hashFn(g_hashEndian); + bool result = true; printf("[[[ Keyset 'Zeroes' Tests ]]]\n\n"); diff --git a/tests/ZeroesKeysetTest.h b/tests/ZeroesKeysetTest.h index 6d124dd3..8ff346d8 100644 --- a/tests/ZeroesKeysetTest.h +++ b/tests/ZeroesKeysetTest.h @@ -47,5 +47,5 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ -template < typename hashtype > -bool ZeroKeyTest(const HashInfo * info, const bool verbose); +template +bool ZeroKeyTest( const HashInfo * info, const bool verbose ); diff --git a/util/Analyze.cpp b/util/Analyze.cpp index 26c9f657..784adb64 100644 --- a/util/Analyze.cpp +++ b/util/Analyze.cpp @@ -59,7 +59,7 @@ #include #include -#include // for memset +#include // for memset #include #include "Analyze.h" @@ -79,174 +79,174 @@ static const double WARNING_PBOUND = exp2(-12); // 2**-12 == 1/4096 =~ 0.0244%, // (number of excess "heads" or "tails") over all those trials was the // specified worstbiascnt. -bool ReportBias(const int worstbiascnt, const int coinflips, const int trials, const bool drawDiagram ) -{ - double ratio = (double)worstbiascnt / (double)coinflips; - double p1value = 2 * exp(-(double)worstbiascnt * ratio); // two-tailed Chernoff Bound - double p_value = ScalePValue(p1value, trials); - int logp_value = GetLog2PValue(p_value); - bool result = true; - - recordLog2PValue(logp_value); - if (drawDiagram) - printf(" worst bias is %f%% (%6d) (p<%8.6e) (^%2d)", ratio*200.0, worstbiascnt, p_value, logp_value); - else - printf(" worst bias is %f%% (^%2d)", ratio*200.0, logp_value); - - if (p_value < FAILURE_PBOUND) - { - printf(" !!!!!\n"); - result = false; - } - else if (p_value < WARNING_PBOUND) - printf(" !\n"); - else - printf("\n"); +bool ReportBias( const int worstbiascnt, const int coinflips, const int trials, const bool drawDiagram ) { + double ratio = (double)worstbiascnt / (double)coinflips; + double p1value = 2 * exp(-(double)worstbiascnt * ratio); // two-tailed Chernoff Bound + double p_value = ScalePValue(p1value, trials); + int logp_value = GetLog2PValue(p_value); + bool result = true; + + recordLog2PValue(logp_value); + if (drawDiagram) { + printf(" worst bias is %f%% (%6d) (p<%8.6e) (^%2d)", ratio * 200.0, worstbiascnt, p_value, logp_value); + } else { + printf(" worst bias is %f%% (^%2d)", ratio * 200.0, logp_value); + } - return result; 
+ if (p_value < FAILURE_PBOUND) { + printf(" !!!!!\n"); + result = false; + } else if (p_value < WARNING_PBOUND) { + printf(" !\n"); + } else { + printf("\n"); + } + + return result; } //----------------------------------------------------------------------------- -static bool ReportCollisions(uint64_t const nbH, int collcount, unsigned hashsize, bool maxcoll, bool highbits, bool header, bool verbose, bool drawDiagram ) -{ - bool largehash = hashsize > (8 * sizeof(uint32_t)); - - // The expected number depends on what collision statistic is being - // reported on; "worst of N buckets" is very different than "sum - // over N buckets". - // - // Also determine an upper-bound on the unlikelihood of the observed - // collision count. - double expected, p_value; - if (maxcoll) - { - expected = EstimateMaxCollisions(nbH, hashsize); - p_value = EstimatedBinomialPValue(nbH, hashsize, collcount); - } - else - { - expected = EstimateNbCollisions(nbH, hashsize); - p_value = BoundedPoissonPValue(expected, collcount); - } - int logp_value = GetLog2PValue(p_value); - - // Since p-values are now used to determine pass/warning/failure - // status, ratios are now solely for humans reading the results. - // - // If there were no collisions and none were expected, for a - // suitably fuzzy value of "none", then a ratio of 1.00 ("test - // exactly met expectations") is most sensible. - // - // If there were no collisions and there was a decent chance of - // seeing one, then a ratio of 0.00 ("test saw 0% of expected - // collisions") seems best. - // - // If there were any collisions, and the odds of seeing one were - // quite low (arbitrarily chosen to be 0.01), then a ratio isn't - // really meaningful, so we use +inf. - // - // A collision count matching the rounded expectation value is - // treated as "exactly expected". 
For small hash sizes, if the - // expected count has more than 0.1 after the decimal place and the - // actual collision count is the next integer above the expected - // one, then that case is also treated as "exactly expected". - // - // In all other cases, the true ratio is computed, but the value - // will be bounded to not clutter the output in failure cases. - double ratio; - if (collcount == 0) - ratio = (expected < 0.1) ? 1.00 : 0.00; - else if (expected < 0.01) - ratio = INFINITY; - else if (collcount == (int)round(expected)) - ratio = 1.00; - else if (!largehash && (collcount == (int)round(expected+0.4))) - ratio = 1.00; - else { - ratio = double(collcount) / expected; - if (ratio >= 999.95) - ratio = INFINITY; - } - - bool warning = false, failure = false; - if (p_value < FAILURE_PBOUND) - failure = true; - else if (p_value < WARNING_PBOUND) - warning = true; - else if (isnan(ratio)) - warning = true; - - if (verbose) - { - if (header) - printf("Testing %s collisions (%s %3i-bit)", maxcoll ? "max" : "all", - highbits ? "high" : "low ", hashsize); - - // 8 integer digits would match the 10.1 float specifier - // (10 characters - 1 decimal point - 1 digit after the decimal), - // but some hashes greatly exceed expected collision counts. - if (!finite(ratio)) - printf(" - Expected %10.1f, actual %10i (------) ", expected, collcount); - else if (ratio < 9.0) - printf(" - Expected %10.1f, actual %10i (%5.3fx) ", expected, collcount, ratio); - else - printf(" - Expected %10.1f, actual %10i (%#.4gx) ", expected, collcount, ratio); - - // Since ratios and p-value summaries are most important to humans, - // and deltas and exact p-values add visual noise and variable line - // widths and possibly field counts, they are now only printed out - // in --verbose mode. 
- recordLog2PValue(logp_value); - if (drawDiagram) - printf("(%+i) (p<%8.6f) (^%2d)", collcount - (int)round(expected), p_value, logp_value); - else - printf("(^%2d)", logp_value); - - if (failure) - printf(" !!!!!\n"); - else if (warning) - printf(" !\n"); - else - printf("\n"); - } - - return !failure; +static bool ReportCollisions( uint64_t const nbH, int collcount, unsigned hashsize, bool maxcoll, + bool highbits, bool header, bool verbose, bool drawDiagram ) { + bool largehash = hashsize > (8 * sizeof(uint32_t)); + + // The expected number depends on what collision statistic is being + // reported on; "worst of N buckets" is very different than "sum + // over N buckets". + // + // Also determine an upper-bound on the unlikelihood of the observed + // collision count. + double expected, p_value; + + if (maxcoll) { + expected = EstimateMaxCollisions(nbH, hashsize); + p_value = EstimatedBinomialPValue(nbH, hashsize, collcount); + } else { + expected = EstimateNbCollisions(nbH, hashsize); + p_value = BoundedPoissonPValue(expected, collcount); + } + int logp_value = GetLog2PValue(p_value); + + // Since p-values are now used to determine pass/warning/failure + // status, ratios are now solely for humans reading the results. + // + // If there were no collisions and none were expected, for a + // suitably fuzzy value of "none", then a ratio of 1.00 ("test + // exactly met expectations") is most sensible. + // + // If there were no collisions and there was a decent chance of + // seeing one, then a ratio of 0.00 ("test saw 0% of expected + // collisions") seems best. + // + // If there were any collisions, and the odds of seeing one were + // quite low (arbitrarily chosen to be 0.01), then a ratio isn't + // really meaningful, so we use +inf. + // + // A collision count matching the rounded expectation value is + // treated as "exactly expected". 
For small hash sizes, if the + // expected count has more than 0.1 after the decimal place and the + // actual collision count is the next integer above the expected + // one, then that case is also treated as "exactly expected". + // + // In all other cases, the true ratio is computed, but the value + // will be bounded to not clutter the output in failure cases. + double ratio; + if (collcount == 0) { + ratio = (expected < 0.1) ? 1.00 : 0.00; + } else if (expected < 0.01) { + ratio = INFINITY; + } else if (collcount == (int)round(expected)) { + ratio = 1.00; + } else if (!largehash && (collcount == (int)round(expected + 0.4))) { + ratio = 1.00; + } else { + ratio = double(collcount) / expected; + if (ratio >= 999.95) { + ratio = INFINITY; + } + } + + bool warning = false, failure = false; + if (p_value < FAILURE_PBOUND) { + failure = true; + } else if (p_value < WARNING_PBOUND) { + warning = true; + } else if (isnan(ratio)) { + warning = true; + } + + if (verbose) { + if (header) { + printf("Testing %s collisions (%s %3i-bit)", maxcoll ? "max" : "all", highbits ? "high" : "low ", hashsize); + } + + // 8 integer digits would match the 10.1 float specifier + // (10 characters - 1 decimal point - 1 digit after the decimal), + // but some hashes greatly exceed expected collision counts. + if (!finite(ratio)) { + printf(" - Expected %10.1f, actual %10i (------) ", expected, collcount); + } else if (ratio < 9.0) { + printf(" - Expected %10.1f, actual %10i (%5.3fx) ", expected, collcount, ratio); + } else { + printf(" - Expected %10.1f, actual %10i (%#.4gx) ", expected, collcount, ratio); + } + + // Since ratios and p-value summaries are most important to humans, + // and deltas and exact p-values add visual noise and variable line + // widths and possibly field counts, they are now only printed out + // in --verbose mode. 
+ recordLog2PValue(logp_value); + if (drawDiagram) { + printf("(%+i) (p<%8.6f) (^%2d)", collcount - (int)round(expected), p_value, logp_value); + } else { + printf("(^%2d)", logp_value); + } + + if (failure) { + printf(" !!!!!\n"); + } else if (warning) { + printf(" !\n"); + } else { + printf("\n"); + } + } + + return !failure; } //---------------------------------------------------------------------------- -static void plot ( double n ) -{ - int ni = (int)floor(n); - - // Less than [0,3) sigma is fine, [3, 12) sigma is notable, 12+ sigma is pretty bad - if(ni <= 2) - putchar('.'); - else if (ni <= 11) - putchar('1' + ni - 3); - else - putchar('X'); +static void plot( double n ) { + int ni = (int)floor(n); + + // Less than [0,3) sigma is fine, [3, 12) sigma is notable, 12+ sigma is pretty bad + if (ni <= 2) { + putchar('.'); + } else if (ni <= 11) { + putchar('1' + ni - 3); + } else { + putchar('X'); + } } //----------------------------------------------------------------------------- // Sort the hash list, count the total number of collisions and return // the first N collisions for further processing -template< typename hashtype > -unsigned int FindCollisions(std::vector & hashes, - std::set & collisions, - int maxCollisions, - bool drawDiagram) { +template +unsigned int FindCollisions( std::vector & hashes, std::set & collisions, + int maxCollisions, bool drawDiagram ) { unsigned int collcount = 0; - blobsort(hashes.begin(),hashes.end()); + + blobsort(hashes.begin(), hashes.end()); const size_t sz = hashes.size(); for (size_t hnb = 1; hnb < sz; hnb++) { - if(hashes[hnb] == hashes[hnb-1]) { + if (hashes[hnb] == hashes[hnb - 1]) { collcount++; - if(collcount < maxCollisions) { + if (collcount < maxCollisions) { #if 0 - printf (" %zu: ", hnb); + printf(" %zu: ", hnb); hashes[hnb].printhex(""); #endif if (drawDiagram) { @@ -257,8 +257,9 @@ unsigned int FindCollisions(std::vector & hashes, } #if 0 && defined(DEBUG) - if (collcount) - printf ("\n"); + if (collcount) { 
+ printf("\n"); + } #endif return collcount; @@ -266,12 +267,12 @@ unsigned int FindCollisions(std::vector & hashes, INSTANTIATE(FindCollisions, HASHTYPELIST); -template < typename hashtype > -void PrintCollisions(std::set & collisions) { +template +void PrintCollisions( std::set & collisions ) { printf("\nCollisions:\n"); for (auto it = collisions.begin(); it != collisions.end(); ++it) { - const hashtype &hash = *it; + const hashtype & hash = *it; hash.printhex(" "); } printf("\n"); @@ -295,260 +296,262 @@ INSTANTIATE(PrintCollisions, HASHTYPELIST); // since a collision for N bits is also a collision for N-k bits. // // This requires the vector of hashes to be sorted. -template< typename hashtype > -static void CountRangedNbCollisions ( std::vector & hashes, uint64_t const nbH, int minHBits, int maxHBits, int threshHBits, int * collcounts) -{ - const int origBits = sizeof(hashtype) * 8; - assert(minHBits >= 1); - assert(minHBits <= maxHBits); - assert(origBits >= maxHBits); - assert((threshHBits == 0) || (threshHBits >= minHBits)); - assert((threshHBits == 0) || (threshHBits <= maxHBits)); - - const int collbins = maxHBits - minHBits + 1; - const int maxcollbins = (threshHBits == 0) ? 0 : threshHBits - minHBits + 1; - int prevcoll[maxcollbins + 1]; - int maxcoll[maxcollbins + 1]; - - memset(collcounts, 0, sizeof(collcounts[0])*collbins); - memset(prevcoll, 0, sizeof(prevcoll[0])*maxcollbins); - memset(maxcoll, 0, sizeof(maxcoll[0])*maxcollbins); - - for (uint64_t hnb = 1; hnb < nbH; hnb++) { - hashtype hdiff = hashes[hnb-1] ^ hashes[hnb]; - int hzb = hdiff.highzerobits(); - if (hzb > maxHBits) - hzb = maxHBits; - if (hzb >= minHBits) - collcounts[hzb - minHBits]++; - // If we don't care about maximum collision counts, or if this - // hash is a collision for *all* bit widths where we do care about - // maximums, then this is all that need be done for this hash. 
- if (hzb >= threshHBits) - continue; - // If we do care about maximum collision counts, then any window - // sizes which are strictly larger than hzb have just encountered - // a non-collision. For each of those window sizes, see how many - // collisions there have been since the last non-collision, and - // record it if that's the new peak. - if (hzb < minHBits - 1) - hzb = minHBits - 1; - // coll is the total number of collisions so far, for the window - // width corresponding to index i - int coll = 0; - for (int i = collbins - 1; i >= maxcollbins; i--) - coll += collcounts[i]; - for (int i = maxcollbins - 1; i > hzb - minHBits; i--) - { - coll += collcounts[i]; - // See if this is the new peak for this window width - maxcoll[i] = std::max(maxcoll[i], coll - prevcoll[i]); - // Record the total number of collisions seen so far at this - // non-collision, so that when the next non-collision happens we - // can compute how many collisions there have been since this one. - prevcoll[i] = coll; - } - } - - for (int i = collbins - 2; i >= 0; i--) - collcounts[i] += collcounts[i + 1]; - for (int i = maxcollbins - 1; i >= 0; i--) - collcounts[i] = std::max(maxcoll[i], collcounts[i] - prevcoll[i]); +template +static void CountRangedNbCollisions( std::vector & hashes, uint64_t const nbH, + int minHBits, int maxHBits, int threshHBits, int * collcounts ) { + const int origBits = sizeof(hashtype) * 8; + + assert(minHBits >= 1 ); + assert(minHBits <= maxHBits); + assert(origBits >= maxHBits); + assert((threshHBits == 0) || (threshHBits >= minHBits)); + assert((threshHBits == 0) || (threshHBits <= maxHBits)); + + const int collbins = maxHBits - minHBits + 1; + const int maxcollbins = (threshHBits == 0) ? 
0 : threshHBits - minHBits + 1; + int prevcoll[maxcollbins + 1]; + int maxcoll[maxcollbins + 1]; + + memset(collcounts, 0, sizeof(collcounts[0]) * collbins ); + memset(prevcoll , 0, sizeof(prevcoll[0]) * maxcollbins); + memset(maxcoll , 0, sizeof(maxcoll[0]) * maxcollbins ); + + for (uint64_t hnb = 1; hnb < nbH; hnb++) { + hashtype hdiff = hashes[hnb - 1] ^ hashes[hnb]; + int hzb = hdiff.highzerobits(); + if (hzb > maxHBits) { + hzb = maxHBits; + } + if (hzb >= minHBits) { + collcounts[hzb - minHBits]++; + } + // If we don't care about maximum collision counts, or if this + // hash is a collision for *all* bit widths where we do care about + // maximums, then this is all that need be done for this hash. + if (hzb >= threshHBits) { + continue; + } + // If we do care about maximum collision counts, then any window + // sizes which are strictly larger than hzb have just encountered + // a non-collision. For each of those window sizes, see how many + // collisions there have been since the last non-collision, and + // record it if that's the new peak. + if (hzb < minHBits - 1) { + hzb = minHBits - 1; + } + // coll is the total number of collisions so far, for the window + // width corresponding to index i + int coll = 0; + for (int i = collbins - 1; i >= maxcollbins; i--) { + coll += collcounts [i]; + } + for (int i = maxcollbins - 1; i > hzb - minHBits; i--) { + coll += collcounts[i]; + // See if this is the new peak for this window width + maxcoll[i] = std::max(maxcoll[i], coll - prevcoll[i]); + // Record the total number of collisions seen so far at this + // non-collision, so that when the next non-collision happens we + // can compute how many collisions there have been since this one. 
+ prevcoll[i] = coll; + } + } + + for (int i = collbins - 2; i >= 0; i--) { + collcounts[i] += collcounts[i + 1]; + } + for (int i = maxcollbins - 1; i >= 0; i--) { + collcounts[i] = std::max(maxcoll[i], collcounts[i] - prevcoll[i]); + } } //----------------------------------------------------------------------------- // -static bool ReportBitsCollisions (uint64_t nbH, int * collcounts, int minBits, int maxBits, bool highbits, bool drawDiagram ) -{ - if (maxBits <= 1 || minBits > maxBits) return true; - - int spacelen = 80; - spacelen -= printf("Testing all collisions (%s %2i..%2i bits) - ", - highbits ? "high" : "low ", minBits, maxBits); - - double maxCollDev = 0.0; - int maxCollDevBits = 0; - int maxCollDevNb = 0; - double maxCollDevExp = 1.0; - double maxPValue = INFINITY; - - for (int b = minBits; b <= maxBits; b++) { - int const nbColls = collcounts[b - minBits]; - double const expected = EstimateNbCollisions(nbH, b); - assert(expected > 0.0); - double const dev = (double)nbColls / expected; - double const p_value = BoundedPoissonPValue(expected, nbColls); - //printf("%d bits, %d/%f, p %f\n", b, nbColls, expected, p_value); - if (p_value < maxPValue) { - maxPValue = p_value; - maxCollDev = dev; - maxCollDevBits = b; - maxCollDevNb = nbColls; - maxCollDevExp = expected; - } - } - - const char * spaces = " "; - int i_maxCollDevExp = (int)round(maxCollDevExp); - spacelen -= printf("Worst is %2i bits: %i/%i ", maxCollDevBits, maxCollDevNb, i_maxCollDevExp); - if (spacelen < 0) - spacelen = 0; - else if (spacelen > strlen(spaces)) - spacelen = strlen(spaces); - - if (maxCollDev >= 999.95) - maxCollDev = INFINITY; - - if (!finite(maxCollDev)) - printf("%.*s(------) ", spacelen, spaces); - else if (maxCollDev < 9.0) - printf("%.*s(%5.3fx) ", spacelen, spaces, maxCollDev); - else - printf("%.*s(%#.4gx) ", spacelen, spaces, maxCollDev); - - - double p_value = ScalePValue(maxPValue, maxBits - minBits + 1); - int logp_value = GetLog2PValue(p_value); - - 
recordLog2PValue(logp_value); - if (drawDiagram) - printf("(%+i) (p<%8.6f) (^%2d)", maxCollDevNb - i_maxCollDevExp, p_value, logp_value); - else - printf("(^%2d)", logp_value); - - if (p_value < FAILURE_PBOUND) - { - printf(" !!!!!\n"); - return false; - } - else if (p_value < WARNING_PBOUND) - printf(" !\n"); - else - printf("\n"); - return true; +static bool ReportBitsCollisions( uint64_t nbH, int * collcounts, int minBits, + int maxBits, bool highbits, bool drawDiagram ) { + if ((maxBits <= 1) || (minBits > maxBits)) { return true; } + + int spacelen = 80; + spacelen -= printf("Testing all collisions (%s %2i..%2i bits) - ", highbits ? "high" : "low ", minBits, maxBits); + + double maxCollDev = 0.0; + int maxCollDevBits = 0; + int maxCollDevNb = 0; + double maxCollDevExp = 1.0; + double maxPValue = INFINITY; + + for (int b = minBits; b <= maxBits; b++) { + int const nbColls = collcounts[b - minBits]; + double const expected = EstimateNbCollisions(nbH, b); + assert(expected > 0.0); + double const dev = (double)nbColls / expected; + double const p_value = BoundedPoissonPValue(expected, nbColls); + // printf("%d bits, %d/%f, p %f\n", b, nbColls, expected, p_value); + if (p_value < maxPValue) { + maxPValue = p_value; + maxCollDev = dev; + maxCollDevBits = b; + maxCollDevNb = nbColls; + maxCollDevExp = expected; + } + } + + const char * spaces = " "; + int i_maxCollDevExp = (int)round(maxCollDevExp); + spacelen -= printf("Worst is %2i bits: %i/%i ", maxCollDevBits, maxCollDevNb, i_maxCollDevExp); + if (spacelen < 0) { + spacelen = 0; + } else if (spacelen > strlen(spaces)) { + spacelen = strlen(spaces); + } + + if (maxCollDev >= 999.95) { + maxCollDev = INFINITY; + } + + if (!finite(maxCollDev)) { + printf("%.*s(------) ", spacelen, spaces); + } else if (maxCollDev < 9.0) { + printf("%.*s(%5.3fx) ", spacelen, spaces, maxCollDev); + } else { + printf("%.*s(%#.4gx) ", spacelen, spaces, maxCollDev); + } + + double p_value = ScalePValue(maxPValue, maxBits - minBits + 1); 
+ int logp_value = GetLog2PValue(p_value); + + recordLog2PValue(logp_value); + if (drawDiagram) { + printf("(%+i) (p<%8.6f) (^%2d)", maxCollDevNb - i_maxCollDevExp, p_value, logp_value); + } else { + printf("(^%2d)", logp_value); + } + + if (p_value < FAILURE_PBOUND) { + printf(" !!!!!\n"); + return false; + } else if (p_value < WARNING_PBOUND) { + printf(" !\n"); + } else { + printf("\n"); + } + return true; } //---------------------------------------------------------------------------- // Measure the distribution "score" for each possible N-bit span, with // N going from 8 to 20 inclusive. -static int MaxDistBits ( const uint64_t nbH ) -{ - // If there aren't 5 keys per bin over 8 bins, then don't bother - // testing distribution at all. - if (nbH < (5 * 8)) - return 0; - int maxwidth = 20; - // We need at least 5 keys per bin to reliably test distribution biases - // down to 1%, so don't bother to test sparser distributions than that - while(double(nbH) / double(1 << maxwidth) < 5.0) - --maxwidth; - return maxwidth; +static int MaxDistBits( const uint64_t nbH ) { + // If there aren't 5 keys per bin over 8 bins, then don't bother + // testing distribution at all. 
+ if (nbH < (5 * 8)) { + return 0; + } + int maxwidth = 20; + // We need at least 5 keys per bin to reliably test distribution biases + // down to 1%, so don't bother to test sparser distributions than that + while (double(nbH) / double(1 << maxwidth) < 5.0) { + --maxwidth; + } + return maxwidth; } -template< typename hashtype > -static bool TestDistribution ( std::vector & hashes, bool drawDiagram ) -{ - const int hashbits = sizeof(hashtype) * 8; - const uint64_t nbH = hashes.size(); - int maxwidth = MaxDistBits(nbH); - int minwidth = 8; +template +static bool TestDistribution( std::vector & hashes, bool drawDiagram ) { + const int hashbits = sizeof(hashtype) * 8; + const uint64_t nbH = hashes.size(); + int maxwidth = MaxDistBits(nbH); + int minwidth = 8; - if (maxwidth < minwidth) return true; + if (maxwidth < minwidth) { return true; } - printf("Testing distribution (any %2i..%2i bits)%s", minwidth, maxwidth, drawDiagram ? "\n[" : " - "); + printf("Testing distribution (any %2i..%2i bits)%s", minwidth, maxwidth, drawDiagram ? 
"\n[" : " - "); - std::vector bins; - bins.resize(1 << maxwidth); + std::vector bins; + bins.resize(1 << maxwidth); - double worstN = 0; // Only report on biases above 0 - int worstStart = -1; - int worstWidth = -1; - int tests = 0; + double worstN = 0; // Only report on biases above 0 + int worstStart = -1; + int worstWidth = -1; + int tests = 0; - for(int start = 0; start < hashbits; start++) - { - int width = maxwidth; - int bincount = (1 << width); + for (int start = 0; start < hashbits; start++) { + int width = maxwidth; + int bincount = (1 << width); - memset(&bins[0],0,sizeof(int)*bincount); + memset(&bins[0], 0, sizeof(int) * bincount); - for(uint64_t j = 0; j < nbH; j++) - { - uint32_t index = hashes[j].window(start,width); + for (uint64_t j = 0; j < nbH; j++) { + uint32_t index = hashes[j].window(start, width); - bins[index]++; - } + bins[index]++; + } - // Test the distribution, then fold the bins in half, - // repeat until we're down to 256 bins + // Test the distribution, then fold the bins in half, + // repeat until we're down to 256 bins - while(bincount >= 256) - { - double n = calcScore(&bins[0],bincount,nbH); + while (bincount >= 256) { + double n = calcScore(&bins[0], bincount, nbH); - tests++; + tests++; - if(drawDiagram) plot(n); + if (drawDiagram) { plot(n); } - if(n > worstN) - { - worstN = n; - worstStart = start; - worstWidth = width; - } + if (n > worstN) { + worstN = n; + worstStart = start; + worstWidth = width; + } - width--; - bincount /= 2; + width--; + bincount /= 2; - if(width < minwidth) break; + if (width < minwidth) { break; } - // To allow the compiler to parallelize this loop - assume((bincount % 8) == 0); + // To allow the compiler to parallelize this loop + assume((bincount % 8) == 0); - for(int i = 0; i < bincount; i++) - { - bins[i] += bins[i+bincount]; - } - } + for (int i = 0; i < bincount; i++) { + bins[i] += bins[i + bincount]; + } + } - if(drawDiagram) printf("]\n%s", ((start + 1) == hashbits) ? 
"" : "["); - } + if (drawDiagram) { printf("]\n%s", ((start + 1) == hashbits) ? "" : "["); } + } - addVCodeResult((uint32_t)worstN); - addVCodeResult(worstWidth); - addVCodeResult(worstStart); + addVCodeResult((uint32_t)worstN); + addVCodeResult(worstWidth ); + addVCodeResult(worstStart ); - double p_value = ScalePValue(GetNormalPValue(0, 1, worstN), tests); - int logp_value = GetLog2PValue(p_value); - double mult = normalizeScore(worstN, worstWidth, tests); + double p_value = ScalePValue(GetNormalPValue(0, 1, worstN), tests); + int logp_value = GetLog2PValue(p_value); + double mult = normalizeScore(worstN, worstWidth, tests); - if (worstStart == -1) - printf("No positive bias detected %5.3fx ", 0.0); - else if (mult < 9.0) - printf("Worst bias is %2d bits at bit %3d: %5.3fx ", - worstWidth, worstStart, mult); - else - printf("Worst bias is %2d bits at bit %3d: %#.4gx ", - worstWidth, worstStart, mult); + if (worstStart == -1) { + printf("No positive bias detected %5.3fx ", 0.0); + } else if (mult < 9.0) { + printf("Worst bias is %2d bits at bit %3d: %5.3fx ", worstWidth, worstStart, mult); + } else { + printf("Worst bias is %2d bits at bit %3d: %#.4gx ", worstWidth, worstStart, mult); + } - recordLog2PValue(logp_value); - if (drawDiagram) - printf("(%f) (p<%8.6f) (^%2d)", worstN, p_value, logp_value); - else - printf("(^%2d)", logp_value); + recordLog2PValue(logp_value); + if (drawDiagram) { + printf("(%f) (p<%8.6f) (^%2d)", worstN, p_value, logp_value); + } else { + printf("(^%2d)", logp_value); + } - if (p_value < FAILURE_PBOUND) - { - printf(" !!!!!\n"); - return false; - } - else if (p_value < WARNING_PBOUND) - printf(" !\n"); - else - printf("\n"); - return true; + if (p_value < FAILURE_PBOUND) { + printf(" !!!!!\n"); + return false; + } else if (p_value < WARNING_PBOUND) { + printf(" !\n"); + } else { + printf("\n"); + } + return true; } //----------------------------------------------------------------------------- @@ -556,239 +559,242 @@ static bool 
TestDistribution ( std::vector & hashes, bool drawDiagram // comparing them to a list of i.i.d. random numbers across the full // origBits range. -static void ComputeCollBitBounds ( std::vector & nbBitsvec, int origBits, uint64_t nbH, int & minBits, int & maxBits, int & threshBits ) -{ - const int nlognBits = GetNLogNBound(nbH); - - minBits = origBits + 1; - maxBits = 0; - threshBits = 0; - - for(const int nbBits: nbBitsvec) - { - // If the nbBits value is too large for this hashtype, do nothing. - if (nbBits >= origBits) - continue; - // If many hashes are being tested (compared to the hash width), - // then the expected number of collisions will approach the number - // of keys (indeed, it will converge to every hash bucket being - // full, leaving nbH - 2**nbBits collisions). In those cases, it is - // not very useful to count all collisions, so at some point of high - // expected collisions, it is better to instead count the number of - // keys in the fullest bucket. The cutoff here is if there are - // (n*log(n)) hashes, where n is the number of hash buckets. This - // cutoff is an inflection point where the "balls-into-bins" - // statistics really start changing. ReportCollisions() will - // estimate the correct key count for that differently, as it is a - // different statistic. - if (nbBits < nlognBits) - threshBits = std::max(threshBits, nbBits); - // Record the highest and lowest valid bit widths to test - maxBits = std::max(maxBits, nbBits); - minBits = std::min(minBits, nbBits); - } +static void ComputeCollBitBounds( std::vector & nbBitsvec, int origBits, + uint64_t nbH, int & minBits, int & maxBits, int & threshBits ) { + const int nlognBits = GetNLogNBound(nbH); + + minBits = origBits + 1; + maxBits = 0; + threshBits = 0; + + for (const int nbBits: nbBitsvec) { + // If the nbBits value is too large for this hashtype, do nothing. 
+ if (nbBits >= origBits) { + continue; + } + // If many hashes are being tested (compared to the hash width), + // then the expected number of collisions will approach the number + // of keys (indeed, it will converge to every hash bucket being + // full, leaving nbH - 2**nbBits collisions). In those cases, it is + // not very useful to count all collisions, so at some point of high + // expected collisions, it is better to instead count the number of + // keys in the fullest bucket. The cutoff here is if there are + // (n*log(n)) hashes, where n is the number of hash buckets. This + // cutoff is an inflection point where the "balls-into-bins" + // statistics really start changing. ReportCollisions() will + // estimate the correct key count for that differently, as it is a + // different statistic. + if (nbBits < nlognBits) { + threshBits = std::max(threshBits, nbBits); + } + // Record the highest and lowest valid bit widths to test + maxBits = std::max(maxBits, nbBits); + minBits = std::min(minBits, nbBits); + } } -static int FindMinBits_TargetCollisionShare(uint64_t nbHashes, double share) -{ +static int FindMinBits_TargetCollisionShare( uint64_t nbHashes, double share ) { int nb; - for (nb=2; nb<64; nb++) { + + for (nb = 2; nb < 64; nb++) { double const maxColls = (double)(1ULL << nb) * share; - double const nbColls = EstimateNbCollisions(nbHashes, nb); - if (nbColls < maxColls) return nb; + double const nbColls = EstimateNbCollisions(nbHashes, nb); + if (nbColls < maxColls) { return nb; } } assert(0); return nb; } -static int FindMaxBits_TargetCollisionNb(uint64_t nbHashes, int minCollisions, int maxbits) -{ +static int FindMaxBits_TargetCollisionNb( uint64_t nbHashes, int minCollisions, int maxbits ) { int nb; - for (nb=maxbits; nb>2; nb--) { + + for (nb = maxbits; nb > 2; nb--) { double const nbColls = EstimateNbCollisions(nbHashes, nb); - if (nbColls > minCollisions) return nb; + if (nbColls > minCollisions) { return nb; } } - //assert(0); + // assert(0); 
return nb; } -template < typename hashtype > -bool TestHashList ( std::vector & hashes, bool drawDiagram, - bool testCollision, bool testDist , - bool testHighBits, bool testLowBits , - bool verbose ) -{ - bool result = true; - - if (testCollision) - { - unsigned const hashbits = sizeof(hashtype) * 8; - uint64_t const nbH = hashes.size(); - if (verbose) - printf("Testing all collisions ( %3i-bit)", hashbits); - - addVCodeOutput(&hashes[0], sizeof(hashtype) * nbH); - - std::set collisions; - int collcount = FindCollisions(hashes, collisions, 1000, drawDiagram); - - /* - * Do all other compute-intensive stuff (as requested) before - * displaying any results from FindCollisions, to be a little bit - * more human-friendly. - */ - - std::vector nbBitsvec = { 224, 160, 128, 64, 32, 12, 8, }; - /* - * cyan: The 12- and -8-bit tests are too small : tables are necessarily saturated. - * It would be better to count the nb of collisions per Cell, and - * compared the distribution of values against a random source. - * But that would be a different test. - * - * rurban: No, these tests are for non-prime hash tables, using only - * the lower 5-10 bits - * - * fwojcik: Collision counting did not previously reflect - * rurban's comment, as the code counted the sum of collisions - * across _all_ buckets. So if there are many more hashes than - * 2**nbBits, and the hash is even _slightly_ not broken, then - * every n-bit truncated hash value will appear at least once, in - * which case the "actual" value reported would always be - * (hashes.size() - 2**nbBits). Checking the results in doc/ - * confirms this. cyan's comment was correct. - * - * Collision counting has now been modified to report on the - * single bucket with the most collisions when fuller hash tables - * are being tested, and ReportCollisions() computes an - * appropriate "expected" statistic. - */ - - /* - * Compute the number of bits for a collision count of - * approximately 100. 
- */ - if (testHighBits || testLowBits) - { - int const hundredCollBits = FindMaxBits_TargetCollisionNb(nbH, 100, hashbits); - if (EstimateNbCollisions(nbH, hundredCollBits) >= 100) - nbBitsvec.push_back(hundredCollBits); - std::sort(nbBitsvec.rbegin(), nbBitsvec.rend()); - nbBitsvec.erase(std::unique(nbBitsvec.begin(), nbBitsvec.end()), nbBitsvec.end()); - } - - /* - * Each bit width value in nbBitsvec is explicitly reported on. If - * any of those values are less than the n*log(n) bound, then the - * bin with the most collisions will be reported on, otherwise the - * total sum of collisions across all bins will be reported on. - * - * But there are many more bit widths that a) are probably used in - * the real world, and b) we can now cheaply analyze and report - * on. Any bit width above the n*log(n) bound that has a - * reasonable number of expected collisions is worth analyzing, so - * that range of widths is computed here. - * - * This is slightly complicated by the fact that - * TestDistribution() may also get invoked, which does an - * RMSE-based comparison to the expected distribution over some - * range of bit width values. If that will be invoked, then - * there's no point in doubly-reporting on collision counts for - * those bit widths, so they get excluded here. - */ - std::vector testBitsvec; - int const nlognBits = GetNLogNBound(nbH); - int const minTBits = testDist ? std::max(MaxDistBits(nbH)+1, nlognBits) : nlognBits; - int const maxTBits = FindMaxBits_TargetCollisionNb(nbH, 10, hashbits - 1); - - if (testHighBits || testLowBits) - for (int i = minTBits; i <= maxTBits; i++) - testBitsvec.push_back(i); - - /* - * Given the range of hash sizes we care about, compute all - * collision counts for them, for high- and low-bits as requested. 
- */ - std::vector revhashes; - std::vector collcounts_fwd; - std::vector collcounts_rev; - int minBits, maxBits, threshBits; - - if (testHighBits || testLowBits) - { - std::vector combinedBitsvec; - combinedBitsvec.reserve(200); // Workaround for GCC bug 100366 - combinedBitsvec.insert(combinedBitsvec.begin(), nbBitsvec.begin(), nbBitsvec.end()); - combinedBitsvec.insert(combinedBitsvec.begin(), testBitsvec.begin(), testBitsvec.end()); - std::sort(combinedBitsvec.rbegin(), combinedBitsvec.rend()); - combinedBitsvec.erase(std::unique(combinedBitsvec.begin(), combinedBitsvec.end()), combinedBitsvec.end()); - ComputeCollBitBounds(combinedBitsvec, hashbits, nbH, minBits, maxBits, threshBits); - } - - if (testHighBits && (maxBits > 0)) - { - collcounts_fwd.reserve(maxBits - minBits + 1); - CountRangedNbCollisions(hashes, nbH, minBits, maxBits, threshBits, &collcounts_fwd[0]); - } - - if (testLowBits && (maxBits > 0)) - { - // reverse: bitwise flip the hashes. lowest bits first - revhashes.reserve(hashes.size()); - for(const auto hashval: hashes) - { - hashtype rev = hashval; - rev.reversebits(); - revhashes.push_back(rev); - } - blobsort(revhashes.begin(), revhashes.end()); - - collcounts_rev.reserve(maxBits - minBits + 1); - CountRangedNbCollisions(revhashes, nbH, minBits, maxBits, threshBits, &collcounts_rev[0]); - } - - addVCodeResult(collcount); - if (testHighBits && (collcounts_fwd.size() != 0)) { - addVCodeResult(&collcounts_fwd[0], sizeof(collcounts_fwd[0]) * - collcounts_fwd.size()); - } - if (testLowBits && (collcounts_rev.size() != 0)) { - addVCodeResult(&collcounts_rev[0], sizeof(collcounts_rev[0]) * - collcounts_rev.size()); - } - - // Report on complete collisions, now that the heavy lifting is complete - result &= ReportCollisions(nbH, collcount, hashbits, false, false, false, verbose, drawDiagram); - if(!result && drawDiagram) - { - PrintCollisions(collisions); - } - - if (testHighBits || testLowBits) - for(const int nbBits: nbBitsvec) - { - if ((nbBits 
< minBits) || (nbBits > maxBits)) - continue; - bool maxcoll = (nbBits <= threshBits) ? true : false; - if (testHighBits) - result &= ReportCollisions(nbH, collcounts_fwd[nbBits - minBits], nbBits, - maxcoll, true, true, true, drawDiagram); - if (testLowBits) - result &= ReportCollisions(nbH, collcounts_rev[nbBits - minBits], nbBits, - maxcoll, false, true, true, drawDiagram); - } - - if (testHighBits) - result &= ReportBitsCollisions(nbH, &collcounts_fwd[minTBits - minBits], minTBits, maxTBits, true, drawDiagram); - if (testLowBits) - result &= ReportBitsCollisions(nbH, &collcounts_rev[minTBits - minBits], minTBits, maxTBits, false, drawDiagram); - } - - //---------- - - if(testDist) - result &= TestDistribution(hashes,drawDiagram); - - return result; +template +bool TestHashList( std::vector & hashes, bool drawDiagram, bool testCollision, + bool testDist, bool testHighBits, bool testLowBits, bool verbose ) { + bool result = true; + + if (testCollision) { + unsigned const hashbits = sizeof(hashtype) * 8; + uint64_t const nbH = hashes.size(); + if (verbose) { + printf("Testing all collisions ( %3i-bit)", hashbits); + } + + addVCodeOutput(&hashes[0], sizeof(hashtype) * nbH); + + std::set collisions; + int collcount = FindCollisions(hashes, collisions, 1000, drawDiagram); + + /* + * Do all other compute-intensive stuff (as requested) before + * displaying any results from FindCollisions, to be a little bit + * more human-friendly. + */ + + std::vector nbBitsvec = { 224, 160, 128, 64, 32, 12, 8, }; + /* + * cyan: The 12- and -8-bit tests are too small : tables are necessarily saturated. + * It would be better to count the nb of collisions per Cell, and + * compared the distribution of values against a random source. + * But that would be a different test. 
+ * + * rurban: No, these tests are for non-prime hash tables, using only + * the lower 5-10 bits + * + * fwojcik: Collision counting did not previously reflect + * rurban's comment, as the code counted the sum of collisions + * across _all_ buckets. So if there are many more hashes than + * 2**nbBits, and the hash is even _slightly_ not broken, then + * every n-bit truncated hash value will appear at least once, in + * which case the "actual" value reported would always be + * (hashes.size() - 2**nbBits). Checking the results in doc/ + * confirms this. cyan's comment was correct. + * + * Collision counting has now been modified to report on the + * single bucket with the most collisions when fuller hash tables + * are being tested, and ReportCollisions() computes an + * appropriate "expected" statistic. + */ + + /* + * Compute the number of bits for a collision count of + * approximately 100. + */ + if (testHighBits || testLowBits) { + int const hundredCollBits = FindMaxBits_TargetCollisionNb(nbH, 100, hashbits); + if (EstimateNbCollisions(nbH, hundredCollBits) >= 100) { + nbBitsvec.push_back(hundredCollBits); + } + std::sort(nbBitsvec.rbegin(), nbBitsvec.rend()); + nbBitsvec.erase(std::unique(nbBitsvec.begin(), nbBitsvec.end()), nbBitsvec.end()); + } + + /* + * Each bit width value in nbBitsvec is explicitly reported on. If + * any of those values are less than the n*log(n) bound, then the + * bin with the most collisions will be reported on, otherwise the + * total sum of collisions across all bins will be reported on. + * + * But there are many more bit widths that a) are probably used in + * the real world, and b) we can now cheaply analyze and report + * on. Any bit width above the n*log(n) bound that has a + * reasonable number of expected collisions is worth analyzing, so + * that range of widths is computed here. 
+ * + * This is slightly complicated by the fact that + * TestDistribution() may also get invoked, which does an + * RMSE-based comparison to the expected distribution over some + * range of bit width values. If that will be invoked, then + * there's no point in doubly-reporting on collision counts for + * those bit widths, so they get excluded here. + */ + std::vector testBitsvec; + int const nlognBits = GetNLogNBound(nbH); + int const minTBits = testDist ? std::max(MaxDistBits(nbH) + 1, nlognBits) : nlognBits; + int const maxTBits = FindMaxBits_TargetCollisionNb(nbH, 10, hashbits - 1); + + if (testHighBits || testLowBits) { + for (int i = minTBits; i <= maxTBits; i++) { + testBitsvec.push_back(i); + } + } + + /* + * Given the range of hash sizes we care about, compute all + * collision counts for them, for high- and low-bits as requested. + */ + std::vector revhashes; + std::vector collcounts_fwd; + std::vector collcounts_rev; + int minBits, maxBits, threshBits; + + if (testHighBits || testLowBits) { + std::vector combinedBitsvec; + combinedBitsvec.reserve(200); // Workaround for GCC bug 100366 + combinedBitsvec.insert(combinedBitsvec.begin(), nbBitsvec.begin() , nbBitsvec.end() ); + combinedBitsvec.insert(combinedBitsvec.begin(), testBitsvec.begin(), testBitsvec.end()); + std::sort(combinedBitsvec.rbegin(), combinedBitsvec.rend()); + combinedBitsvec.erase(std::unique(combinedBitsvec.begin(), combinedBitsvec.end()), combinedBitsvec.end()); + ComputeCollBitBounds(combinedBitsvec, hashbits, nbH, minBits, maxBits, threshBits); + } + + if (testHighBits && (maxBits > 0)) { + collcounts_fwd.reserve(maxBits - minBits + 1); + CountRangedNbCollisions(hashes, nbH, minBits, maxBits, threshBits, &collcounts_fwd[0]); + } + + if (testLowBits && (maxBits > 0)) { + // reverse: bitwise flip the hashes. 
lowest bits first + revhashes.reserve(hashes.size()); + for (const auto hashval: hashes) { + hashtype rev = hashval; + rev.reversebits(); + revhashes.push_back(rev); + } + blobsort(revhashes.begin(), revhashes.end()); + + collcounts_rev.reserve(maxBits - minBits + 1); + CountRangedNbCollisions(revhashes, nbH, minBits, maxBits, threshBits, &collcounts_rev[0]); + } + + addVCodeResult(collcount); + if (testHighBits && (collcounts_fwd.size() != 0)) { + addVCodeResult(&collcounts_fwd[0], sizeof(collcounts_fwd[0]) * + collcounts_fwd.size()); + } + if (testLowBits && (collcounts_rev.size() != 0)) { + addVCodeResult(&collcounts_rev[0], sizeof(collcounts_rev[0]) * + collcounts_rev.size()); + } + + // Report on complete collisions, now that the heavy lifting is complete + result &= ReportCollisions(nbH, collcount, hashbits, false, false, false, verbose, drawDiagram); + if (!result && drawDiagram) { + PrintCollisions(collisions); + } + + if (testHighBits || testLowBits) { + for (const int nbBits: nbBitsvec) { + if ((nbBits < minBits) || (nbBits > maxBits)) { + continue; + } + bool maxcoll = (nbBits <= threshBits) ? 
true : false; + if (testHighBits) { + result &= ReportCollisions(nbH, collcounts_fwd[nbBits - minBits], + nbBits, maxcoll, true, true, true, drawDiagram); + } + if (testLowBits) { + result &= ReportCollisions(nbH, collcounts_rev[nbBits - minBits], + nbBits, maxcoll, false, true, true, drawDiagram); + } + } + } + + if (testHighBits) { + result &= ReportBitsCollisions(nbH, &collcounts_fwd[minTBits - minBits], + minTBits, maxTBits, true, drawDiagram); + } + if (testLowBits) { + result &= ReportBitsCollisions(nbH, &collcounts_rev[minTBits - minBits], + minTBits, maxTBits, false, drawDiagram); + } + } + + //---------- + + if (testDist) { + result &= TestDistribution(hashes, drawDiagram); + } + + return result; } INSTANTIATE(TestHashList, HASHTYPELIST); @@ -804,89 +810,81 @@ INSTANTIATE(TestHashList, HASHTYPELIST); // I'm not sure it's that useful (and hash functions that fail this test but // pass the normal distribution test still work well in practice) -template < typename hashtype > -double TestDistributionBytepairs ( std::vector & hashes, bool drawDiagram ) -{ - const int nbytes = sizeof(hashtype); - const int hashbits = nbytes * 8; +template +double TestDistributionBytepairs( std::vector & hashes, bool drawDiagram ) { + const int nbytes = sizeof(hashtype); + const int hashbits = nbytes * 8; + + const int nbins = 65536; - const int nbins = 65536; + std::vector bins( nbins, 0 ); - std::vector bins(nbins,0); + double worst = 0; - double worst = 0; + for (int a = 0; a < hashbits; a++) { + if (drawDiagram) { if ((a % 8 == 0) && (a > 0)) { printf("\n"); } } - for(int a = 0; a < hashbits; a++) - { - if(drawDiagram) if((a % 8 == 0) && (a > 0)) printf("\n"); + if (drawDiagram) { printf("["); } - if(drawDiagram) printf("["); + for (int b = 0; b < hashbits; b++) { + if (drawDiagram) { if ((b % 8 == 0) && (b > 0)) { printf(" "); } } - for(int b = 0; b < hashbits; b++) - { - if(drawDiagram) if((b % 8 == 0) && (b > 0)) printf(" "); + bins.clear(); + bins.resize(nbins, 0); - 
bins.clear(); - bins.resize(nbins,0); + for (uint64_t i = 0; i < hashes.size(); i++) { + uint32_t pa = window(hashes[i], a, 8); + uint32_t pb = window(hashes[i], b, 8); - for(uint64_t i = 0; i < hashes.size(); i++) - { - uint32_t pa = window(hashes[i],a,8); - uint32_t pb = window(hashes[i],b,8); + bins[pa | (pb << 8)]++; + } - bins[pa | (pb << 8)]++; - } + double s = calcScore(bins, nbins, hashes.size()); - double s = calcScore(bins,nbins,hashes.size()); + if (drawDiagram) { plot(s); } - if(drawDiagram) plot(s); + if (s > worst) { + worst = s; + } + } - if(s > worst) - { - worst = s; - } + if (drawDiagram) { printf("]\n"); } } - if(drawDiagram) printf("]\n"); - } - - return worst; + return worst; } //----------------------------------------------------------------------------- // Simplified test - only check 64k distributions, and only on byte boundaries -template < typename hashtype > -void TestDistributionFast ( std::vector & hashes, double & dworst, double & davg ) -{ - const int hashbits = sizeof(hashtype) * 8; - const int nbins = 65536; +template +void TestDistributionFast( std::vector & hashes, double & dworst, double & davg ) { + const int hashbits = sizeof(hashtype) * 8; + const int nbins = 65536; - std::vector bins(nbins,0); + std::vector bins( nbins, 0 ); - dworst = -1.0e90; - davg = 0; + dworst = -1.0e90; + davg = 0; - for(int start = 0; start < hashbits; start += 8) - { - bins.clear(); - bins.resize(nbins,0); + for (int start = 0; start < hashbits; start += 8) { + bins.clear(); + bins.resize(nbins, 0); - for(uint64_t j = 0; j < hashes.size(); j++) - { - uint32_t index = window(hashes[j],start,16); + for (uint64_t j = 0; j < hashes.size(); j++) { + uint32_t index = window(hashes[j], start, 16); - bins[index]++; - } + bins[index]++; + } - double n = calcScore(&bins.front(),nbins,(int)hashes.size()); + double n = calcScore(&bins.front(), nbins, (int)hashes.size()); - davg += n; + davg += n; - if(n > dworst) dworst = n; - } + if (n > dworst) { dworst = n; } 
+ } - davg /= double(hashbits/8); + davg /= double(hashbits / 8); } //----------------------------------------------------------------------------- diff --git a/util/Analyze.h b/util/Analyze.h index c5abdd78..7991f544 100644 --- a/util/Analyze.h +++ b/util/Analyze.h @@ -50,19 +50,15 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -bool ReportBias(const int biascnt, const int coinflips, const int trials, const bool drawDiagram); +bool ReportBias( const int biascnt, const int coinflips, const int trials, const bool drawDiagram ); -template < typename hashtype > -unsigned int FindCollisions ( std::vector & hashes, - std::set & collisions, - int maxCollisions = 1000, - bool drawDiagram = false); +template +unsigned int FindCollisions( std::vector & hashes, std::set & collisions, + int maxCollisions = 1000, bool drawDiagram = false ); -template < typename hashtype > -void PrintCollisions(std::set & collisions); +template +void PrintCollisions( std::set & collisions ); -template < typename hashtype > -bool TestHashList ( std::vector & hashes, bool drawDiagram, - bool testCollision = true, bool testDist = true, - bool testHighBits = true, bool testLowBits = true, - bool verbose = true ); +template +bool TestHashList( std::vector & hashes, bool drawDiagram, bool testCollision = true, bool testDist = true, + bool testHighBits = true, bool testLowBits = true, bool verbose = true ); diff --git a/util/Blob.h b/util/Blob.h index 344775e3..50f667f4 100644 --- a/util/Blob.h +++ b/util/Blob.h @@ -49,302 +49,311 @@ extern const uint32_t hzb[256]; //----------------------------------------------------------------------------- -#define _bytes ((_bits+7)/8) -template < unsigned _bits > +#define _bytes ((_bits + 7) / 8) +template class Blob { + public: + //---------- + // constructors -public: - //---------- - // constructors + Blob() { + memset(bytes, 0, sizeof(bytes)); + } + + Blob( const void * p, size_t len ) { + len = 
std::min(len, sizeof(bytes)); + memcpy(bytes, p, len); + memset(&bytes[len], 0, sizeof(bytes) - len); + } + + Blob( uint64_t x ) : + Blob((x = COND_BSWAP( x, isBE()), &x), sizeof(x)) {} + + //---------- + // unary operators + + uint8_t & operator [] ( int i ) { + // assert(i < sizeof(bytes)); + return bytes[i]; + } + + const uint8_t & operator [] ( int i ) const { + // assert(i < sizeof(bytes)); + return bytes[i]; + } + + Blob & operator = ( const Blob & k ) { + memcpy(bytes, k.bytes, sizeof(bytes)); + return *this; + } + + Blob & operator = ( const uint32_t & x ) { + const uint32_t y = COND_BSWAP(x, isBE()); + + memcpy(bytes, &y, sizeof(y)); + return *this; + } + + //---------- + // boolean operators + + bool operator < ( const Blob & k ) const { + for (int i = sizeof(bytes) - 1; i >= 0; i--) { + if (bytes[i] < k.bytes[i]) { return true; } + if (bytes[i] > k.bytes[i]) { return false; } + } + return false; + } + + bool operator == ( const Blob & k ) const { + int r = memcmp(&bytes[0], &k.bytes[0], sizeof(bytes)); + + return (r == 0) ? 
true : false; + } + + bool operator != ( const Blob & k ) const { + return !(*this == k); + } + + //---------- + // bitwise operations + + Blob operator ^ ( const Blob & k ) const { + Blob t; + + for (size_t i = 0; i < sizeof(bytes); i++) { + t.bytes[i] = bytes[i] ^ k.bytes[i]; + } + + return t; + } + + Blob & operator ^= ( const Blob & k ) { + for (size_t i = 0; i < sizeof(bytes); i++) { + bytes[i] ^= k.bytes[i]; + } + return *this; + } + + //---------- + // interface + + FORCE_INLINE uint8_t getbit( size_t bit ) const { + return _getbit(bit, bytes, sizeof(bytes)); + } + + FORCE_INLINE void printhex( const char * prefix = "" ) const { + _printhex(prefix, bytes, sizeof(bytes)); + } + + FORCE_INLINE void printbits( const char * prefix = "" ) const { + _printbits(prefix, bytes, sizeof(bytes)); + } + + FORCE_INLINE uint32_t highzerobits( void ) const { + return _highzerobits(bytes, sizeof(bytes)); + } - Blob() { - memset(bytes, 0, sizeof(bytes)); - } + FORCE_INLINE uint32_t window( size_t start, size_t count ) const { + return _window(start, count, bytes, sizeof(bytes)); + } + + FORCE_INLINE void flipbit( size_t bit ) { + _flipbit(bit, bytes, sizeof(bytes)); + } + + FORCE_INLINE void reversebits( void ) { + _reversebits(bytes, sizeof(bytes)); + } - Blob(const void * p, size_t len) { - len = std::min(len, sizeof(bytes)); - memcpy(bytes, p, len); - memset(&bytes[len], 0, sizeof(bytes) - len); - } + FORCE_INLINE void lrot( size_t c ) { + _lrot(c, bytes, sizeof(bytes)); + } + + protected: + //---------- + // implementations + + static FORCE_INLINE uint8_t _getbit( size_t bit, const uint8_t * bytes, const size_t len ) { + size_t byte = bit >> 3; + + bit &= 7; + if (byte > len) { return 0; } + return (bytes[byte] >> bit) & 1; + } + + static void _printhex( const char * prefix, const uint8_t * bytes, const size_t len ) { + const size_t buflen = 4 + 2 * len + ((len + 3) / 4); + char buf[buflen]; + char * p; + + buf[0] = '['; + buf[1] = ' '; + // Space preceding the closing 
']' gets added by the loop below + buf[buflen - 2] = ']'; + buf[buflen - 1] = '\0'; + + // Print using MSB-first notation + p = &buf[2]; + for (size_t i = len; i != 0; i--) { + uint8_t vh = (bytes[i - 1] >> 4); + uint8_t vl = (bytes[i - 1] & 15); + *p++ = vh + ((vh <= 9) ? '0' : 'W'); // 'W' + 10 == 'a' + *p++ = vl + ((vl <= 9) ? '0' : 'W'); + if ((i & 3) == 1) { + *p++ = ' '; + } + } + + printf("%s%s\n", prefix, buf); + } + + static void _printbits( const char * prefix, const uint8_t * bytes, const size_t len ) { + const size_t buflen = 4 + 9 * len; + char buf[buflen]; + char * p; + + buf[0] = '['; + buf[1] = ' '; + // Space preceding the closing ']' gets added by the loop below + buf[buflen - 2] = ']'; + buf[buflen - 1] = '\0'; + + // Print using MSB-first notation + p = &buf[2]; + for (size_t i = len; i != 0; i--) { + uint8_t v = bytes[i - 1]; + for (int j = 7; j >= 0; j--) { + *p++ = (v & (1 << j)) ? '1' : '0'; + } + *p++ = ' '; + } + + printf("%s%s\n", prefix, buf); + } + + static FORCE_INLINE uint32_t _highzerobits( const uint8_t * bytes, const size_t len ) { + uint32_t zb = 0; + + for (ssize_t i = len - 1; i >= 0; i--) { + zb += hzb[bytes[i]]; + if (bytes[i] != 0) { + break; + } + } + return zb; + } + + // Bit-windowing function. + // Select some N-bit subset of the Blob, where N <= 24. 
+ static FORCE_INLINE uint32_t _window( size_t start, size_t count, const uint8_t * bytes, const size_t len ) { + assume(count <= 24); + const size_t bitlen = 8 * len; + const uint32_t mask = (1 << count) - 1; + uint32_t v; + + if (start <= (bitlen - 25)) { + memcpy(&v, &bytes[start >> 3], 4); + v = COND_BSWAP(v, isBE()); + v >>= (start & 7 ); + } else { + memcpy(&v, &bytes[len - 4], 4); + v = COND_BSWAP(v, isBE()); + v >>= 32 + start - bitlen; + if ((start + count) > bitlen) { + uint32_t v2; + memcpy(&v2, bytes, 4); + v2 = COND_BSWAP(v2, isBE()); + v2 <<= bitlen - start; + v |= v2; + } + } + return v & mask; + } - Blob(uint64_t x) : - Blob((x = COND_BSWAP(x, isBE()), &x), sizeof(x)) {}; + static FORCE_INLINE void _flipbit( size_t bit, uint8_t * bytes, const size_t len ) { + const size_t byteoffset = bit >> 3; + const size_t bitoffset = bit & 7; - //---------- - // unary operators + if (byteoffset < len) { + bytes[byteoffset] ^= (1 << bitoffset); + } + } - uint8_t & operator [] (int i) { - //assert(i < sizeof(bytes)); - return bytes[i]; - } + // from the "Bit Twiddling Hacks" webpage + static FORCE_INLINE uint8_t _byterev( uint8_t b ) { + return ((b * UINT64_C(0x0802) & UINT64_C(0x22110)) | + (b * UINT64_C(0x8020) & UINT64_C(0x88440))) * UINT64_C(0x10101) >> 16; + } - const uint8_t & operator [](int i) const { - //assert(i < sizeof(bytes)); - return bytes[i]; - } + // 0xf00f1001 => 0x8008f00f + static FORCE_INLINE void _reversebits( uint8_t * bytes, const size_t len ) { + uint8_t tmp[len]; - Blob & operator = (const Blob & k) { - memcpy(bytes, k.bytes, sizeof(bytes)); - return *this; - } + for (size_t i = 0; i < len; i++) { + tmp[len - i - 1] = _byterev(bytes[i]); + } + memcpy(bytes, tmp, len); + } - Blob & operator = (const uint32_t & x) { - const uint32_t y = COND_BSWAP(x, isBE()); - memcpy(bytes, &y, sizeof(y)); - return *this; - } + static void _lrot( size_t c, uint8_t * bytes, const size_t len ) { + const size_t byteoffset = c >> 3; + const size_t bitoffset = 
c & 7; + uint8_t tmp[len]; + + for (size_t i = 0; i < len; i++) { + tmp[(i + byteoffset) % len] = bytes[i]; + } + if (bitoffset == 0) { + memcpy(bytes, tmp, len); + } else { + for (size_t i = 0; i < len; i++) { + uint8_t a = tmp[i]; + uint8_t b = (i == 0) ? tmp[len - 1] : tmp[i - 1]; + bytes[i] = (a << bitoffset) | (b >> (8 - bitoffset)); + } + } + } - //---------- - // boolean operators - - bool operator < (const Blob & k) const { - for(int i = sizeof(bytes) -1; i >= 0; i--) { - if(bytes[i] < k.bytes[i]) return true; - if(bytes[i] > k.bytes[i]) return false; - } - return false; - } - - bool operator == ( const Blob & k ) const { - int r = memcmp(&bytes[0], &k.bytes[0], sizeof(bytes)); - return (r == 0) ? true : false; - } - - bool operator != ( const Blob & k ) const { - return !(*this == k); - } - - //---------- - // bitwise operations - - Blob operator ^ (const Blob & k) const { - Blob t; - - for(size_t i = 0; i < sizeof(bytes); i++) { - t.bytes[i] = bytes[i] ^ k.bytes[i]; - } - - return t; - } - - Blob & operator ^= (const Blob & k) { - for(size_t i = 0; i < sizeof(bytes); i++) { - bytes[i] ^= k.bytes[i]; - } - return *this; - } - - //---------- - // interface - - FORCE_INLINE uint8_t getbit(size_t bit) const { - return _getbit(bit, bytes, sizeof(bytes)); - } - - FORCE_INLINE void printhex(const char * prefix = "") const { - _printhex(prefix, bytes, sizeof(bytes)); - } - - FORCE_INLINE void printbits(const char * prefix = "") const { - _printbits(prefix, bytes, sizeof(bytes)); - } - - FORCE_INLINE uint32_t highzerobits(void) const { - return _highzerobits(bytes, sizeof(bytes)); - } - - FORCE_INLINE uint32_t window(size_t start, size_t count) const { - return _window(start, count, bytes, sizeof(bytes)); - } - - FORCE_INLINE void flipbit(size_t bit) { - _flipbit(bit, bytes, sizeof(bytes)); - } - - FORCE_INLINE void reversebits(void) { - _reversebits(bytes, sizeof(bytes)); - } - - FORCE_INLINE void lrot(size_t c) { - _lrot(c, bytes, sizeof(bytes)); - } - 
-protected: - //---------- - // implementations - - static FORCE_INLINE uint8_t _getbit(size_t bit, const uint8_t * bytes, const size_t len) { - size_t byte = bit >> 3; - bit &= 7; - if (byte > len) return 0; - return (bytes[byte] >> bit) & 1; - } - - static void _printhex(const char * prefix, const uint8_t * bytes, const size_t len) { - const size_t buflen = 4 + 2 * len + ((len + 3) / 4); - char buf[buflen]; - char * p; - - buf[0] = '['; - buf[1] = ' '; - // Space preceding the closing ']' gets added by the loop below - buf[buflen - 2] = ']'; - buf[buflen - 1] = '\0'; - - // Print using MSB-first notation - p = &buf[2]; - for (size_t i = len; i != 0; i--) { - uint8_t vh = (bytes[i - 1] >> 4); - uint8_t vl = (bytes[i - 1] & 15); - *p++ = vh + ((vh <= 9) ? '0' : 'W'); // 'W' + 10 == 'a' - *p++ = vl + ((vl <= 9) ? '0' : 'W'); - if ((i & 3) == 1) { - *p++ = ' '; - } - } - - printf("%s%s\n", prefix, buf); - } - - static void _printbits(const char * prefix, const uint8_t * bytes, const size_t len) { - const size_t buflen = 4 + 9 * len; - char buf[buflen]; - char * p; - - buf[0] = '['; - buf[1] = ' '; - // Space preceding the closing ']' gets added by the loop below - buf[buflen - 2] = ']'; - buf[buflen - 1] = '\0'; - - // Print using MSB-first notation - p = &buf[2]; - for (size_t i = len; i != 0; i--) { - uint8_t v = bytes[i - 1]; - for (int j = 7; j >= 0; j--) { - *p++ = (v & (1 << j)) ? '1' : '0'; - } - *p++ = ' '; - } - - printf("%s%s\n", prefix, buf); - } - - static FORCE_INLINE uint32_t _highzerobits(const uint8_t * bytes, const size_t len) { - uint32_t zb = 0; - for (ssize_t i = len - 1; i >= 0; i--) { - zb += hzb[bytes[i]]; - if (bytes[i] != 0) { - break; - } - } - return zb; - } - - // Bit-windowing function. - // Select some N-bit subset of the Blob, where N <= 24. 
- static FORCE_INLINE uint32_t _window(size_t start, size_t count, const uint8_t * bytes, const size_t len) { - assume(count <= 24); - const size_t bitlen = 8 * len; - const uint32_t mask = (1 << count) - 1; - uint32_t v; - - if (start <= (bitlen - 25)) { - memcpy(&v, &bytes[start >> 3], 4); - v = COND_BSWAP(v, isBE()); - v >>= (start & 7); - } else { - memcpy(&v, &bytes[len - 4], 4); - v = COND_BSWAP(v, isBE()); - v >>= 32 + start - bitlen; - if ((start + count) > bitlen) { - uint32_t v2; - memcpy(&v2, bytes, 4); - v2 = COND_BSWAP(v2, isBE()); - v2 <<= bitlen - start; - v |= v2; - } - } - return v & mask; - } - - static FORCE_INLINE void _flipbit(size_t bit, uint8_t * bytes, const size_t len) { - const size_t byteoffset = bit >> 3; - const size_t bitoffset = bit & 7; - if (byteoffset < len) { - bytes[byteoffset] ^= (1 << bitoffset); - } - } - - // from the "Bit Twiddling Hacks" webpage - static FORCE_INLINE uint8_t _byterev(uint8_t b) { - return ((b * UINT64_C(0x0802) & UINT64_C(0x22110)) | - (b * UINT64_C(0x8020) & UINT64_C(0x88440))) * UINT64_C(0x10101) >> 16; - } - - // 0xf00f1001 => 0x8008f00f - static FORCE_INLINE void _reversebits(uint8_t * bytes, const size_t len) { - uint8_t tmp[len]; - - for (size_t i = 0; i < len; i++) - tmp[len - i - 1] = _byterev(bytes[i]); - memcpy(bytes, tmp, len); - } - - static void _lrot(size_t c, uint8_t * bytes, const size_t len) { - const size_t byteoffset = c >> 3; - const size_t bitoffset = c & 7; - uint8_t tmp[len]; - - for (size_t i = 0; i < len; i++) { - tmp[(i + byteoffset) % len] = bytes[i]; - } - if (bitoffset == 0) { - memcpy(bytes, tmp, len); - } else { - for (size_t i = 0; i < len; i++) { - uint8_t a = tmp[i]; - uint8_t b = (i == 0) ? 
tmp[len - 1] : tmp[i - 1]; - bytes[i] = (a << bitoffset) | (b >> (8 - bitoffset)); - } - } - } - -private: - //---------- - uint8_t bytes[_bytes]; -}; + private: + //---------- + uint8_t bytes[_bytes]; +}; // class Blob // from the "Bit Twiddling Hacks" webpage -template<> FORCE_INLINE void Blob<32>::reversebits(void) { +template <> +FORCE_INLINE void Blob<32>::reversebits( void ) { uint32_t v = GET_U32(bytes, 0); + // swap odd and even bits - v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1); + v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1); // swap consecutive pairs - v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2); + v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2); // swap nibbles ... - v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4); + v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4); // swap bytes - v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8); + v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8); // swap 2-byte long pairs - v = ( v >> 16 ) | ( v << 16); + v = ((v >> 16) ) | ((v) << 16); PUT_U32(v, bytes, 0); } -template<> FORCE_INLINE void Blob<64>::reversebits(void) { +template <> +FORCE_INLINE void Blob<64>::reversebits( void ) { uint64_t v = GET_U64(bytes, 0); + // swap odd and even bits - v = ((v >> 1) & UINT64_C(0x5555555555555555)) | ((v & UINT64_C(0x5555555555555555)) << 1); + v = ((v >> 1) & UINT64_C(0x5555555555555555)) | ((v & UINT64_C(0x5555555555555555)) << 1); // swap consecutive pairs - v = ((v >> 2) & UINT64_C(0x3333333333333333)) | ((v & UINT64_C(0x3333333333333333)) << 2); + v = ((v >> 2) & UINT64_C(0x3333333333333333)) | ((v & UINT64_C(0x3333333333333333)) << 2); // swap nibbles ... 
- v = ((v >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F)) | ((v & UINT64_C(0x0F0F0F0F0F0F0F0F)) << 4); + v = ((v >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F)) | ((v & UINT64_C(0x0F0F0F0F0F0F0F0F)) << 4); // swap bytes - v = ((v >> 8) & UINT64_C(0x00FF00FF00FF00FF)) | ((v & UINT64_C(0x00FF00FF00FF00FF)) << 8); + v = ((v >> 8) & UINT64_C(0x00FF00FF00FF00FF)) | ((v & UINT64_C(0x00FF00FF00FF00FF)) << 8); // swap 2-byte long pairs v = ((v >> 16) & UINT64_C(0x0000FFFF0000FFFF)) | ((v & UINT64_C(0x0000FFFF0000FFFF)) << 16); // swap 4-byte long pairs - v = ( v >> 32 ) | ( v << 32); + v = ((v >> 32)) | ((v) << 32); PUT_U64(v, bytes, 0); } @@ -352,73 +361,72 @@ template<> FORCE_INLINE void Blob<64>::reversebits(void) { // Blob-like class for externally managed buffers. // The operator overloads of Blob<> are made private, and so are not exposed. -typedef void * voidptr_t; +typedef void * voidptr_t; typedef const void * constvoidptr_t; class ExtBlob : private Blob<0> { + public: + //---------- + // constructors -public: - //---------- - // constructors - - ExtBlob(uint8_t * p, size_t l) { - ptr = p; - len = l; - } + ExtBlob( uint8_t * p, size_t l ) { + ptr = p; + len = l; + } - ExtBlob(uint8_t * p, const uint8_t * i, size_t l) { - ptr = p; - len = l; - memcpy(ptr, i, len); - } + ExtBlob( uint8_t * p, const uint8_t * i, size_t l ) { + ptr = p; + len = l; + memcpy(ptr, i, len); + } - //---------- - // conversion operators + //---------- + // conversion operators - operator voidptr_t () const { - return (voidptr_t)ptr; - } + operator voidptr_t () const { + return (voidptr_t)ptr; + } - operator constvoidptr_t () const { - return (constvoidptr_t)ptr; - } + operator constvoidptr_t () const { + return (constvoidptr_t)ptr; + } - //---------- - // interface + //---------- + // interface - FORCE_INLINE uint8_t getbit(size_t bit) const { - return _getbit(bit, ptr, len); - } + FORCE_INLINE uint8_t getbit( size_t bit ) const { + return _getbit(bit, ptr, len); + } - FORCE_INLINE void printhex(const char * 
prefix = "") const { - _printhex(prefix, ptr, len); - } + FORCE_INLINE void printhex( const char * prefix = "" ) const { + _printhex(prefix, ptr, len); + } - FORCE_INLINE void printbits(const char * prefix = "") const { - _printbits(prefix, ptr, len); - } + FORCE_INLINE void printbits( const char * prefix = "" ) const { + _printbits(prefix, ptr, len); + } - FORCE_INLINE uint32_t highzerobits(void) const { - return _highzerobits(ptr, len); - } + FORCE_INLINE uint32_t highzerobits( void ) const { + return _highzerobits(ptr, len); + } - FORCE_INLINE uint32_t window(size_t start, size_t count) const { - return _window(start, count, ptr, len); - } + FORCE_INLINE uint32_t window( size_t start, size_t count ) const { + return _window(start, count, ptr, len); + } - FORCE_INLINE void flipbit(size_t bit) { - _flipbit(bit, ptr, len); - } + FORCE_INLINE void flipbit( size_t bit ) { + _flipbit(bit, ptr, len); + } - FORCE_INLINE void reversebits(void) { - _reversebits(ptr, len); - } + FORCE_INLINE void reversebits( void ) { + _reversebits(ptr, len); + } - FORCE_INLINE void lrot(size_t c) { - _lrot(c, ptr, len); - } + FORCE_INLINE void lrot( size_t c ) { + _lrot(c, ptr, len); + } -private: + private: uint8_t * ptr; - size_t len; -}; + size_t len; +}; // class ExtBlob diff --git a/util/Blobsort.cpp b/util/Blobsort.cpp index f2942095..2b180098 100644 --- a/util/Blobsort.cpp +++ b/util/Blobsort.cpp @@ -29,177 +29,177 @@ // Blob sorting routine unit tests static const uint32_t SORT_TESTS = 19; -static const uint32_t TEST_SIZE = 100000; +static const uint32_t TEST_SIZE = 100000; -template < typename blobtype > -static void blobfill(std::vector & blobs, int testnum) { - if (testnum >= SORT_TESTS) { return ; } +template +static void blobfill( std::vector & blobs, int testnum ) { + if (testnum >= SORT_TESTS) { return; } - Rand r(testnum + 0xb840a149); + Rand r( testnum + 0xb840a149 ); - switch(testnum) { - case 0: // Consecutive numbers - case 1: // Consecutive numbers, sorted almost - 
case 2: // Consecutive numbers, scrambled - { - for (uint32_t n = 0; n < TEST_SIZE; n++) { - blobs[n] = n; - } - break; + switch (testnum) { + case 0: // Consecutive numbers + case 1: // Consecutive numbers, sorted almost + case 2: // Consecutive numbers, scrambled + { + for (uint32_t n = 0; n < TEST_SIZE; n++) { + blobs[n] = n; } - case 3: // Consecutive numbers, backwards - { - for (uint32_t n = 0; n < TEST_SIZE; n++) { - blobs[n] = TEST_SIZE - 1 - n; - } - break; + break; + } + case 3: // Consecutive numbers, backwards + { + for (uint32_t n = 0; n < TEST_SIZE; n++) { + blobs[n] = TEST_SIZE - 1 - n; } - case 4: // Random numbers - case 5: // Random numbers, sorted - case 6: // Random numbers, sorted almost - case 7: // Random numbers, sorted backwards + break; + } + case 4: // Random numbers + case 5: // Random numbers, sorted + case 6: // Random numbers, sorted almost + case 7: // Random numbers, sorted backwards case 10: // All zero bytes in LSB position case 11: // All zero bytes in MSB position case 12: // All zero bytes in LSB+1 position case 13: // All zero bytes in MSB-1 position case 14: // Random numbers, except each position has some missing bytes - { - for (uint32_t n = 0; n < TEST_SIZE; n++) { - r.rand_p(&blobs[n], sizeof(blobtype)); - } - break; + { + for (uint32_t n = 0; n < TEST_SIZE; n++) { + r.rand_p(&blobs[n], sizeof(blobtype)); } - case 8: // Many duplicates - { - uint32_t x = 0; - do { - r.rand_p(&blobs[x], sizeof(blobtype)); - uint32_t count = 1 + r.rand_range(TEST_SIZE - 1 - x); - for (uint32_t i = 1; i < count; i++) { - blobs[x + i] = blobs[x]; - } - x += count; - } while (x < TEST_SIZE); - break; - } - case 9: // All duplicates - { - r.rand_p(&blobs[0], sizeof(blobtype)); - for (uint32_t i = 1; i < TEST_SIZE; i++) { - blobs[i] = blobs[0]; + break; + } + case 8: // Many duplicates + { + uint32_t x = 0; + do { + r.rand_p(&blobs[x], sizeof(blobtype)); + uint32_t count = 1 + r.rand_range(TEST_SIZE - 1 - x); + for (uint32_t i = 1; i < count; 
i++) { + blobs[x + i] = blobs[x]; } - break; + x += count; + } while (x < TEST_SIZE); + break; + } + case 9: // All duplicates + { + r.rand_p(&blobs[0], sizeof(blobtype)); + for (uint32_t i = 1; i < TEST_SIZE; i++) { + blobs[i] = blobs[0]; } + break; + } case 15: // All zeroes - { - memset(&blobs[0], 0, TEST_SIZE * sizeof(blobtype)); - break; - } + { + memset(&blobs[0], 0, TEST_SIZE * sizeof(blobtype)); + break; + } case 16: // All ones - { - for (uint32_t i = 0; i < TEST_SIZE; i++) { - blobs[i] = 1; - } - break; + { + for (uint32_t i = 0; i < TEST_SIZE; i++) { + blobs[i] = 1; } + break; + } case 17: // All Fs - { - memset(&blobs[0], 0xFF, TEST_SIZE * sizeof(blobtype)); - break; - } + { + memset(&blobs[0], 0xFF, TEST_SIZE * sizeof(blobtype)); + break; + } case 18: // All 0xAAA and 0x555 - { - uint32_t i = 0; - do { - uint64_t rndnum = r.rand_u64(); - for (int j = 0; j < 64; j++) { - if (rndnum & 1) { - memset(&blobs[i], 0xAA, sizeof(blobtype)); - } else { - memset(&blobs[i], 0x55, sizeof(blobtype)); - } - i++; - rndnum >>= 1; - if (i == TEST_SIZE) { break; } + { + uint32_t i = 0; + do { + uint64_t rndnum = r.rand_u64(); + for (int j = 0; j < 64; j++) { + if (rndnum & 1) { + memset(&blobs[i], 0xAA, sizeof(blobtype)); + } else { + memset(&blobs[i], 0x55, sizeof(blobtype)); } - } while (i < TEST_SIZE); - break; - } + i++; + rndnum >>= 1; + if (i == TEST_SIZE) { break; } + } + } while (i < TEST_SIZE); + break; + } default: unreachable(); break; } - switch(testnum) { - // Sorted backwards - case 7: - { - std::sort(blobs.rbegin(), blobs.rend()); - break; - } - // Sorted - case 5: - case 6: - { - std::sort(blobs.begin(), blobs.end()); - if (testnum == 5) break; - } - // 6 is fallthrough to... 
- // "Almost sorted" == mix up a few entries - case 1: - { - for (uint32_t n = 0; n < TEST_SIZE / 1000; n++) { - std::swap(blobs[r.rand_range(TEST_SIZE)], - blobs[r.rand_range(TEST_SIZE)]); - } - break; + switch (testnum) { + // Sorted backwards + case 7: + { + std::sort(blobs.rbegin(), blobs.rend()); + break; + } + // Sorted + case 5: + case 6: + { + std::sort(blobs.begin(), blobs.end()); + if (testnum == 5) { break; } + } + // 6 is fallthrough to... + // "Almost sorted" == mix up a few entries + case 1: + { + for (uint32_t n = 0; n < TEST_SIZE / 1000; n++) { + std::swap(blobs[r.rand_range(TEST_SIZE)], blobs[r.rand_range(TEST_SIZE)]); } - // "Scrambled" == shuffle all the entries - case 2: - { - for (uint32_t n = TEST_SIZE - 1; n > 0; n--) { - std::swap(blobs[n], blobs[r.rand_range(n + 1)]); - } - break; + break; + } + // "Scrambled" == shuffle all the entries + case 2: + { + for (uint32_t n = TEST_SIZE - 1; n > 0; n--) { + std::swap(blobs[n], blobs[r.rand_range(n + 1)]); } - // Zero out bytes in some position + break; + } + // Zero out bytes in some position case 10: case 11: case 12: case 13: - { - uint32_t offset = (testnum == 10) ? 0 : - ((testnum == 11) ? (sizeof(blobtype) - 1) : - ((testnum == 12) ? 1 : (sizeof(blobtype) - 2))); - for (uint32_t n = 0; n < TEST_SIZE; n++) { - blobs[n][offset] = 0; - } - break; + { + uint32_t offset = (testnum == 10) ? 0 : + ((testnum == 11) ? (sizeof(blobtype) - 1) : + ((testnum == 12) ? 
1 : (sizeof(blobtype) - 2))); + for (uint32_t n = 0; n < TEST_SIZE; n++) { + blobs[n][offset] = 0; } - // Exclude a byte value from each position + break; + } + // Exclude a byte value from each position case 14: - { - uint8_t excludes[sizeof(blobtype)]; - r.rand_p(excludes, sizeof(excludes)); - for (uint32_t n = 0; n < TEST_SIZE; n++) { - for (uint32_t i = 0; i < sizeof(blobtype); i++) { - if (blobs[n][i] == excludes[i]) { - blobs[n][i] = ~excludes[i]; - } + { + uint8_t excludes[sizeof(blobtype)]; + r.rand_p(excludes, sizeof(excludes)); + for (uint32_t n = 0; n < TEST_SIZE; n++) { + for (uint32_t i = 0; i < sizeof(blobtype); i++) { + if (blobs[n][i] == excludes[i]) { + blobs[n][i] = ~excludes[i]; } } - break; } + break; + } default: break; } } -template < typename blobtype > -static bool blobverify(std::vector & blobs) { - bool passed = true; +template +static bool blobverify( std::vector & blobs ) { + bool passed = true; const size_t sz = blobs.size(); + for (size_t nb = 1; nb < sz; nb++) { if (!((blobs[nb - 1] < blobs[nb]) || - (blobs[nb - 1] == blobs[nb]))) { + (blobs[nb - 1] == blobs[nb]))) { passed = false; } if (blobs[nb] < blobs[nb - 1]) { @@ -210,16 +210,16 @@ static bool blobverify(std::vector & blobs) { return passed; } -template < typename blobtype > -static bool test_blobsort_type(void) { +template +static bool test_blobsort_type( void ) { bool passed = true; - std::vector blobs(TEST_SIZE); + std::vector blobs( TEST_SIZE ); for (int i = 0; i < SORT_TESTS; i++) { blobfill(blobs, i); blobsort(blobs.begin(), blobs.end()); passed &= blobverify(blobs); - //printf("After test %d: %s\n", i, passed ? "ok" : "no"); + // printf("After test %d: %s\n", i, passed ? "ok" : "no"); } return passed; @@ -239,14 +239,15 @@ static bool test_blobsort_type(void) { // the list, which means the first template function gets called, // which ignores that type and just passes its input through. 
-template < typename T > -static bool AND(bool in) { +template +static bool AND( bool in ) { return in; } -template < typename T, typename... More > +template typename std::enable_if::value, bool>::type -static AND(bool in) { + +static AND( bool in ) { return test_blobsort_type() && AND(in); } @@ -256,4 +257,4 @@ static AND(bool in) { // cause it to run during startup, which takes a few seconds. // So this is only referenced in DEBUG mode. extern bool blobsort_test_result; -bool blobsort_test_result = AND(true); +bool blobsort_test_result = AND(true); diff --git a/util/Blobsort.h b/util/Blobsort.h index 154fa669..bee308a0 100644 --- a/util/Blobsort.h +++ b/util/Blobsort.h @@ -19,149 +19,154 @@ //----------------------------------------------------------------------------- // Blob sorting routines -static const uint32_t RADIX_BITS = 8; -static const uint32_t RADIX_SIZE = (uint32_t)1 << RADIX_BITS; -static const uint32_t RADIX_MASK = RADIX_SIZE - 1; - -template< typename T > -static void radixsort( T * begin, T * end ) -{ - const uint32_t RADIX_LEVELS = sizeof(T); - const size_t count = end - begin; - - size_t freqs [RADIX_LEVELS][RADIX_SIZE] = {}; - T * ptr = begin; - // Record byte frequencies in each position over all items except - // the last one. - do { +static const uint32_t RADIX_BITS = 8; +static const uint32_t RADIX_SIZE = (uint32_t)1 << RADIX_BITS; +static const uint32_t RADIX_MASK = RADIX_SIZE - 1; + +template +static void radixsort( T * begin, T * end ) { + const uint32_t RADIX_LEVELS = sizeof(T); + const size_t count = end - begin; + + size_t freqs[RADIX_LEVELS][RADIX_SIZE] = {}; + T * ptr = begin; + + // Record byte frequencies in each position over all items except + // the last one. 
+ do { + for (uint32_t pass = 0; pass < RADIX_LEVELS; pass++) { + uint8_t value = (*ptr)[pass]; + ++freqs[pass][value]; + } + } while (++ptr < (end - 1)); + // Process the last item separately, so that we can record which + // passes (if any) would do no reordering of items, and which can + // therefore be skipped entirely. + uint32_t trivial_passes = 0; for (uint32_t pass = 0; pass < RADIX_LEVELS; pass++) { - uint8_t value = (*ptr)[pass]; - ++freqs[pass][value]; - } - } while (++ptr < (end - 1)); - // Process the last item separately, so that we can record which - // passes (if any) would do no reordering of items, and which can - // therefore be skipped entirely. - uint32_t trivial_passes = 0; - for (uint32_t pass = 0; pass < RADIX_LEVELS; pass++) { - uint8_t value = (*ptr)[pass]; - if (++freqs[pass][value] == count) - trivial_passes |= 1UL << pass; - } - - std::unique_ptr queue_area(new T[count]); - T * from = begin; - T * to = queue_area.get(); - - for (uint32_t pass = 0; pass < RADIX_LEVELS; pass++) { - // If this pass would do nothing, just skip it. - if (trivial_passes & (1UL << pass)) - continue; - - // Array of pointers to the current position in each queue, - // pre-arranged based on the known final sizes of each queue. This - // way all the entries end up contiguous with no gaps. - T * queue_ptrs[RADIX_SIZE]; - T * next = to; - for (size_t i = 0; i < RADIX_SIZE; i++) { - queue_ptrs[i] = next; - next += freqs[pass][i]; + uint8_t value = (*ptr)[pass]; + if (++freqs[pass][value] == count) { + trivial_passes |= 1UL << pass; + } } - // Copy each element into its queue based on the current byte. 
- for (size_t i = 0; i < count; i++) { - uint8_t index = from[i][pass]; - *queue_ptrs[index]++ = from[i]; - __builtin_prefetch(queue_ptrs[index] + 1); - } + std::unique_ptr queue_area( new T[count] ); + T * from = begin; + T * to = queue_area.get(); - std::swap(from, to); - } + for (uint32_t pass = 0; pass < RADIX_LEVELS; pass++) { + // If this pass would do nothing, just skip it. + if (trivial_passes & (1UL << pass)) { + continue; + } + + // Array of pointers to the current position in each queue, + // pre-arranged based on the known final sizes of each queue. This + // way all the entries end up contiguous with no gaps. + T * queue_ptrs[RADIX_SIZE]; + T * next = to; + for (size_t i = 0; i < RADIX_SIZE; i++) { + queue_ptrs[i] = next; + next += freqs[pass][i]; + } + + // Copy each element into its queue based on the current byte. + for (size_t i = 0; i < count; i++) { + uint8_t index = from[i][pass]; + *queue_ptrs[index]++ = from[i]; + __builtin_prefetch(queue_ptrs[index] + 1); + } + + std::swap(from, to); + } - // Because the swap always happens in the above loop, the "from" - // area has the sorted payload. If that's not the original array, - // then do a final copy. - if (from != begin) - std::copy(from, from + count, begin); + // Because the swap always happens in the above loop, the "from" + // area has the sorted payload. If that's not the original array, + // then do a final copy. + if (from != begin) { + std::copy(from, from + count, begin); + } } //----------------------------------------------------------------------------- -static const uint32_t SORT_CUTOFF = 60; +static const uint32_t SORT_CUTOFF = 60; // This is an in-place MSB radix sort that recursively sorts each // block, sometimes known as an "American Flag Sort". Testing shows // that performance increases by devolving to std::sort once we get // down to small block sizes. Both 40 and 60 items are best on my // system, but there could be a better value for the general case. 
-template< typename T > -static void flagsort( T * begin, T * end, int idx ) -{ - const uint32_t DIGITS = sizeof(T); - const size_t count = end - begin; - assume(idx >= 0); - assume(idx < DIGITS); - - // Each pass must compute its own frequency table, because the - // counts depend on all previous bytes, since each pass operates on - // a successively smaller subset of the total list to sort. - size_t freqs[RADIX_SIZE] = {}; - T * ptr = begin; - do { - ++freqs[(*ptr)[idx]]; - } while (++ptr < (end - 1)); - // As in radix sort, if this pass would do no rearrangement, then - // there's no need to iterate over every item. Since this case is - // only likely to hit in degenerate cases (e.g. donothing64), just - // devolve into radixsort since that performs better on lists of - // many similar values. - if (++freqs[(*ptr)[idx]] == count) { - // If there are no more passes, then we're just done. - if (idx == 0) { - return; - } - return radixsort(begin, end); - } - - T * block_ptrs[RADIX_SIZE]; - ptr = begin; - for (size_t i = 0; i < RADIX_SIZE; i++) { - block_ptrs[i] = ptr; - ptr += freqs[i]; - } - - // Move all values into their correct block, maintaining a stable - // sort ordering inside each block. - ptr = begin; - T * nxt = begin + freqs[0]; - uint8_t curblock = 0; - while (curblock < (RADIX_SIZE - 1)) { - if (expectp(ptr >= nxt, 0.0944)) { - curblock++; - nxt += freqs[curblock]; - continue; +template +static void flagsort( T * begin, T * end, int idx ) { + const uint32_t DIGITS = sizeof(T); + const size_t count = end - begin; + + assume(idx >= 0 ); + assume(idx < DIGITS); + + // Each pass must compute its own frequency table, because the + // counts depend on all previous bytes, since each pass operates on + // a successively smaller subset of the total list to sort. 
+ size_t freqs[RADIX_SIZE] = {}; + T * ptr = begin; + do { + ++freqs[(*ptr)[idx]]; + } while (++ptr < (end - 1)); + // As in radix sort, if this pass would do no rearrangement, then + // there's no need to iterate over every item. Since this case is + // only likely to hit in degenerate cases (e.g. donothing64), just + // devolve into radixsort since that performs better on lists of + // many similar values. + if (++freqs[(*ptr)[idx]] == count) { + // If there are no more passes, then we're just done. + if (idx == 0) { + return; + } + return radixsort(begin, end); } - uint8_t value = (*ptr)[idx]; - if (unpredictable(value == curblock)) { // p ~= 0.501155 - ptr++; - continue; + + T * block_ptrs[RADIX_SIZE]; + ptr = begin; + for (size_t i = 0; i < RADIX_SIZE; i++) { + block_ptrs[i] = ptr; + ptr += freqs[i]; + } + + // Move all values into their correct block, maintaining a stable + // sort ordering inside each block. + ptr = begin; + T * nxt = begin + freqs[0]; + uint8_t curblock = 0; + while (curblock < (RADIX_SIZE - 1)) { + if (expectp((ptr >= nxt), 0.0944)) { + curblock++; + nxt += freqs[curblock]; + continue; + } + uint8_t value = (*ptr)[idx]; + if (unpredictable(value == curblock)) { // p ~= 0.501155 + ptr++; + continue; + } + // assert(block_ptrs[value] < end); + std::swap(*ptr, *block_ptrs[value]++); // MAYBE do this better manually? + } + + if (idx == 0) { + return; + } + + // Sort each block by the next less-significant byte, or by + // std::sort if there are only a few entries in the block. + ptr = begin; + for (int i = 0; i < RADIX_SIZE; i++) { + if (expectp((freqs[i] > SORT_CUTOFF), 0.00390611)) { + flagsort(ptr, ptr + freqs[i], idx - 1); + } else if (expectp((freqs[i] > 1), 0.3847)) { + std::sort(ptr, ptr + freqs[i]); + } + ptr += freqs[i]; } - //assert(block_ptrs[value] < end); - std::swap(*ptr, *block_ptrs[value]++); // MAYBE do this better manually? 
- } - - if (idx == 0) - return; - - // Sort each block by the next less-significant byte, or by - // std::sort if there are only a few entries in the block. - ptr = begin; - for (int i = 0; i < RADIX_SIZE; i++) { - if (expectp(freqs[i] > SORT_CUTOFF, 0.00390611)) - flagsort(ptr, ptr + freqs[i], idx - 1); - else if (expectp(freqs[i] > 1, 0.3847)) - std::sort(ptr, ptr + freqs[i]); - ptr += freqs[i]; - } } //----------------------------------------------------------------------------- @@ -170,20 +175,21 @@ static void flagsort( T * begin, T * end, int idx ) // that is, so some effort into finding the right cutoff might be // appropriate. This approach handily beats just using std::sort, at // least on my system (526 seconds vs 1430). -template< class Iter > -static void blobsort ( Iter iter_begin, Iter iter_end ) -{ - typedef typename std::iterator_traits::value_type T; - // Nothing to sort if there are 0 or 1 items - if ((iter_end - iter_begin) < 2) - return; - else if ((iter_end - iter_begin) <= SORT_CUTOFF) - return std::sort(iter_begin, iter_end); - - T * begin = &(*iter_begin); - T * end = &(*iter_end); - if (sizeof(T) > 4) - flagsort(begin, end, sizeof(T) - 1); - else - radixsort(begin, end); +template +static void blobsort( Iter iter_begin, Iter iter_end ) { + typedef typename std::iterator_traits::value_type T; + // Nothing to sort if there are 0 or 1 items + if ((iter_end - iter_begin) < 2) { + return; + } else if ((iter_end - iter_begin) <= SORT_CUTOFF) { + return std::sort(iter_begin, iter_end); + } + + T * begin = &(*iter_begin); + T * end = &(*iter_end ); + if (sizeof(T) > 4) { + flagsort(begin, end, sizeof(T) - 1); + } else { + radixsort(begin, end); + } } diff --git a/util/Instantiate.h b/util/Instantiate.h index 6964ded1..2cb8b840 100644 --- a/util/Instantiate.h +++ b/util/Instantiate.h @@ -24,13 +24,13 @@ #if defined(__cplusplus) && (__cplusplus >= 201402L) // C++14 allows auto variables to determine function return types -#define INSTANTIATE(FN, 
TYPELIST) \ - template < typename ... Types> \ - auto FN ## _instantiator() { \ - static auto instances = \ - std::tuple_cat(std::make_tuple(FN)...); \ - return &instances; \ - } \ +#define INSTANTIATE(FN, TYPELIST) \ + template < typename ... Types> \ + auto FN ## _instantiator() { \ + static auto instances = \ + std::tuple_cat(std::make_tuple(FN)...); \ + return &instances; \ + } \ template auto FN ## _instantiator(); #else // C++11 doesn't, so YOU get a void*, and YOU get a void*,.... diff --git a/util/Platform.cpp b/util/Platform.cpp index bfb7e9bb..f99b6851 100644 --- a/util/Platform.cpp +++ b/util/Platform.cpp @@ -19,12 +19,12 @@ #include "Platform.h" #if defined(HAVE_THREADS) -unsigned g_NCPU = 4; +unsigned g_NCPU = 4; #else -const unsigned g_NCPU = 1; +const unsigned g_NCPU = 1; #endif -void DisableThreads(void) { +void DisableThreads( void ) { #if defined(HAVE_THREADS) printf("WARNING: disabling threaded mode\n"); g_NCPU = 1; diff --git a/util/Random.h b/util/Random.h index 9cff1735..01a26c1c 100644 --- a/util/Random.h +++ b/util/Random.h @@ -46,105 +46,96 @@ // Xorshift RNG based on code by George Marsaglia // http://en.wikipedia.org/wiki/Xorshift -class Rand -{ - private: - uint32_t x; - uint32_t y; - uint32_t z; - uint32_t w; - - public: - Rand() - { - reseed(uint32_t(0)); - } - - Rand( uint32_t seed ) - { - reseed(seed); - } - - void reseed ( uint32_t seed ) - { - x = 0x498b3bc5 ^ seed; - y = 0; - z = 0; - w = 0; - - for(int i = 0; i < 10; i++) mix(); - } - - void reseed ( uint64_t seed ) - { - x = 0x498b3bc5 ^ (uint32_t)(seed >> 0); - y = 0x5a05089a ^ (uint32_t)(seed >> 32); - z = 0; - w = 0; - - for(int i = 0; i < 10; i++) mix(); - } - - //----------------------------------------------------------------------------- - - void mix ( void ) - { - uint32_t t = x ^ (x << 11); - x = y; y = z; z = w; - w = w ^ (w >> 19) ^ t ^ (t >> 8); - } - - uint32_t rand_u32 ( void ) - { - mix(); - - return x; - } - - uint64_t rand_u64 ( void ) - { - mix(); - - uint64_t 
a = x; - uint64_t b = y; - - return (a << 32) | b; - } +class Rand { + private: + uint32_t x; + uint32_t y; + uint32_t z; + uint32_t w; + + public: + Rand() { + reseed(uint32_t(0)); + } + + Rand( uint32_t seed ) { + reseed(seed); + } + + void reseed( uint32_t seed ) { + x = 0x498b3bc5 ^ seed; + y = 0; + z = 0; + w = 0; + + for (int i = 0; i < 10; i++) { mix(); } + } + + void reseed( uint64_t seed ) { + x = 0x498b3bc5 ^ (uint32_t)(seed >> 0); + y = 0x5a05089a ^ (uint32_t)(seed >> 32); + z = 0; + w = 0; + + for (int i = 0; i < 10; i++) { mix(); } + } + + //----------------------------------------------------------------------------- + + void mix( void ) { + uint32_t t = x ^ (x << 11); + + x = y; y = z; z = w; + w = w ^ (w >> 19) ^ t ^ (t >> 8); + } + + uint32_t rand_u32( void ) { + mix(); + + return x; + } + + uint64_t rand_u64( void ) { + mix(); + + uint64_t a = x; + uint64_t b = y; + + return (a << 32) | b; + } #if defined(HAVE_INT128) - uint128_t rand_u128 ( void ) - { - uint128_t a = rand_u64(); - return (a << 64) | rand_u64(); - } + + uint128_t rand_u128( void ) { + uint128_t a = rand_u64(); + + return (a << 64) | rand_u64(); + } + #endif - // Returns a value in the range [0, max) - uint32_t rand_range ( uint32_t max ) - { - uint64_t r = rand_u32(); - return (r * max) >> 32; - - } - - void rand_p ( void * blob, int bytes ) - { - uint8_t * blocks = reinterpret_cast(blob); - int i; - - while(bytes >= 4) - { - uint32_t r = COND_BSWAP(rand_u32(), isBE()); - memcpy(blocks, &r, 4); - blocks += 4; - bytes -= 4; + // Returns a value in the range [0, max) + uint32_t rand_range( uint32_t max ) { + uint64_t r = rand_u32(); + + return (r * max) >> 32; } - for (i = 0; i < bytes; i++) - { - blocks[i] = (uint8_t)rand_u32(); + void rand_p( void * blob, int bytes ) { + uint8_t * blocks = reinterpret_cast(blob); + int i; + + while (bytes >= 4) { + uint32_t r = COND_BSWAP(rand_u32(), isBE()); + memcpy(blocks, &r, 4); + blocks += 4; + bytes -= 4; + } + + for (i = 0; i < bytes; i++) 
{ + blocks[i] = (uint8_t)rand_u32(); + } } - } -}; +}; // class Rand //----------------------------------------------------------------------------- diff --git a/util/Stats.cpp b/util/Stats.cpp index 89fa5c35..8f04b9ee 100644 --- a/util/Stats.cpp +++ b/util/Stats.cpp @@ -60,169 +60,153 @@ //----------------------------------------------------------------------------- -double CalcMean(std::vector & v) { - const size_t sz = v.size(); - double mean = 0; +double CalcMean( std::vector & v ) { + const size_t sz = v.size(); + double mean = 0; - for(size_t i = 0; i < sz; i++) { - mean += v[i]; - } + for (size_t i = 0; i < sz; i++) { + mean += v[i]; + } - mean /= double(sz); + mean /= double(sz); - return mean; + return mean; } -double CalcMean ( std::vector & v, int a, int b ) -{ - double mean = 0; +double CalcMean( std::vector & v, int a, int b ) { + double mean = 0; - for(int i = a; i <= b; i++) - { - mean += v[i]; - } + for (int i = a; i <= b; i++) { + mean += v[i]; + } - mean /= (b-a+1); + mean /= (b - a + 1); - return mean; + return mean; } -double CalcStdv ( std::vector & v, int a, int b ) -{ - double mean = CalcMean(v,a,b); +double CalcStdv( std::vector & v, int a, int b ) { + double mean = CalcMean(v, a, b); - double stdv = 0; + double stdv = 0; - for(int i = a; i <= b; i++) - { - double x = v[i] - mean; + for (int i = a; i <= b; i++) { + double x = v[i] - mean; - stdv += x*x; - } + stdv += x * x; + } - stdv = sqrt(stdv / (b-a+1)); + stdv = sqrt(stdv / (b - a + 1)); - return stdv; + return stdv; } -double CalcStdv ( std::vector & v ) -{ - return CalcStdv(v, 0, v.size()); +double CalcStdv( std::vector & v ) { + return CalcStdv(v, 0, v.size()); } // Return true if the largest value in v[0,len) is more than three // standard deviations from the mean -bool ContainsOutlier ( std::vector & v, size_t len ) -{ - double mean = 0; +bool ContainsOutlier( std::vector & v, size_t len ) { + double mean = 0; - for(size_t i = 0; i < len; i++) - { - mean += v[i]; - } + for (size_t 
i = 0; i < len; i++) { + mean += v[i]; + } - mean /= double(len); + mean /= double(len); - double stdv = 0; + double stdv = 0; - for(size_t i = 0; i < len; i++) - { - double x = v[i] - mean; - stdv += x*x; - } + for (size_t i = 0; i < len; i++) { + double x = v[i] - mean; + stdv += x * x; + } - stdv = sqrt(stdv / double(len)); + stdv = sqrt(stdv / double(len)); - double cutoff = mean + stdv*3; + double cutoff = mean + stdv * 3; - return v[len-1] > cutoff; + return v[len - 1] > cutoff; } // Do a binary search to find the largest subset of v that does not contain // outliers. -void FilterOutliers ( std::vector & v ) -{ - std::sort(v.begin(),v.end()); +void FilterOutliers( std::vector & v ) { + std::sort(v.begin(), v.end()); - size_t len = 0; - const size_t sz = v.size(); + size_t len = 0; + const size_t sz = v.size(); - for(size_t x = 0x40000000; x; x = x >> 1 ) - { - if((len | x) >= sz) continue; + for (size_t x = 0x40000000; x; x = x >> 1) { + if ((len | x) >= sz) { continue; } - if(!ContainsOutlier(v,len | x)) - { - len |= x; + if (!ContainsOutlier(v, len | x)) { + len |= x; + } } - } - v.resize(len); + v.resize(len); } #if 0 // Iteratively tighten the set to find a subset that does not contain // outliers. I'm not positive this works correctly in all cases. 
-void FilterOutliers2 ( std::vector & v ) -{ - std::sort(v.begin(),v.end()); +void FilterOutliers2( std::vector & v ) { + std::sort(v.begin(), v.end()); - int a = 0; - int b = (int)(v.size() - 1); + int a = 0; + int b = (int)(v.size() - 1); - for(int i = 0; i < 10; i++) - { - //printf("%d %d\n",a,b); + for (int i = 0; i < 10; i++) { + // printf("%d %d\n",a,b); - double mean = CalcMean(v,a,b); - double stdv = CalcStdv(v,a,b); + double mean = CalcMean(v, a, b); + double stdv = CalcStdv(v, a, b); - double cutA = mean - stdv*3; - double cutB = mean + stdv*3; + double cutA = mean - stdv * 3; + double cutB = mean + stdv * 3; - while((a < b) && (v[a] < cutA)) a++; - while((b > a) && (v[b] > cutB)) b--; - } + while ((a < b) && (v[a] < cutA)) { a++; } + while ((b > a) && (v[b] > cutB)) { b--; } + } - std::vector v2; + std::vector v2; - v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1); + v2.insert(v2.begin(), v.begin() + a, v.begin() + b + 1); - v.swap(v2); + v.swap(v2); } + #endif //----------------------------------------------------------------------------- -double chooseK ( int n, int k ) -{ - if(k > (n - k)) k = n - k; +double chooseK( int n, int k ) { + if (k > (n - k)) { k = n - k; } - double c = 1; + double c = 1; - for(int i = 0; i < k; i++) - { - double t = double(n-i) / double(i+1); + for (int i = 0; i < k; i++) { + double t = double(n - i) / double(i + 1); - c *= t; - } + c *= t; + } return c; } -double chooseUpToK ( int n, int k ) -{ - double c = 0; +double chooseUpToK( int n, int k ) { + double c = 0; - for(int i = 1; i <= k; i++) - { - c += chooseK(n,i); - } + for (int i = 1; i <= k; i++) { + c += chooseK(n, i); + } - return c; + return c; } //----------------------------------------------------------------------------- @@ -232,49 +216,49 @@ double chooseUpToK ( int n, int k ) // Note: with 32bit 77163 keys will get a 50% probability of one collision. 
// Naive multiplication, no accuracy at all -static double ExpectedNBCollisions_Slow ( const double nbH, const double nbBits ) -{ - long balls = nbH; - long double bins = nbBits; - long double result = 1.0; - for (long i = 1; i < balls / 2; i++) { - // take a pair from the front and the end to minimize errors - result *= ((bins - i) / bins) * ((bins - (nbH - i)) / bins); - } - return (double)(nbH * result); +static double ExpectedNBCollisions_Slow( const double nbH, const double nbBits ) { + long balls = nbH; + long double bins = nbBits; + long double result = 1.0; + + for (long i = 1; i < balls / 2; i++) { + // take a pair from the front and the end to minimize errors + result *= ((bins - i) / bins) * ((bins - (nbH - i)) / bins); + } + return (double)(nbH * result); } // Still too inaccurate: https://preshing.com/20110504/hash-collision-probabilities/ -static double EstimateNbCollisions_Taylor(const double nbH, const double nbBits) -{ - const long double k = nbH; - const long double b = nbBits; - return (double)(k * (1.0 - expl(-0.5 * k * (k - 1.0) / b))); +static double EstimateNbCollisions_Taylor( const double nbH, const double nbBits ) { + const long double k = nbH; + const long double b = nbBits; + + return (double)(k * (1.0 - expl(-0.5 * k * (k - 1.0) / b))); } // demerphq: (double(count) * double(count-1)) / pow(2.0,double(sizeof(hashtype) * 8 + 1)); // the very same as our calc. pow 2 vs exp2. Just the high cutoff is missing here. -static double EstimateNbCollisions_Demerphq(const double nbH, const double nbBits) -{ - return (nbH * (nbH - 1)) / pow(2.0, nbBits + 1); +static double EstimateNbCollisions_Demerphq( const double nbH, const double nbBits ) { + return (nbH * (nbH - 1)) / pow(2.0, nbBits + 1); } // The previous best calculation, highly prone to inaccuracies with low results (1.0 - 10.0) // TODO: return also the error. 
-static double EstimateNbCollisions_previmpl(const double nbH, const double nbBits) -{ - double exp = exp2(nbBits); // 2 ^ bits - double result = (nbH * (nbH-1)) / (2.0 * exp); - if (result > nbH) - result = nbH; - // improved floating point accuracy - if (result <= exp || nbBits > 32) - return result; - return result - exp; +static double EstimateNbCollisions_previmpl( const double nbH, const double nbBits ) { + double exp = exp2(nbBits); // 2 ^ bits + double result = (nbH * (nbH - 1)) / (2.0 * exp); + + if (result > nbH) { + result = nbH; + } + // improved floating point accuracy + if ((result <= exp) || (nbBits > 32)) { + return result; + } + return result - exp; } -static double EstimateNbCollisions_fwojcik(const double nbH, const int nbBits) -{ +static double EstimateNbCollisions_fwojcik( const double nbH, const int nbBits ) { // If the probability that there are 1 or more collisions (p(C >= // 1)) is not much higher than the probability of exactly 1 // collision (p(C == 1)), then the classically-good approximation @@ -285,8 +269,8 @@ static double EstimateNbCollisions_fwojcik(const double nbH, const int nbBits) // of p(C >= 1)/p(C == 1) is about 1/(1-2**(n-2r-1)). This uses // the new estimator if that ratio is > 1 + 2**-8. That cutoff // minimizes the error around the values we care about. - if (nbBits - 2.0*log2(nbH) >= 8 - 1) { - return nbH * (nbH - 1) * exp2(-nbBits-1); + if (nbBits - 2.0 * log2(nbH) >= 8 - 1) { + return nbH * (nbH - 1) * exp2(-nbBits - 1); } // The probability that any given hash bucket is empty after nbH @@ -316,26 +300,25 @@ static double EstimateNbCollisions_fwojcik(const double nbH, const int nbBits) // m/n and pF at the same general orders of magnitude, so it tends // to have very good precision. At low hash occupancy, pF is too // close to m/n for this formula to work well. 
- double logpE = (double)nbH * log1p(-exp2(-nbBits)); + double logpE = (double)nbH * log1p(-exp2(-nbBits)); double result = exp2(nbBits) * (exp2(-nbBits) * (double)nbH + expm1(logpE)); return result; } -double EstimateNbCollisions(const unsigned long nbH, const int nbBits) -{ - return EstimateNbCollisions_fwojcik((const double)nbH, (const double)nbBits); +double EstimateNbCollisions( const unsigned long nbH, const int nbBits ) { + return EstimateNbCollisions_fwojcik((const double)nbH, (const double)nbBits); } #define COLLISION_ESTIMATORS 3 -static double EstimateNbCollisionsCand(const unsigned long nbH, const int nbBits, const int estimator) -{ - switch(estimator) { + +static double EstimateNbCollisionsCand( const unsigned long nbH, const int nbBits, const int estimator ) { + switch (estimator) { case 0: return EstimateNbCollisions_fwojcik((const double)nbH, (const double)nbBits); case 1: return EstimateNbCollisions_previmpl((const double)nbH, (const double)nbBits); case 2: return EstimateNbCollisions_Demerphq((const double)nbH, (const double)nbBits); - //case 3: return EstimateNbCollisions_Taylor((const double)nbH, (const double)nbBits); - //case 4: return ExpectedNBCollisions_Slow((const double)nbH, (const double)nbBits); + // case 3: return EstimateNbCollisions_Taylor((const double)nbH, (const double)nbBits); + // case 4: return ExpectedNBCollisions_Slow((const double)nbH, (const double)nbBits); default: { printf("Invalid estimator requested\n"); exit(1); } } return NAN; @@ -352,471 +335,591 @@ static double EstimateNbCollisionsCand(const unsigned long nbH, const int nbBits */ static double realcoll[58][18] = { /* 149633745 */ - { 9.66830188511513408e-62, 4.15250404044246501e-52, 7.66001792990870096e-33, - 3.28995264957314909e-23, 6.06889145411344312e-04, 3.10727242021280714e-01, - 3.18184245207177412e+02, 2.54544870233834445e+03, 2.03619731305636706e+04, - 1.62792385217456205e+05, 2.57656049031511368e+06, 1.90430490019698478e+07, - 5.94342984822125658e+07, 
1.32858774460385174e+08, 1.45439441000000000e+08, - 1.49109457000000000e+08, 1.49629649000000000e+08, 1.49633489000000000e+08 }, + { + 9.66830188511513408e-62, 4.15250404044246501e-52, 7.66001792990870096e-33, + 3.28995264957314909e-23, 6.06889145411344312e-04, 3.10727242021280714e-01, + 3.18184245207177412e+02, 2.54544870233834445e+03, 2.03619731305636706e+04, + 1.62792385217456205e+05, 2.57656049031511368e+06, 1.90430490019698478e+07, + 5.94342984822125658e+07, 1.32858774460385174e+08, 1.45439441000000000e+08, + 1.49109457000000000e+08, 1.49629649000000000e+08, 1.49633489000000000e+08 + }, /* 86536545 */ - { 3.23362916384237121e-62, 1.38883315060948101e-52, 2.56194496903768089e-33, - 1.10034698561685720e-23, 2.02978192359201898e-04, 1.03924834404869174e-01, - 1.06418943269388180e+02, 8.51346660380768071e+02, 6.81046060560096157e+03, - 5.44636796883101269e+04, 8.65959061394601478e+05, 6.61418293104189448e+06, - 2.27556140267314911e+07, 6.98558535013311207e+07, 8.23422410045954734e+07, - 8.60122570000000000e+07, 8.65324490000000000e+07, 8.65362890000000000e+07 }, + { + 3.23362916384237121e-62, 1.38883315060948101e-52, 2.56194496903768089e-33, + 1.10034698561685720e-23, 2.02978192359201898e-04, 1.03924834404869174e-01, + 1.06418943269388180e+02, 8.51346660380768071e+02, 6.81046060560096157e+03, + 5.44636796883101269e+04, 8.65959061394601478e+05, 6.61418293104189448e+06, + 2.27556140267314911e+07, 6.98558535013311207e+07, 8.23422410045954734e+07, + 8.60122570000000000e+07, 8.65324490000000000e+07, 8.65362890000000000e+07 + }, /* 75498113 */ - { 2.46129292104772484e-62, 1.05711726017762883e-52, 1.95003715543977527e-33, - 8.37534580859870329e-24, 1.54497860659825494e-04, 7.91029046026853616e-02, - 8.10013164325720538e+01, 6.48007286993706316e+02, 5.18385065708740240e+03, - 4.14575199616562895e+04, 6.59692186580697889e+05, 5.06817564395631664e+06, - 1.77549757986361682e+07, 5.89072678887400925e+07, 7.13038090638692677e+07, - 7.49738250000000000e+07, 
7.54940170000000000e+07, 7.54978570000000000e+07 }, + { + 2.46129292104772484e-62, 1.05711726017762883e-52, 1.95003715543977527e-33, + 8.37534580859870329e-24, 1.54497860659825494e-04, 7.91029046026853616e-02, + 8.10013164325720538e+01, 6.48007286993706316e+02, 5.18385065708740240e+03, + 4.14575199616562895e+04, 6.59692186580697889e+05, 5.06817564395631664e+06, + 1.77549757986361682e+07, 5.89072678887400925e+07, 7.13038090638692677e+07, + 7.49738250000000000e+07, 7.54940170000000000e+07, 7.54978570000000000e+07 + }, /* 56050289 */ - { 1.35658440124283578e-62, 5.82648563760172142e-53, 1.07479689405983373e-33, - 4.61621750982936253e-24, 8.51541829923128089e-05, 4.35989416694992429e-02, - 4.46452925853961631e+01, 3.57161013077325094e+02, 2.85720313997638277e+03, - 2.28521884740198511e+04, 3.64148636055323470e+05, 2.82665629721443821e+06, - 1.02311598958176058e+07, 3.98670968021314815e+07, 5.18559915916659608e+07, - 5.55260010000000000e+07, 5.60461930000000000e+07, 5.60500330000000000e+07 }, + { + 1.35658440124283578e-62, 5.82648563760172142e-53, 1.07479689405983373e-33, + 4.61621750982936253e-24, 8.51541829923128089e-05, 4.35989416694992429e-02, + 4.46452925853961631e+01, 3.57161013077325094e+02, 2.85720313997638277e+03, + 2.28521884740198511e+04, 3.64148636055323470e+05, 2.82665629721443821e+06, + 1.02311598958176058e+07, 3.98670968021314815e+07, 5.18559915916659608e+07, + 5.55260010000000000e+07, 5.60461930000000000e+07, 5.60500330000000000e+07 + }, /* 49925029 */ - { 1.07628616390943998e-62, 4.62261387512834023e-53, 8.52721751060712554e-34, - 3.66241203339361373e-24, 6.75595774724252468e-05, 3.45905036499356000e-02, - 3.54206590004570572e+01, 2.83364333813803171e+02, 2.26685462770169033e+03, - 1.81309949687949847e+04, 2.89045130868813896e+05, 2.25101610920316912e+06, - 8.23359498302312009e+06, 3.40035930111785606e+07, 4.57307533941198885e+07, - 4.94007410000000000e+07, 4.99209330000000000e+07, 4.99247730000000000e+07 }, + { + 1.07628616390943998e-62, 
4.62261387512834023e-53, 8.52721751060712554e-34, + 3.66241203339361373e-24, 6.75595774724252468e-05, 3.45905036499356000e-02, + 3.54206590004570572e+01, 2.83364333813803171e+02, 2.26685462770169033e+03, + 1.81309949687949847e+04, 2.89045130868813896e+05, 2.25101610920316912e+06, + 8.23359498302312009e+06, 3.40035930111785606e+07, 4.57307533941198885e+07, + 4.94007410000000000e+07, 4.99209330000000000e+07, 4.99247730000000000e+07 + }, /* 44251425 */ - { 8.45562327779528750e-63, 3.63166254454270828e-53, 6.69923495212561545e-34, - 2.87729950275996440e-24, 5.30768075507823733e-05, 2.71753254548965095e-02, - 2.78275216109708978e+01, 2.22619519580197675e+02, 1.78091434578536018e+03, - 1.42446392954819730e+04, 2.27182256963651860e+05, 1.77461480911257491e+06, - 6.55507402957992628e+06, 2.86743406137902029e+07, 4.00572308235341832e+07, - 4.37271370000000000e+07, 4.42473290000000000e+07, 4.42511690000000000e+07 }, + { + 8.45562327779528750e-63, 3.63166254454270828e-53, 6.69923495212561545e-34, + 2.87729950275996440e-24, 5.30768075507823733e-05, 2.71753254548965095e-02, + 2.78275216109708978e+01, 2.22619519580197675e+02, 1.78091434578536018e+03, + 1.42446392954819730e+04, 2.27182256963651860e+05, 1.77461480911257491e+06, + 6.55507402957992628e+06, 2.86743406137902029e+07, 4.00572308235341832e+07, + 4.37271370000000000e+07, 4.42473290000000000e+07, 4.42511690000000000e+07 + }, /* 43691201 */ - { 8.24288176206433810e-63, 3.54029075928611856e-53, 6.53068375830698963e-34, - 2.80490731624468888e-24, 5.17414074132004304e-05, 2.64916005848709717e-02, - 2.71273877811360791e+01, 2.17018473441357912e+02, 1.73610754462317163e+03, - 1.38862852138241597e+04, 2.21476017148987623e+05, 1.73055958502948540e+06, - 6.39857166559864674e+06, 2.81548679497163482e+07, 3.94970225171834230e+07, - 4.31669130000000000e+07, 4.36871050000000000e+07, 4.36909450000000000e+07 }, + { + 8.24288176206433810e-63, 3.54029075928611856e-53, 6.53068375830698963e-34, + 2.80490731624468888e-24, 
5.17414074132004304e-05, 2.64916005848709717e-02, + 2.71273877811360791e+01, 2.17018473441357912e+02, 1.73610754462317163e+03, + 1.38862852138241597e+04, 2.21476017148987623e+05, 1.73055958502948540e+06, + 6.39857166559864674e+06, 2.81548679497163482e+07, 3.94970225171834230e+07, + 4.31669130000000000e+07, 4.36871050000000000e+07, 4.36909450000000000e+07 + }, /* 33558529 */ - { 4.86291784915122170e-63, 2.08860731252391586e-53, 3.85280045646069782e-34, - 1.65476519585125690e-24, 3.05250300699314860e-05, 1.56288153909619858e-02, - 1.60039018771892643e+01, 1.28030930083075560e+02, 1.02422920513447593e+03, - 8.19266670739054098e+03, 1.30763213462519823e+05, 1.02731598739112553e+06, - 3.86648187299589021e+06, 1.90513077430028245e+07, 2.93656306571820080e+07, - 3.30342410000000000e+07, 3.35544330000000000e+07, 3.35582730000000000e+07 }, + { + 4.86291784915122170e-63, 2.08860731252391586e-53, 3.85280045646069782e-34, + 1.65476519585125690e-24, 3.05250300699314860e-05, 1.56288153909619858e-02, + 1.60039018771892643e+01, 1.28030930083075560e+02, 1.02422920513447593e+03, + 8.19266670739054098e+03, 1.30763213462519823e+05, 1.02731598739112553e+06, + 3.86648187299589021e+06, 1.90513077430028245e+07, 2.93656306571820080e+07, + 3.30342410000000000e+07, 3.35544330000000000e+07, 3.35582730000000000e+07 + }, /* 33554432 */ - { 4.86173054093815170e-63, 2.08809736752937507e-53, 3.85185977398010151e-34, - 1.65436117580224877e-24, 3.05175772154867956e-05, 1.56249995294880754e-02, - 1.59999944369014884e+01, 1.27999670665119382e+02, 1.02397913646883865e+03, - 8.19066658538974480e+03, 1.30731328417170167e+05, 1.02706774802737299e+06, - 3.86557557111472497e+06, 1.90477651439465471e+07, 2.93615350309002101e+07, - 3.30301440000000000e+07, 3.35503360000000000e+07, 3.35541760000000000e+07 }, + { + 4.86173054093815170e-63, 2.08809736752937507e-53, 3.85185977398010151e-34, + 1.65436117580224877e-24, 3.05175772154867956e-05, 1.56249995294880754e-02, + 1.59999944369014884e+01, 
1.27999670665119382e+02, 1.02397913646883865e+03, + 8.19066658538974480e+03, 1.30731328417170167e+05, 1.02706774802737299e+06, + 3.86557557111472497e+06, 1.90477651439465471e+07, 2.93615350309002101e+07, + 3.30301440000000000e+07, 3.35503360000000000e+07, 3.35541760000000000e+07 + }, /* 26977161 */ - { 3.14256005499304537e-63, 1.34971926619110914e-53, 2.48979258747824472e-34, - 1.06935777370422802e-24, 1.97261691747440925e-05, 1.00997986149531007e-02, - 1.03421911410463228e+01, 8.27373811067683533e+01, 6.61889575586005321e+02, - 5.29451037409544824e+03, 8.45461443414444802e+04, 6.66574543746769894e+05, - 2.53827383658029372e+06, 1.35603369840820655e+07, 2.27896075604615994e+07, - 2.64528730000000000e+07, 2.69730650000000000e+07, 2.69769050000000000e+07 }, + { + 3.14256005499304537e-63, 1.34971926619110914e-53, 2.48979258747824472e-34, + 1.06935777370422802e-24, 1.97261691747440925e-05, 1.00997986149531007e-02, + 1.03421911410463228e+01, 8.27373811067683533e+01, 6.61889575586005321e+02, + 5.29451037409544824e+03, 8.45461443414444802e+04, 6.66574543746769894e+05, + 2.53827383658029372e+06, 1.35603369840820655e+07, 2.27896075604615994e+07, + 2.64528730000000000e+07, 2.69730650000000000e+07, 2.69769050000000000e+07 + }, /* 22370049 */ - { 2.16085171788696973e-63, 9.28078745982995323e-54, 1.71200311073976113e-34, - 7.35299737127754043e-25, 1.35638860682561044e-05, 6.94470966551262447e-03, - 7.11138119182984063e+00, 5.68909651356401653e+01, 4.55122319603302856e+02, - 3.64063288968196957e+03, 5.81554370404469810e+04, 4.59645385789985245e+05, - 1.76481282635707408e+06, 1.00151462171464767e+07, 1.81959928124494441e+07, - 2.18457610000000000e+07, 2.23659530000000000e+07, 2.23697930000000000e+07 }, + { + 2.16085171788696973e-63, 9.28078745982995323e-54, 1.71200311073976113e-34, + 7.35299737127754043e-25, 1.35638860682561044e-05, 6.94470966551262447e-03, + 7.11138119182984063e+00, 5.68909651356401653e+01, 4.55122319603302856e+02, + 3.64063288968196957e+03, 
5.81554370404469810e+04, 4.59645385789985245e+05, + 1.76481282635707408e+06, 1.00151462171464767e+07, 1.81959928124494441e+07, + 2.18457610000000000e+07, 2.23659530000000000e+07, 2.23697930000000000e+07 + }, /* 18877441 */ - { 1.53878283990836292e-63, 6.60902197305242237e-54, 1.21914936914420980e-34, - 5.23620666941341261e-25, 9.65909643476873488e-06, 4.94545737373954832e-03, - 5.06414744590625077e+00, 4.05131288488040155e+01, 3.24101784837318064e+02, - 2.59260655174234762e+03, 4.14247903550759002e+04, 3.28028082683300890e+05, - 1.26742600458991365e+06, 7.54599182152087614e+06, 1.47296973581916802e+07, - 1.83531530000000000e+07, 1.88733450000000000e+07, 1.88771850000000000e+07 }, + { + 1.53878283990836292e-63, 6.60902197305242237e-54, 1.21914936914420980e-34, + 5.23620666941341261e-25, 9.65909643476873488e-06, 4.94545737373954832e-03, + 5.06414744590625077e+00, 4.05131288488040155e+01, 3.24101784837318064e+02, + 2.59260655174234762e+03, 4.14247903550759002e+04, 3.28028082683300890e+05, + 1.26742600458991365e+06, 7.54599182152087614e+06, 1.47296973581916802e+07, + 1.83531530000000000e+07, 1.88733450000000000e+07, 1.88771850000000000e+07 + }, /* 18616785 */ - { 1.49658179329122305e-63, 6.42776985797483522e-54, 1.18571425534766178e-34, - 5.09260394911920045e-25, 9.39419617181328754e-06, 4.80982843914157677e-03, - 4.92526345384282216e+00, 3.94020589843511928e+01, 3.15213358531706945e+02, - 2.52150762757849679e+03, 4.02895318773614636e+04, 3.19083263398166222e+05, - 1.23344671390196425e+06, 7.37060359433948807e+06, 1.44720266633904669e+07, - 1.80924970000000000e+07, 1.86126890000000000e+07, 1.86165290000000000e+07 }, + { + 1.49658179329122305e-63, 6.42776985797483522e-54, 1.18571425534766178e-34, + 5.09260394911920045e-25, 9.39419617181328754e-06, 4.80982843914157677e-03, + 4.92526345384282216e+00, 3.94020589843511928e+01, 3.15213358531706945e+02, + 2.52150762757849679e+03, 4.02895318773614636e+04, 3.19083263398166222e+05, + 1.23344671390196425e+06, 
7.37060359433948807e+06, 1.44720266633904669e+07, + 1.80924970000000000e+07, 1.86126890000000000e+07, 1.86165290000000000e+07 + }, /* 17676661 */ - { 1.34924729526152486e-63, 5.79497300736470505e-54, 1.06898383980911691e-34, - 4.59125063193266000e-25, 8.46936253854919755e-06, 4.33631361902940549e-03, - 4.44038440299461268e+00, 3.55230335814082565e+01, 2.84181603549241117e+02, - 2.27328227266108661e+03, 3.63257830806934944e+04, 2.87837384102243173e+05, - 1.11455845455760439e+06, 6.74926355401089974e+06, 1.35443510115238819e+07, - 1.71523730000000000e+07, 1.76725650000000000e+07, 1.76764050000000000e+07 }, + { + 1.34924729526152486e-63, 5.79497300736470505e-54, 1.06898383980911691e-34, + 4.59125063193266000e-25, 8.46936253854919755e-06, 4.33631361902940549e-03, + 4.44038440299461268e+00, 3.55230335814082565e+01, 2.84181603549241117e+02, + 2.27328227266108661e+03, 3.63257830806934944e+04, 2.87837384102243173e+05, + 1.11455845455760439e+06, 6.74926355401089974e+06, 1.35443510115238819e+07, + 1.71523730000000000e+07, 1.76725650000000000e+07, 1.76764050000000000e+07 + }, /* 16777216 */ - { 1.21543259901182161e-63, 5.22024326324805573e-54, 9.62964914796432828e-35, - 4.13590281624610549e-25, 7.62939407650033587e-06, 3.90624976656302669e-03, - 3.99999912579873262e+00, 3.19999574025932816e+01, 2.55997380594878024e+02, - 2.04783322146484898e+03, 3.27253730219586105e+04, 2.59434518880420335e+05, - 1.00621717678566615e+06, 6.17199266255285591e+06, 1.26597333208222985e+07, - 1.62529280000000075e+07, 1.67731200000000000e+07, 1.67769600000000000e+07 }, + { + 1.21543259901182161e-63, 5.22024326324805573e-54, 9.62964914796432828e-35, + 4.13590281624610549e-25, 7.62939407650033587e-06, 3.90624976656302669e-03, + 3.99999912579873262e+00, 3.19999574025932816e+01, 2.55997380594878024e+02, + 2.04783322146484898e+03, 3.27253730219586105e+04, 2.59434518880420335e+05, + 1.00621717678566615e+06, 6.17199266255285591e+06, 1.26597333208222985e+07, + 1.62529280000000075e+07, 
1.67731200000000000e+07, 1.67769600000000000e+07 + }, /* 16777214 */ - { 1.21543230923011700e-63, 5.22024201864511143e-54, 9.62964685207712960e-35, - 4.13590183017006213e-25, 7.62939225751109495e-06, 3.90624883524053534e-03, - 3.99999817212472886e+00, 3.19999497732139844e+01, 2.55997319560658525e+02, - 2.04783273324324227e+03, 3.27253652246982456e+04, 2.59434457346894662e+05, - 1.00621694177949021e+06, 6.17199139831178170e+06, 1.26597313574535716e+07, - 1.62529260000000075e+07, 1.67731180000000000e+07, 1.67769580000000000e+07 }, + { + 1.21543230923011700e-63, 5.22024201864511143e-54, 9.62964685207712960e-35, + 4.13590183017006213e-25, 7.62939225751109495e-06, 3.90624883524053534e-03, + 3.99999817212472886e+00, 3.19999497732139844e+01, 2.55997319560658525e+02, + 2.04783273324324227e+03, 3.27253652246982456e+04, 2.59434457346894662e+05, + 1.00621694177949021e+06, 6.17199139831178170e+06, 1.26597313574535716e+07, + 1.62529260000000075e+07, 1.67731180000000000e+07, 1.67769580000000000e+07 + }, /* 15082603 */ - { 9.82298962180288047e-64, 4.21894191745907802e-54, 7.78257418132130597e-35, - 3.34259015874689832e-25, 6.16599052016874108e-06, 3.15698714588672326e-03, - 3.23275437590726122e+00, 2.58620091390967453e+01, 2.06894417561625545e+02, - 1.65504939094220754e+03, 2.64517551029136412e+04, 2.09891694997857179e+05, - 8.16575685588646214e+05, 5.13336480662504770e+06, 1.10033654155580010e+07, - 1.45583150000001676e+07, 1.50785070000000000e+07, 1.50823470000000000e+07 }, + { + 9.82298962180288047e-64, 4.21894191745907802e-54, 7.78257418132130597e-35, + 3.34259015874689832e-25, 6.16599052016874108e-06, 3.15698714588672326e-03, + 3.23275437590726122e+00, 2.58620091390967453e+01, 2.06894417561625545e+02, + 1.65504939094220754e+03, 2.64517551029136412e+04, 2.09891694997857179e+05, + 8.16575685588646214e+05, 5.13336480662504770e+06, 1.10033654155580010e+07, + 1.45583150000001676e+07, 1.50785070000000000e+07, 1.50823470000000000e+07 + }, /* 14986273 */ - { 9.69791481108703163e-64, 
4.16522269530128191e-54, 7.68347970702294475e-35, - 3.30002940611432092e-25, 6.08747978902901173e-06, 3.11678965155155231e-03, - 3.19159215049388845e+00, 2.55327118282773071e+01, 2.04260070593989951e+02, - 1.63397663226719487e+03, 2.61151435765585957e+04, 2.07231508480752498e+05, - 8.06367654055638355e+05, 5.07635187903902307e+06, 1.09097087114329021e+07, - 1.44619850000002030e+07, 1.49821770000000000e+07, 1.49860170000000000e+07 }, + { + 9.69791481108703163e-64, 4.16522269530128191e-54, 7.68347970702294475e-35, + 3.30002940611432092e-25, 6.08747978902901173e-06, 3.11678965155155231e-03, + 3.19159215049388845e+00, 2.55327118282773071e+01, 2.04260070593989951e+02, + 1.63397663226719487e+03, 2.61151435765585957e+04, 2.07231508480752498e+05, + 8.06367654055638355e+05, 5.07635187903902307e+06, 1.09097087114329021e+07, + 1.44619850000002030e+07, 1.49821770000000000e+07, 1.49860170000000000e+07 + }, /* 14776336 */ - { 9.42810913278675722e-64, 4.04934203884380436e-54, 7.46971762574649011e-35, - 3.20821929129359426e-25, 5.91812001988149620e-06, 3.03007744976589765e-03, - 3.10279887462500303e+00, 2.48223666728909436e+01, 1.98577376650443540e+02, - 1.58851938758362576e+03, 2.53890076205234654e+04, 2.01492261805796676e+05, - 7.84335037057878566e+05, 4.95288674782931432e+06, 1.07058149018839840e+07, - 1.42520480000003017e+07, 1.47722400000000000e+07, 1.47760800000000000e+07 }, + { + 9.42810913278675722e-64, 4.04934203884380436e-54, 7.46971762574649011e-35, + 3.20821929129359426e-25, 5.91812001988149620e-06, 3.03007744976589765e-03, + 3.10279887462500303e+00, 2.48223666728909436e+01, 1.98577376650443540e+02, + 1.58851938758362576e+03, 2.53890076205234654e+04, 2.01492261805796676e+05, + 7.84335037057878566e+05, 4.95288674782931432e+06, 1.07058149018839840e+07, + 1.42520480000003017e+07, 1.47722400000000000e+07, 1.47760800000000000e+07 + }, /* 14196869 */ - { 8.70314528971027262e-64, 3.73797243916420662e-54, 6.89534209398419660e-35, - 2.96152687883942827e-25, 
5.46305284013487504e-06, 2.79708305378238405e-03, - 2.86421266221348869e+00, 2.29136797245160615e+01, 1.83308057120624454e+02, - 1.46637609822502554e+03, 2.34378018895664463e+04, 1.86065371296118683e+05, - 7.25048552277948707e+05, 4.61779125281785242e+06, 1.01446868737243451e+07, - 1.36725810000009108e+07, 1.41927730000000000e+07, 1.41966130000000000e+07 }, + { + 8.70314528971027262e-64, 3.73797243916420662e-54, 6.89534209398419660e-35, + 2.96152687883942827e-25, 5.46305284013487504e-06, 2.79708305378238405e-03, + 2.86421266221348869e+00, 2.29136797245160615e+01, 1.83308057120624454e+02, + 1.46637609822502554e+03, 2.34378018895664463e+04, 1.86065371296118683e+05, + 7.25048552277948707e+05, 4.61779125281785242e+06, 1.01446868737243451e+07, + 1.36725810000009108e+07, 1.41927730000000000e+07, 1.41966130000000000e+07 + }, /* 12204240 */ - { 6.43150420527001539e-64, 2.76231002257211870e-54, 5.09556260386307283e-35, - 2.18852747383125011e-25, 4.03712062080382464e-06, 2.06700575761862432e-03, - 2.11661365131384116e+00, 1.69328955058294497e+01, 1.35462286951825348e+02, - 1.08364216400000464e+03, 1.73228893695771148e+04, 1.37669261714004766e+05, - 5.38415595845002681e+05, 3.53292539626187785e+06, 8.23848823565938789e+06, - 1.16799520000407528e+07, 1.22001440000000000e+07, 1.22039840000000000e+07 }, + { + 6.43150420527001539e-64, 2.76231002257211870e-54, 5.09556260386307283e-35, + 2.18852747383125011e-25, 4.03712062080382464e-06, 2.06700575761862432e-03, + 2.11661365131384116e+00, 1.69328955058294497e+01, 1.35462286951825348e+02, + 1.08364216400000464e+03, 1.73228893695771148e+04, 1.37669261714004766e+05, + 5.38415595845002681e+05, 3.53292539626187785e+06, 8.23848823565938789e+06, + 1.16799520000407528e+07, 1.22001440000000000e+07, 1.22039840000000000e+07 + }, /* 11017633 */ - { 5.24164589759972754e-64, 2.25126977074033947e-54, 4.15285973017258180e-35, - 1.78363967259666233e-25, 3.29023445600991739e-06, 1.68460004130569592e-03, - 1.72503026241426105e+00, 
1.38002320160382475e+01, 1.10401210801834779e+02, - 8.83168387150024387e+02, 1.41193736003445592e+04, 1.12282200585662198e+05, - 4.40082662240044388e+05, 2.94038767245387891e+06, 7.12661430867962260e+06, - 1.04933450003918260e+07, 1.10135370000000000e+07, 1.10173770000000000e+07 }, + { + 5.24164589759972754e-64, 2.25126977074033947e-54, 4.15285973017258180e-35, + 1.78363967259666233e-25, 3.29023445600991739e-06, 1.68460004130569592e-03, + 1.72503026241426105e+00, 1.38002320160382475e+01, 1.10401210801834779e+02, + 8.83168387150024387e+02, 1.41193736003445592e+04, 1.12282200585662198e+05, + 4.40082662240044388e+05, 2.94038767245387891e+06, 7.12661430867962260e+06, + 1.04933450003918260e+07, 1.10135370000000000e+07, 1.10173770000000000e+07 + }, /* 9437505 */ - { 3.84596615253128342e-64, 1.65182988466448099e-54, 3.04708831357108469e-35, - 1.30871446548116017e-25, 2.41415208102884383e-06, 1.23604586537905408e-03, - 1.26571085309146980e+00, 1.01256804873721595e+01, 8.10050383096763937e+01, - 6.48014349639423358e+02, 1.03611138831922271e+04, 8.24657129882121953e+04, - 3.24156550320632989e+05, 2.21947546481000213e+06, 5.68524343875118531e+06, - 8.91321700797987171e+06, 9.43340900000000000e+06, 9.43724900000000000e+06 }, + { + 3.84596615253128342e-64, 1.65182988466448099e-54, 3.04708831357108469e-35, + 1.30871446548116017e-25, 2.41415208102884383e-06, 1.23604586537905408e-03, + 1.26571085309146980e+00, 1.01256804873721595e+01, 8.10050383096763937e+01, + 6.48014349639423358e+02, 1.03611138831922271e+04, 8.24657129882121953e+04, + 3.24156550320632989e+05, 2.21947546481000213e+06, 5.68524343875118531e+06, + 8.91321700797987171e+06, 9.43340900000000000e+06, 9.43724900000000000e+06 + }, /* 8390657 */ - { 3.04006590453258966e-64, 1.30569836376521308e-54, 2.40858835538382027e-35, - 1.03448082158999336e-25, 1.90828029650285053e-06, 9.77039511733760911e-04, - 1.00048838056196132e+00, 8.00390259075751231e+00, 6.40309356878872933e+01, - 5.12229243608175807e+02, 
8.19066683023702899e+03, 6.52277588009487954e+04, - 2.56891072309514391e+05, 1.78809403153571300e+06, 4.76371295024558529e+06, - 7.86636905876981001e+06, 8.38656100000000000e+06, 8.39040100000000000e+06 }, + { + 3.04006590453258966e-64, 1.30569836376521308e-54, 2.40858835538382027e-35, + 1.03448082158999336e-25, 1.90828029650285053e-06, 9.77039511733760911e-04, + 1.00048838056196132e+00, 8.00390259075751231e+00, 6.40309356878872933e+01, + 5.12229243608175807e+02, 8.19066683023702899e+03, 6.52277588009487954e+04, + 2.56891072309514391e+05, 1.78809403153571300e+06, 4.76371295024558529e+06, + 7.86636905876981001e+06, 8.38656100000000000e+06, 8.39040100000000000e+06 + }, /* 8388608 */ - { 3.03858131641597245e-64, 1.30506073802432296e-54, 2.40741214349811932e-35, - 1.03397564243176815e-25, 1.90734840543853551e-06, 9.76562383508887020e-04, - 9.99999801317883907e-01, 7.99999396006690677e+00, 6.39996668511303071e+01, - 5.11979106274727883e+02, 8.18666829515939844e+03, 6.51959881527814287e+04, - 2.56766914989349432e+05, 1.78728773698867904e+06, 4.76194118448516913e+06, - 7.86432005899994168e+06, 8.38451200000000000e+06, 8.38835200000000000e+06 }, + { + 3.03858131641597245e-64, 1.30506073802432296e-54, 2.40741214349811932e-35, + 1.03397564243176815e-25, 1.90734840543853551e-06, 9.76562383508887020e-04, + 9.99999801317883907e-01, 7.99999396006690677e+00, 6.39996668511303071e+01, + 5.11979106274727883e+02, 8.18666829515939844e+03, 6.51959881527814287e+04, + 2.56766914989349432e+05, 1.78728773698867904e+06, 4.76194118448516913e+06, + 7.86432005899994168e+06, 8.38451200000000000e+06, 8.38835200000000000e+06 + }, /* 8303633 */ - { 2.97733261180485959e-64, 1.27875461970161355e-54, 2.35888592027094511e-35, - 1.01313378825585727e-25, 1.86890197043808392e-06, 9.56877808790931330e-04, - 9.79842799195114300e-01, 7.83873807696676383e+00, 6.27096283547353366e+01, - 5.01659346659709513e+02, 8.02170245095559312e+03, 6.38851939022925071e+04, - 2.51643815255051391e+05, 
1.75398342366120382e+06, 4.68858358349586092e+06, - 7.77934506938103493e+06, 8.29953700000000000e+06, 8.30337700000000000e+06 }, + { + 2.97733261180485959e-64, 1.27875461970161355e-54, 2.35888592027094511e-35, + 1.01313378825585727e-25, 1.86890197043808392e-06, 9.56877808790931330e-04, + 9.79842799195114300e-01, 7.83873807696676383e+00, 6.27096283547353366e+01, + 5.01659346659709513e+02, 8.02170245095559312e+03, 6.38851939022925071e+04, + 2.51643815255051391e+05, 1.75398342366120382e+06, 4.68858358349586092e+06, + 7.77934506938103493e+06, 8.29953700000000000e+06, 8.30337700000000000e+06 + }, /* 6445069 */ - { 1.79368505410408035e-64, 7.70381864670101568e-55, 1.42110370965965099e-35, - 6.10359395721248029e-26, 1.12591435658525644e-06, 5.76468150537344320e-04, - 5.90303350141551664e-01, 4.72242478267542509e+00, 3.77792690805288558e+01, - 3.02225885259077643e+02, 4.83334738231306892e+03, 3.85317788870130607e+04, - 1.52297025401436375e+05, 1.09355884627841157e+06, 3.15298493161437940e+06, - 5.92078340317591745e+06, 6.44097300000000000e+06, 6.44481300000000000e+06 }, + { + 1.79368505410408035e-64, 7.70381864670101568e-55, 1.42110370965965099e-35, + 6.10359395721248029e-26, 1.12591435658525644e-06, 5.76468150537344320e-04, + 5.90303350141551664e-01, 4.72242478267542509e+00, 3.77792690805288558e+01, + 3.02225885259077643e+02, 4.83334738231306892e+03, 3.85317788870130607e+04, + 1.52297025401436375e+05, 1.09355884627841157e+06, 3.15298493161437940e+06, + 5.92078340317591745e+06, 6.44097300000000000e+06, 6.44481300000000000e+06 + }, /* 5471025 */ - { 1.29249369610449219e-64, 5.55121815505495657e-55, 1.02401900603628891e-35, - 4.39812814140828746e-26, 8.11311442279305058e-07, 4.15391458426019348e-04, - 4.25360831402496142e-01, 3.40288541657277221e+00, 2.72230043153551051e+01, - 2.17778977519387723e+02, 3.48307701466327671e+03, 2.77819973005047868e+04, - 1.10006032571945238e+05, 8.02497636826934526e+05, 2.41479032500354247e+06, - 4.94675240411104914e+06, 
5.46692900000000000e+06, 5.47076900000000000e+06 }, + { + 1.29249369610449219e-64, 5.55121815505495657e-55, 1.02401900603628891e-35, + 4.39812814140828746e-26, 8.11311442279305058e-07, 4.15391458426019348e-04, + 4.25360831402496142e-01, 3.40288541657277221e+00, 2.72230043153551051e+01, + 2.17778977519387723e+02, 3.48307701466327671e+03, 2.77819973005047868e+04, + 1.10006032571945238e+05, 8.02497636826934526e+05, 2.41479032500354247e+06, + 4.94675240411104914e+06, 5.46692900000000000e+06, 5.47076900000000000e+06 + }, /* 5461601 */ - { 1.28804481454968919e-64, 5.53211035427330002e-55, 1.02049423892798245e-35, - 4.38298938195209473e-26, 8.08518834066487105e-07, 4.13961643021164814e-04, - 4.23896700541549154e-01, 3.39117237605436062e+00, 2.71293003988329815e+01, - 2.17029372274540748e+02, 3.47109048311671313e+03, 2.76865308479067826e+04, - 1.09629930206165693e+05, 7.99877169687261223e+05, 2.40792627883238578e+06, - 4.93732868350143358e+06, 5.45750500000000000e+06, 5.46134500000000000e+06 }, + { + 1.28804481454968919e-64, 5.53211035427330002e-55, 1.02049423892798245e-35, + 4.38298938195209473e-26, 8.08518834066487105e-07, 4.13961643021164814e-04, + 4.23896700541549154e-01, 3.39117237605436062e+00, 2.71293003988329815e+01, + 2.17029372274540748e+02, 3.47109048311671313e+03, 2.76865308479067826e+04, + 1.09629930206165693e+05, 7.99877169687261223e+05, 2.40792627883238578e+06, + 4.93732868350143358e+06, 5.45750500000000000e+06, 5.46134500000000000e+06 + }, /* 5000000 */ - { 1.07952085348259170e-64, 4.63650676105773906e-55, 8.55284536172561161e-36, - 3.67341911163567920e-26, 6.77626222278107512e-07, 3.46944625790372989e-04, - 3.55271279996754563e-01, 2.84216929754907532e+00, 2.27372940653300759e+01, - 1.81894492427756745e+02, 2.90925341562651647e+03, 2.32109475844556837e+04, - 9.19864480283138982e+04, 6.76244582431662595e+05, 2.07902454915874335e+06, - 4.47574982779582217e+06, 4.99590400000000000e+06, 4.99974400000000000e+06 }, + { + 1.07952085348259170e-64, 
4.63650676105773906e-55, 8.55284536172561161e-36, + 3.67341911163567920e-26, 6.77626222278107512e-07, 3.46944625790372989e-04, + 3.55271279996754563e-01, 2.84216929754907532e+00, 2.27372940653300759e+01, + 1.81894492427756745e+02, 2.90925341562651647e+03, 2.32109475844556837e+04, + 9.19864480283138982e+04, 6.76244582431662595e+05, 2.07902454915874335e+06, + 4.47574982779582217e+06, 4.99590400000000000e+06, 4.99974400000000000e+06 + }, /* 4720129 */ - { 9.62052468491602810e-65, 4.13198388920750452e-55, 7.62216493209018785e-36, - 3.27369491080454178e-26, 6.03890121950116545e-07, 3.09191742424983634e-04, - 3.16612330098731132e-01, 2.53289784792646122e+00, 2.02631320402621107e+01, - 1.62101808815417854e+02, 2.59273843912307711e+03, 2.06888306707860320e+04, - 8.20335711247183208e+04, 6.05859806423343602e+05, 1.88701706041535083e+06, - 4.19590551232236158e+06, 4.71603300000000000e+06, 4.71987300000000000e+06 }, + { + 9.62052468491602810e-65, 4.13198388920750452e-55, 7.62216493209018785e-36, + 3.27369491080454178e-26, 6.03890121950116545e-07, 3.09191742424983634e-04, + 3.16612330098731132e-01, 2.53289784792646122e+00, 2.02631320402621107e+01, + 1.62101808815417854e+02, 2.59273843912307711e+03, 2.06888306707860320e+04, + 8.20335711247183208e+04, 6.05859806423343602e+05, 1.88701706041535083e+06, + 4.19590551232236158e+06, 4.71603300000000000e+06, 4.71987300000000000e+06 + }, /* 4598479 */ - { 9.13102296289999889e-65, 3.92174450046805166e-55, 7.23434171226120578e-36, - 3.10712610622505210e-26, 5.73163600862704501e-07, 2.93459763629244023e-04, - 3.00502784877568652e-01, 2.40402154589327210e+00, 1.92321254470970260e+01, - 1.53854000743080690e+02, 2.46084059619524533e+03, 1.96376437319819379e+04, - 7.78830134114269749e+04, 5.76361321148565039e+05, 1.80542466236221301e+06, - 4.07427236013673665e+06, 4.59438300000000000e+06, 4.59822300000000000e+06 }, + { + 9.13102296289999889e-65, 3.92174450046805166e-55, 7.23434171226120578e-36, + 3.10712610622505210e-26, 
5.73163600862704501e-07, 2.93459763629244023e-04, + 3.00502784877568652e-01, 2.40402154589327210e+00, 1.92321254470970260e+01, + 1.53854000743080690e+02, 2.46084059619524533e+03, 1.96376437319819379e+04, + 7.78830134114269749e+04, 5.76361321148565039e+05, 1.80542466236221301e+06, + 4.07427236013673665e+06, 4.59438300000000000e+06, 4.59822300000000000e+06 + }, /* 4514873 */ - { 8.80201481185765059e-65, 3.78043657558362023e-55, 6.97367459966819779e-36, - 2.99517043385208020e-26, 5.52511424504064165e-07, 2.82885849334287552e-04, - 2.89675097340006849e-01, 2.31740008485763216e+00, 1.85391562717557470e+01, - 1.48310408165256945e+02, 2.37218721144947949e+03, 1.89310433056085276e+04, - 7.50922424384496408e+04, 5.56476519408195047e+05, 1.75003032936007436e+06, - 3.99068042602826888e+06, 4.51077700000000000e+06, 4.51461700000000000e+06 }, + { + 8.80201481185765059e-65, 3.78043657558362023e-55, 6.97367459966819779e-36, + 2.99517043385208020e-26, 5.52511424504064165e-07, 2.82885849334287552e-04, + 2.89675097340006849e-01, 2.31740008485763216e+00, 1.85391562717557470e+01, + 1.48310408165256945e+02, 2.37218721144947949e+03, 1.89310433056085276e+04, + 7.50922424384496408e+04, 5.56476519408195047e+05, 1.75003032936007436e+06, + 3.99068042602826888e+06, 4.51077700000000000e+06, 4.51461700000000000e+06 + }, /* 4216423 */ - { 7.67678466448147999e-65, 3.29715390723822894e-55, 6.08217542984550923e-36, - 2.61227445597212045e-26, 4.81879583396028819e-07, 2.46722346689160995e-04, - 2.52643672927461205e-01, 2.02114881826249349e+00, 1.61691543761076666e+01, - 1.29350920164308604e+02, 2.06897994841936315e+03, 1.65139961617354602e+04, - 6.55409147975342930e+04, 4.88100916845553555e+05, 1.55700132055291533e+06, - 3.69230361198300030e+06, 4.21232700000000000e+06, 4.21616700000000000e+06 }, + { + 7.67678466448147999e-65, 3.29715390723822894e-55, 6.08217542984550923e-36, + 2.61227445597212045e-26, 4.81879583396028819e-07, 2.46722346689160995e-04, + 2.52643672927461205e-01, 
2.02114881826249349e+00, 1.61691543761076666e+01, + 1.29350920164308604e+02, 2.06897994841936315e+03, 1.65139961617354602e+04, + 6.55409147975342930e+04, 4.88100916845553555e+05, 1.55700132055291533e+06, + 3.69230361198300030e+06, 4.21232700000000000e+06, 4.21616700000000000e+06 + }, /* 4194304 */ - { 7.59645238547202323e-65, 3.26265145612235253e-55, 6.01852964128048457e-36, - 2.58493879793062928e-26, 4.76837044516251121e-07, 2.44140566782865192e-04, - 2.49999930461255154e-01, 1.99999888738057052e+00, 1.59999554953052812e+01, - 1.27997365357353743e+02, 2.04733300825732044e+03, 1.63414126607763610e+04, - 6.48586183619030489e+04, 4.83196861208001501e+05, 1.54299802768340637e+06, - 3.67019187768841069e+06, 4.19020800000000000e+06, 4.19404800000000000e+06 }, + { + 7.59645238547202323e-65, 3.26265145612235253e-55, 6.01852964128048457e-36, + 2.58493879793062928e-26, 4.76837044516251121e-07, 2.44140566782865192e-04, + 2.49999930461255154e-01, 1.99999888738057052e+00, 1.59999554953052812e+01, + 1.27997365357353743e+02, 2.04733300825732044e+03, 1.63414126607763610e+04, + 6.48586183619030489e+04, 4.83196861208001501e+05, 1.54299802768340637e+06, + 3.67019187768841069e+06, 4.19020800000000000e+06, 4.19404800000000000e+06 + }, /* 4000000 */ - { 6.90893311684184468e-65, 2.96736417870870697e-55, 5.47382075781328512e-36, - 2.35098811389739960e-26, 4.33680760573953185e-07, 2.22044549405662773e-04, - 2.27373609983355179e-01, 1.81898839734530293e+00, 1.45518762974392430e+01, - 1.16413034003141178e+02, 1.86206657745167763e+03, 1.48642188911844787e+04, - 5.90168968299262124e+04, 4.41096638730170089e+05, 1.42185603096995712e+06, - 3.47596677852119505e+06, 3.99590400000000000e+06, 3.99974400000000000e+06 }, + { + 6.90893311684184468e-65, 2.96736417870870697e-55, 5.47382075781328512e-36, + 2.35098811389739960e-26, 4.33680760573953185e-07, 2.22044549405662773e-04, + 2.27373609983355179e-01, 1.81898839734530293e+00, 1.45518762974392430e+01, + 1.16413034003141178e+02, 
1.86206657745167763e+03, 1.48642188911844787e+04, + 5.90168968299262124e+04, 4.41096638730170089e+05, 1.42185603096995712e+06, + 3.47596677852119505e+06, 3.99590400000000000e+06, 3.99974400000000000e+06 + }, /* 3981553 */ - { 6.84535550514410596e-65, 2.94005780240874949e-55, 5.42344938429471275e-36, - 2.32935377370571273e-26, 4.29689929206757446e-07, 2.20001243745771501e-04, - 2.25281265106172607e-01, 1.80224964497290951e+00, 1.44179667037433958e+01, - 1.15341784471186955e+02, 1.84493404804906459e+03, 1.47276033582964737e+04, - 5.84764753082058160e+04, 4.37191522377733258e+05, 1.41053273133602901e+06, - 3.45752890244734008e+06, 3.97745700000000000e+06, 3.98129700000000000e+06 }, + { + 6.84535550514410596e-65, 2.94005780240874949e-55, 5.42344938429471275e-36, + 2.32935377370571273e-26, 4.29689929206757446e-07, 2.20001243745771501e-04, + 2.25281265106172607e-01, 1.80224964497290951e+00, 1.44179667037433958e+01, + 1.15341784471186955e+02, 1.84493404804906459e+03, 1.47276033582964737e+04, + 5.84764753082058160e+04, 4.37191522377733258e+05, 1.41053273133602901e+06, + 3.45752890244734008e+06, 3.97745700000000000e+06, 3.98129700000000000e+06 + }, /* 3469497 */ - { 5.19785334334943400e-65, 2.23246101190900781e-55, 4.11816369412201186e-36, - 1.76873783858285884e-26, 3.26274542418221493e-07, 1.67052565712777593e-04, - 1.71061821672631342e-01, 1.36849425850745465e+00, 1.09479339161807978e+01, - 8.75821816252248908e+01, 1.40096122943031264e+03, 1.11865973776804603e+04, - 4.44589238065494865e+04, 3.35240937339222815e+05, 1.10925791919939918e+06, - 2.94590981907640956e+06, 3.46540100000000000e+06, 3.46924100000000000e+06 }, + { + 5.19785334334943400e-65, 2.23246101190900781e-55, 4.11816369412201186e-36, + 1.76873783858285884e-26, 3.26274542418221493e-07, 1.67052565712777593e-04, + 1.71061821672631342e-01, 1.36849425850745465e+00, 1.09479339161807978e+01, + 8.75821816252248908e+01, 1.40096122943031264e+03, 1.11865973776804603e+04, + 4.44589238065494865e+04, 
3.35240937339222815e+05, 1.10925791919939918e+06, + 2.94590981907640956e+06, 3.46540100000000000e+06, 3.46924100000000000e+06 + }, /* 2796417 */ - { 3.37671825984804601e-65, 1.45028944938533875e-55, 2.67531183056124863e-36, - 1.14903768188624562e-26, 2.11960040488029904e-07, 1.08523540727069068e-04, - 1.11128102763280903e-01, 8.89024657235948701e-01, 7.11218670620169569e+00, - 5.68968183484790444e+01, 9.10163898031904523e+02, 7.27026311537105084e+03, - 2.89302976804814243e+04, 2.20626239906953182e+05, 7.55430265292525059e+05, - 2.27465918879699614e+06, 2.79232100000000000e+06, 2.79616100000000000e+06 }, + { + 3.37671825984804601e-65, 1.45028944938533875e-55, 2.67531183056124863e-36, + 1.14903768188624562e-26, 2.11960040488029904e-07, 1.08523540727069068e-04, + 1.11128102763280903e-01, 8.89024657235948701e-01, 7.11218670620169569e+00, + 5.68968183484790444e+01, 9.10163898031904523e+02, 7.27026311537105084e+03, + 2.89302976804814243e+04, 2.20626239906953182e+05, 7.55430265292525059e+05, + 2.27465918879699614e+06, 2.79232100000000000e+06, 2.79616100000000000e+06 + }, /* 2396744 */ - { 2.48047143920984062e-65, 1.06535437100683176e-55, 1.96523194297708407e-36, - 8.44060692414111294e-27, 1.55701715756405132e-07, 7.97192784655151597e-05, - 8.16325392969082797e-02, 6.53060210574274436e-01, 5.22447504133784690e+00, - 4.17953751659456785e+01, 6.68609402176202252e+02, 5.34191798810462478e+03, - 2.12726697966660395e+04, 1.63326698532949667e+05, 5.71039962053837837e+05, - 1.87787878976813331e+06, 2.39264800000000000e+06, 2.39648800000000000e+06 }, + { + 2.48047143920984062e-65, 1.06535437100683176e-55, 1.96523194297708407e-36, + 8.44060692414111294e-27, 1.55701715756405132e-07, 7.97192784655151597e-05, + 8.16325392969082797e-02, 6.53060210574274436e-01, 5.22447504133784690e+00, + 4.17953751659456785e+01, 6.68609402176202252e+02, 5.34191798810462478e+03, + 2.12726697966660395e+04, 1.63326698532949667e+05, 5.71039962053837837e+05, + 1.87787878976813331e+06, 
2.39264800000000000e+06, 2.39648800000000000e+06 + }, /* 2098177 */ - { 1.90096951102133711e-65, 8.16460188052975446e-56, 1.50610321353860109e-36, - 6.46866404654879610e-27, 1.19325790165487525e-07, 6.10948045635459623e-05, - 6.25610786307022049e-02, 5.00488559404961619e-01, 4.00390401824189190e+00, - 3.20309469002191776e+01, 5.12416921058289972e+02, 4.09466699542457309e+03, - 1.63148862712246882e+04, 1.25897567119276093e+05, 4.47225202517700847e+05, - 1.58347287791373348e+06, 2.09408100000000000e+06, 2.09792100000000000e+06 }, + { + 1.90096951102133711e-65, 8.16460188052975446e-56, 1.50610321353860109e-36, + 6.46866404654879610e-27, 1.19325790165487525e-07, 6.10948045635459623e-05, + 6.25610786307022049e-02, 5.00488559404961619e-01, 4.00390401824189190e+00, + 3.20309469002191776e+01, 5.12416921058289972e+02, 4.09466699542457309e+03, + 1.63148862712246882e+04, 1.25897567119276093e+05, 4.47225202517700847e+05, + 1.58347287791373348e+06, 2.09408100000000000e+06, 2.09792100000000000e+06 + }, /* 2097152 */ - { 1.89911264358405187e-65, 8.15662669561360700e-56, 1.50463205158771428e-36, - 6.46234545408261769e-27, 1.19209232707357876e-07, 6.10351271449853099e-05, - 6.24999689559159743e-02, 4.99999682108684340e-01, 3.99999300640047295e+00, - 3.19996592233267698e+01, 5.11916432816754536e+02, 4.09066992542314756e+03, - 1.62989912696615120e+04, 1.25777098836656849e+05, 4.46821820522652706e+05, - 1.58246663305044221e+06, 2.09305600000000000e+06, 2.09689600000000000e+06 }, + { + 1.89911264358405187e-65, 8.15662669561360700e-56, 1.50463205158771428e-36, + 6.46234545408261769e-27, 1.19209232707357876e-07, 6.10351271449853099e-05, + 6.24999689559159743e-02, 4.99999682108684340e-01, 3.99999300640047295e+00, + 3.19996592233267698e+01, 5.11916432816754536e+02, 4.09066992542314756e+03, + 1.62989912696615120e+04, 1.25777098836656849e+05, 4.46821820522652706e+05, + 1.58246663305044221e+06, 2.09305600000000000e+06, 2.09689600000000000e+06 + }, /* 1271626 */ - { 6.98247791753670586e-66, 
2.99895143008623366e-56, 5.53208895202860154e-37, - 2.37601411275257565e-27, 4.38297242534678273e-08, 2.24408188175120292e-05, - 2.29793981925642821e-02, 1.83835170037565776e-01, 1.47068036811238745e+00, - 1.17653794451473974e+01, 1.88228655326640251e+02, 1.50478955000098654e+03, - 6.00493221828217247e+03, 4.69964688955476740e+04, 1.74675738335436967e+05, - 7.93705775574441534e+05, 1.26753000000000000e+06, 1.27137000000000000e+06 }, + { + 6.98247791753670586e-66, 2.99895143008623366e-56, 5.53208895202860154e-37, + 2.37601411275257565e-27, 4.38297242534678273e-08, 2.24408188175120292e-05, + 2.29793981925642821e-02, 1.83835170037565776e-01, 1.47068036811238745e+00, + 1.17653794451473974e+01, 1.88228655326640251e+02, 1.50478955000098654e+03, + 6.00493221828217247e+03, 4.69964688955476740e+04, 1.74675738335436967e+05, + 7.93705775574441534e+05, 1.26753000000000000e+06, 1.27137000000000000e+06 + }, /* 1180417 */ - { 6.01674571488324041e-66, 2.58417260737716580e-56, 4.76695707305772932e-37, - 2.04739247302188301e-27, 3.77677249682731562e-08, 1.93370751835450871e-05, - 1.98011647667272750e-02, 1.58409305733226813e-01, 1.26727365222838628e+00, - 1.01381384252463871e+01, 1.62196284074367895e+02, 1.29673859731428774e+03, - 5.17557281139463612e+03, 4.05690452754900689e+04, 1.51559237337625702e+05, - 7.11307437578365323e+05, 1.17632100000000000e+06, 1.18016100000000000e+06 }, + { + 6.01674571488324041e-66, 2.58417260737716580e-56, 4.76695707305772932e-37, + 2.04739247302188301e-27, 3.77677249682731562e-08, 1.93370751835450871e-05, + 1.98011647667272750e-02, 1.58409305733226813e-01, 1.26727365222838628e+00, + 1.01381384252463871e+01, 1.62196284074367895e+02, 1.29673859731428774e+03, + 5.17557281139463612e+03, 4.05690452754900689e+04, 1.51559237337625702e+05, + 7.11307437578365323e+05, 1.17632100000000000e+06, 1.18016100000000000e+06 + }, /* 1048576 */ - { 4.74777934504035996e-66, 2.03915570155726458e-56, 3.76157833530725135e-37, - 1.61558559314867667e-27, 
2.98022939659853163e-08, 1.52587745104367425e-05, - 1.56249849436188217e-02, 1.24999870856632000e-01, 9.99998410544928107e-01, - 7.99995168077293606e+00, 1.27989461928571330e+02, 1.02333268407003743e+03, - 4.08535025830558106e+03, 3.20958386865916218e+04, 1.20799142289413823e+05, - 5.95242529642230948e+05, 1.04448000000000000e+06, 1.04832000000000000e+06 }, + { + 4.74777934504035996e-66, 2.03915570155726458e-56, 3.76157833530725135e-37, + 1.61558559314867667e-27, 2.98022939659853163e-08, 1.52587745104367425e-05, + 1.56249849436188217e-02, 1.24999870856632000e-01, 9.99998410544928107e-01, + 7.99995168077293606e+00, 1.27989461928571330e+02, 1.02333268407003743e+03, + 4.08535025830558106e+03, 3.20958386865916218e+04, 1.20799142289413823e+05, + 5.95242529642230948e+05, 1.04448000000000000e+06, 1.04832000000000000e+06 + }, /* 1000000 */ - { 4.31807995946294477e-66, 1.85460122074063535e-56, 3.42113540777918151e-37, - 1.46936646915992086e-27, 2.71050272070828090e-08, 1.38777739298982540e-05, - 1.42108403697154325e-02, 1.13686715418339940e-01, 9.09493240826389937e-01, - 7.27591504542061607e+00, 1.16406170946493603e+02, 9.30743673031597268e+02, - 3.71605194956770447e+03, 2.92188944778244804e+04, 1.10274089241209091e+05, - 5.53554744840516942e+05, 9.95904000000000000e+05, 9.99744000000000000e+05 }, + { + 4.31807995946294477e-66, 1.85460122074063535e-56, 3.42113540777918151e-37, + 1.46936646915992086e-27, 2.71050272070828090e-08, 1.38777739298982540e-05, + 1.42108403697154325e-02, 1.13686715418339940e-01, 9.09493240826389937e-01, + 7.27591504542061607e+00, 1.16406170946493603e+02, 9.30743673031597268e+02, + 3.71605194956770447e+03, 2.92188944778244804e+04, 1.10274089241209091e+05, + 5.53554744840516942e+05, 9.95904000000000000e+05, 9.99744000000000000e+05 + }, /* 819841 */ - { 2.90235045358949550e-66, 1.24655002796976490e-56, 2.29947893410337365e-37, - 9.87618681981492889e-28, 1.82183490689266710e-08, 9.32779472321984348e-06, - 9.55166172246109217e-03, 
7.64132896251342869e-02, 6.11306051109687054e-01, - 4.89043139187867482e+00, 7.82422349713714453e+01, 6.25659192034058037e+02, - 2.49882041253832767e+03, 1.97089496950203138e+04, 7.51500479695295217e+04, - 4.05315292462697893e+05, 8.15745000000000000e+05, 8.19585000000000000e+05 }, + { + 2.90235045358949550e-66, 1.24655002796976490e-56, 2.29947893410337365e-37, + 9.87618681981492889e-28, 1.82183490689266710e-08, 9.32779472321984348e-06, + 9.55166172246109217e-03, 7.64132896251342869e-02, 6.11306051109687054e-01, + 4.89043139187867482e+00, 7.82422349713714453e+01, 6.25659192034058037e+02, + 2.49882041253832767e+03, 1.97089496950203138e+04, 7.51500479695295217e+04, + 4.05315292462697893e+05, 8.15745000000000000e+05, 8.19585000000000000e+05 + }, /* 652545 */ - { 1.83870213969147930e-66, 7.89716555706012712e-57, 1.45676991938802090e-37, - 6.25677916156810610e-28, 1.15417203919164522e-08, 5.90936084062561679e-06, - 6.05118546342795372e-03, 4.84094816125079305e-02, 3.87275718825497939e-01, - 3.09819716985184357e+00, 4.95688012285943671e+01, 3.96409870457700265e+02, - 1.58371435214666167e+03, 1.25273157680588301e+04, 4.82278663969549234e+04, - 2.79276527717245917e+05, 6.48449000000000000e+05, 6.52289000000000000e+05 }, + { + 1.83870213969147930e-66, 7.89716555706012712e-57, 1.45676991938802090e-37, + 6.25677916156810610e-28, 1.15417203919164522e-08, 5.90936084062561679e-06, + 6.05118546342795372e-03, 4.84094816125079305e-02, 3.87275718825497939e-01, + 3.09819716985184357e+00, 4.95688012285943671e+01, 3.96409870457700265e+02, + 1.58371435214666167e+03, 1.25273157680588301e+04, 4.82278663969549234e+04, + 2.79276527717245917e+05, 6.48449000000000000e+05, 6.52289000000000000e+05 + }, /* 524801 */ - { 1.18926762015466819e-66, 5.10786553475605035e-57, 9.42234882825664415e-38, - 4.04686800688662073e-28, 7.46515384231198445e-09, 3.82215876724521482e-06, - 3.91389055821865072e-03, 3.13111233760198990e-02, 2.50488917265499822e-01, - 2.00390687460218908e+00, 3.20612857504726705e+01, 
2.56417175606829119e+02, - 1.02466699434609745e+03, 8.12310498202530835e+03, 3.15045400686032553e+04, - 1.93198962659155397e+05, 5.20705000000000000e+05, 5.24545000000000000e+05 }, + { + 1.18926762015466819e-66, 5.10786553475605035e-57, 9.42234882825664415e-38, + 4.04686800688662073e-28, 7.46515384231198445e-09, 3.82215876724521482e-06, + 3.91389055821865072e-03, 3.13111233760198990e-02, 2.50488917265499822e-01, + 2.00390687460218908e+00, 3.20612857504726705e+01, 2.56417175606829119e+02, + 1.02466699434609745e+03, 8.12310498202530835e+03, 3.15045400686032553e+04, + 1.93198962659155397e+05, 5.20705000000000000e+05, 5.24545000000000000e+05 + }, /* 401857 */ - { 6.97321585851295025e-67, 2.99497340602616845e-57, 5.52475079285309336e-38, - 2.37286239738541065e-28, 4.37715853666972486e-09, 2.24110517076658296e-06, - 2.29489168613654978e-03, 1.83591329998224681e-02, 1.46873032685309740e-01, - 1.17498225743608220e+00, 1.87991664504370917e+01, 1.50360504164546711e+02, - 6.00992138052254290e+02, 4.77454013471333201e+03, 1.86505860938960723e+04, - 1.21176669942356806e+05, 3.97761000000000000e+05, 4.01601000000000000e+05 }, + { + 6.97321585851295025e-67, 2.99497340602616845e-57, 5.52475079285309336e-38, + 2.37286239738541065e-28, 4.37715853666972486e-09, 2.24110517076658296e-06, + 2.29489168613654978e-03, 1.83591329998224681e-02, 1.46873032685309740e-01, + 1.17498225743608220e+00, 1.87991664504370917e+01, 1.50360504164546711e+02, + 6.00992138052254290e+02, 4.77454013471333201e+03, 1.86505860938960723e+04, + 1.21176669942356806e+05, 3.97761000000000000e+05, 4.01601000000000000e+05 + }, /* 264097 */ - { 3.01173257048041585e-67, 1.29352928945114011e-57, 2.38614037543525460e-38, - 1.02483948761595803e-28, 1.89049517446831162e-09, 9.67933529325415291e-07, - 9.91163931551744364e-04, 7.92931131353941630e-03, 6.34344816203459005e-02, - 5.07475284133261262e-01, 8.11944852670449713e+00, 6.49462697901977464e+01, - 2.59657344516898661e+02, 2.06775748649864772e+03, 8.14269081216647010e+03, 
- 5.66232434728111548e+04, 2.60001000000000000e+05, 2.63841000000000000e+05 }, + { + 3.01173257048041585e-67, 1.29352928945114011e-57, 2.38614037543525460e-38, + 1.02483948761595803e-28, 1.89049517446831162e-09, 9.67933529325415291e-07, + 9.91163931551744364e-04, 7.92931131353941630e-03, 6.34344816203459005e-02, + 5.07475284133261262e-01, 8.11944852670449713e+00, 6.49462697901977464e+01, + 2.59657344516898661e+02, 2.06775748649864772e+03, 8.14269081216647010e+03, + 5.66232434728111548e+04, 2.60001000000000000e+05, 2.63841000000000000e+05 + }, /* 204800 */ - { 1.81112697232874206e-67, 7.77873111505544409e-58, 1.43492262097629106e-38, - 6.16294572938377368e-29, 1.13686282610103304e-09, 5.82073766962628089e-07, - 5.96043536214395245e-04, 4.76834822495310166e-03, 3.81467816548533359e-02, - 3.05173987973646754e-01, 4.88271104998955341e+00, 3.90573427578099199e+01, - 1.56169795671348624e+02, 1.24492319174046884e+03, 4.91958032892884057e+03, - 3.52628737812490363e+04, 2.00704000000000000e+05, 2.04544000000000000e+05 }, + { + 1.81112697232874206e-67, 7.77873111505544409e-58, 1.43492262097629106e-38, + 6.16294572938377368e-29, 1.13686282610103304e-09, 5.82073766962628089e-07, + 5.96043536214395245e-04, 4.76834822495310166e-03, 3.81467816548533359e-02, + 3.05173987973646754e-01, 4.88271104998955341e+00, 3.90573427578099199e+01, + 1.56169795671348624e+02, 1.24492319174046884e+03, 4.91958032892884057e+03, + 3.52628737812490363e+04, 2.00704000000000000e+05, 2.04544000000000000e+05 + }, /* 200000 */ - { 1.72722507485033383e-67, 7.41837520931333590e-58, 1.36844868928954633e-38, - 5.87744236675266698e-29, 1.08419675147463808e-09, 5.55108736753989574e-07, - 5.68431345360095224e-04, 4.54745070256641366e-03, 3.63796017604134173e-02, - 2.91036567035938998e-01, 4.65651731179381212e+00, 3.72480912910018702e+01, - 1.48936880685972909e+02, 1.18736413772828405e+03, 4.69345257857060551e+03, - 3.37256310720094916e+04, 1.95904000000000000e+05, 1.99744000000000000e+05 }, + { + 
1.72722507485033383e-67, 7.41837520931333590e-58, 1.36844868928954633e-38, + 5.87744236675266698e-29, 1.08419675147463808e-09, 5.55108736753989574e-07, + 5.68431345360095224e-04, 4.54745070256641366e-03, 3.63796017604134173e-02, + 2.91036567035938998e-01, 4.65651731179381212e+00, 3.72480912910018702e+01, + 1.48936880685972909e+02, 1.18736413772828405e+03, 4.69345257857060551e+03, + 3.37256310720094916e+04, 1.95904000000000000e+05, 1.99744000000000000e+05 + }, /* 102774 */ - { 4.56093001325520124e-68, 1.95890452462759358e-58, 3.61354104306368883e-39, - 1.55200406027122712e-29, 2.86294217011813689e-10, 1.46582639109909510e-07, - 1.50100622302544283e-04, 1.20080497023619128e-03, 9.60643923810312883e-03, - 7.68514803825075948e-02, 1.22961449148191515e+00, 9.83636673154418517e+00, - 3.93379364327392551e+01, 3.14142047803753769e+02, 1.24891387725365462e+03, - 9.44593016329059901e+03, 9.86780000000517612e+04, 1.02518000000000000e+05 }, + { + 4.56093001325520124e-68, 1.95890452462759358e-58, 3.61354104306368883e-39, + 1.55200406027122712e-29, 2.86294217011813689e-10, 1.46582639109909510e-07, + 1.50100622302544283e-04, 1.20080497023619128e-03, 9.60643923810312883e-03, + 7.68514803825075948e-02, 1.22961449148191515e+00, 9.83636673154418517e+00, + 3.93379364327392551e+01, 3.14142047803753769e+02, 1.24891387725365462e+03, + 9.44593016329059901e+03, 9.86780000000517612e+04, 1.02518000000000000e+05 + }, /* 100000 */ - { 4.31804109670444684e-68, 1.85458452931295726e-58, 3.42110461752972125e-39, - 1.46935324484847411e-29, 2.71047832615944429e-10, 1.38776490299235408e-07, - 1.42107125931920287e-04, 1.13685699991618186e-03, 9.09485551682193138e-03, - 7.27588132541049926e-02, 1.16413254204269756e+00, 9.31255441700961661e+00, - 3.72432805975374706e+01, 2.97429023684080164e+02, 1.18266355295424069e+03, - 8.95817783366734693e+03, 9.59040000001018925e+04, 9.97440000000000000e+04 }, + { + 4.31804109670444684e-68, 1.85458452931295726e-58, 3.42110461752972125e-39, + 1.46935324484847411e-29, 
2.71047832615944429e-10, 1.38776490299235408e-07, + 1.42107125931920287e-04, 1.13685699991618186e-03, 9.09485551682193138e-03, + 7.27588132541049926e-02, 1.16413254204269756e+00, 9.31255441700961661e+00, + 3.72432805975374706e+01, 2.97429023684080164e+02, 1.18266355295424069e+03, + 8.95817783366734693e+03, 9.59040000001018925e+04, 9.97440000000000000e+04 + }, /* 77163 */ - { 2.57100957639565332e-68, 1.10424020483221446e-58, 2.03696364544404734e-39, - 8.74869224032312274e-30, 1.61384886736889072e-10, 8.26290620092283361e-08, - 8.46121594356573939e-05, 6.76897272021500683e-04, 5.41517795449147563e-03, - 4.33214094483818091e-02, 6.93138659749164665e-01, 5.54487683849644686e+00, - 2.21763200560975164e+01, 1.77172840383531820e+02, 7.05445676326827083e+02, - 5.40962011023344166e+03, 7.30670000269061129e+04, 7.69070000000000000e+04 }, + { + 2.57100957639565332e-68, 1.10424020483221446e-58, 2.03696364544404734e-39, + 8.74869224032312274e-30, 1.61384886736889072e-10, 8.26290620092283361e-08, + 8.46121594356573939e-05, 6.76897272021500683e-04, 5.41517795449147563e-03, + 4.33214094483818091e-02, 6.93138659749164665e-01, 5.54487683849644686e+00, + 2.21763200560975164e+01, 1.77172840383531820e+02, 7.05445676326827083e+02, + 5.40962011023344166e+03, 7.30670000269061129e+04, 7.69070000000000000e+04 + }, /* 50643 */ - { 1.10744301397987420e-68, 4.75643152722723048e-59, 8.77406750868841857e-40, - 3.76843330027129536e-30, 6.95153246489491803e-11, 3.55918462202453374e-08, - 3.64460505120626550e-05, 2.91568403117305078e-04, 2.33254716226988625e-03, - 1.86603732873723421e-02, 2.98564872499666734e-01, 2.38845326899687205e+00, - 9.55291197889362387e+00, 7.63560630702938568e+01, 3.04504893070908849e+02, - 2.36897008846858444e+03, 4.65470174614963616e+04, 5.03870000000000000e+04 }, + { + 1.10744301397987420e-68, 4.75643152722723048e-59, 8.77406750868841857e-40, + 3.76843330027129536e-30, 6.95153246489491803e-11, 3.55918462202453374e-08, + 3.64460505120626550e-05, 2.91568403117305078e-04, 
2.33254716226988625e-03, + 1.86603732873723421e-02, 2.98564872499666734e-01, 2.38845326899687205e+00, + 9.55291197889362387e+00, 7.63560630702938568e+01, 3.04504893070908849e+02, + 2.36897008846858444e+03, 4.65470174614963616e+04, 5.03870000000000000e+04 + }, /* 6 */ - { 1.29542528326416669e-76, 5.56380922603113208e-67, 1.02634164867540313e-47, - 4.40810381558357815e-38, 8.13151629364128326e-19, 4.16333634234433703e-16, - 4.26325641456043956e-13, 3.41060513164744692e-12, 2.72848410531216727e-11, - 2.18278728421267612e-10, 3.49245965372384226e-09, 2.79396771690754164e-08, - 1.11758707843634397e-07, 8.94069600576588975e-07, 3.57627754965526357e-06, - 2.86101567327154416e-05, 3.66091750036190520e-03, 5.82894668923472636e-02 }, + { + 1.29542528326416669e-76, 5.56380922603113208e-67, 1.02634164867540313e-47, + 4.40810381558357815e-38, 8.13151629364128326e-19, 4.16333634234433703e-16, + 4.26325641456043956e-13, 3.41060513164744692e-12, 2.72848410531216727e-11, + 2.18278728421267612e-10, 3.49245965372384226e-09, 2.79396771690754164e-08, + 1.11758707843634397e-07, 8.94069600576588975e-07, 3.57627754965526357e-06, + 2.86101567327154416e-05, 3.66091750036190520e-03, 5.82894668923472636e-02 + }, }; -static void printdouble( const int width, const double value ) -{ - if (width < 10) +static void printdouble( const int width, const double value ) { + if (width < 10) { printf("%.*s|", width - 1, "----------"); - else if (value == 0.0) - printf (" %*.3f |", width - 2, value); - else if (value < 1.0e-100) - printf (" %.*e |", width - 9, value); - else if (value < 1.0e-6) - printf (" %.*e |", width - 9, value); - else if (value < 1.0) - printf (" %*.*f |", width - 3, width - 5, value); - else if (value < 1.0e6) - printf (" %*.3f |", width - 2, value); - else - printf (" %*.1f |", width - 4, value); + } else if (value == 0.0) { + printf(" %*.3f |", width - 2, value); + } else if (value < 1.0e-100) { + printf(" %.*e |", width - 9, value); + } else if (value < 1.0e-6) { + printf(" 
%.*e |", width - 9, value); + } else if (value < 1.0) { + printf(" %*.*f |", width - 3, width - 5, value); + } else if (value < 1.0e6) { + printf(" %*.3f |", width - 2, value); + } else { + printf(" %*.1f |", width - 4, value); + } } -void ReportCollisionEstimates( void ) -{ +void ReportCollisionEstimates( void ) { const int keys[] = { - 149633745, 86536545, 75498113, 56050289, 49925029, 44251425, - 43691201, 33558529, 33554432, 26977161, 22370049, 18877441, - 18616785, 17676661, 16777216, 16777214, 15082603, 14986273, - 14776336, 14196869, 12204240, 11017633, 9437505, 8390657, - 8388608, 8303633, 6445069, 5471025, 5461601, 5000000, - 4720129, 4598479, 4514873, 4216423, 4194304, 4000000, - 3981553, 3469497, 2796417, 2396744, 2098177, 2097152, - 1271626, 1180417, 1048576, 1000000, 819841, 652545, - 524801, 401857, 264097, 204800, 200000, 102774, - 100000, 77163, 50643, 6 + 149633745, 86536545, 75498113, 56050289, 49925029, 44251425, + 43691201, 33558529, 33554432, 26977161, 22370049, 18877441, + 18616785, 17676661, 16777216, 16777214, 15082603, 14986273, + 14776336, 14196869, 12204240, 11017633, 9437505, 8390657, + 8388608, 8303633, 6445069, 5471025, 5461601, 5000000, + 4720129, 4598479, 4514873, 4216423, 4194304, 4000000, + 3981553, 3469497, 2796417, 2396744, 2098177, 2097152, + 1271626, 1180417, 1048576, 1000000, 819841, 652545, + 524801, 401857, 264097, 204800, 200000, 102774, + 100000, 77163, 50643, 6 }; const int bits[] = { 256, 224, 160, 128, 64, 55, 45, 42, 39, 36, 32, 29, 27, 24, 22, 19, 12, 8 }; - printf ("EstimateNbCollisions:\n"); - printf (" # keys : bits| True answer | A: _fwojcik() | B: _previmpl() | C: _Demerphq() | Error A | Error B | Error C |\n"); - printf ("---------------------------------------------------------------------------------------------------------------------------------------------------\n"); - for (int i = 0; i < sizeof(keys)/sizeof(keys[0]); i++) { - const int key = keys[i]; - for (int j = 0; j < sizeof(bits)/sizeof(bits[0]); j++) 
{ - const int bit = bits[j]; - printf (" %9d : %3d |", key, bit); - printdouble(20, realcoll[i][j]); - for (int k = 0; k < COLLISION_ESTIMATORS; k++) { - printdouble(20, EstimateNbCollisionsCand(key, bit, k)); - } - for (int k = 0; k < COLLISION_ESTIMATORS; k++) { - double delta = EstimateNbCollisionsCand(key, bit, k) - realcoll[i][j]; - double deltapct = delta/realcoll[i][j]*100.0; - if (deltapct > 9999.999) - deltapct = 9999.999; - printf(" %+11.5f%% |", deltapct); + + printf("EstimateNbCollisions:\n"); + printf( + " # keys : bits| True answer | A: _fwojcik() | B: _previmpl() | C: _Demerphq() | Error A | Error B | Error C |\n"); + printf( + "---------------------------------------------------------------------------------------------------------------------------------------------------\n"); + for (int i = 0; i < sizeof(keys) / sizeof(keys[0]); i++) { + const int key = keys[i]; + for (int j = 0; j < sizeof(bits) / sizeof(bits[0]); j++) { + const int bit = bits[j]; + printf(" %9d : %3d |", key, bit); + printdouble(20, realcoll[i][j]); + for (int k = 0; k < COLLISION_ESTIMATORS; k++) { + printdouble(20, EstimateNbCollisionsCand(key, bit, k)); + } + for (int k = 0; k < COLLISION_ESTIMATORS; k++) { + double delta = EstimateNbCollisionsCand(key, bit, k) - realcoll[i][j]; + double deltapct = delta / realcoll[i][j] * 100.0; + if (deltapct > 9999.999) { + deltapct = 9999.999; + } + printf(" %+11.5f%% |", deltapct); + } + printf("\n"); } - printf("\n"); - } } } //----------------------------------------------------------------------------- + /* * Compute the lowest number of hash bits (n) such that there are * fewer than (2**n)*log(2**n) hashes, for a given hash count. @@ -824,13 +927,15 @@ void ReportCollisionEstimates( void ) * This may validly return a value exceeding the number of hash bits * that exist for the hash being tested! 
*/ -int GetNLogNBound ( unsigned nbH ) -{ - int nbHBits; - for (nbHBits = 1; nbHBits <= 255; nbHBits++) - if (nbH < (log(2.0) * nbHBits * exp2(nbHBits))) - break; - return nbHBits - 1; +int GetNLogNBound( unsigned nbH ) { + int nbHBits; + + for (nbHBits = 1; nbHBits <= 255; nbHBits++) { + if (nbH < (log(2.0) * nbHBits * exp2(nbHBits))) { + break; + } + } + return nbHBits - 1; } /* @@ -849,17 +954,15 @@ int GetNLogNBound ( unsigned nbH ) * use pow(), but this alternate formulation does the same thing for * values in 1-p space. */ -double ScalePValue ( double p_value, unsigned testcount ) -{ - return -expm1(log1p(-p_value) * testcount); +double ScalePValue( double p_value, unsigned testcount ) { + return -expm1(log1p(-p_value) * testcount); } /* * This is exactly the same as ScalePValue, but for 2**N tests. */ -double ScalePValue2N ( double p_value, unsigned testbits ) -{ - return -expm1(log1p(-p_value) * exp2(testbits)); +double ScalePValue2N( double p_value, unsigned testbits ) { + return -expm1(log1p(-p_value) * exp2(testbits)); } /* @@ -875,8 +978,7 @@ double ScalePValue2N ( double p_value, unsigned testbits ) * the caret (^) to display these values, as that can indicate * exponentiation, and the p-value is no less than 1/(2**logp_value). */ -int GetLog2PValue ( double p_value ) -{ +int GetLog2PValue( double p_value ) { return (log2(p_value) <= -99.0) ? 99 : -ceil(log2(p_value)); } @@ -884,10 +986,9 @@ int GetLog2PValue ( double p_value ) * Given a mean and standard deviation, return (1.0 - p) for the given * random normal variable. 
*/ -double GetNormalPValue(const double mu, const double sd, const double variable) -{ - double stdvar = (variable - mu) / sd; - double p_value = erfc(stdvar/sqrt(2.0))/2.0; +double GetNormalPValue( const double mu, const double sd, const double variable ) { + double stdvar = (variable - mu) / sd; + double p_value = erfc(stdvar / sqrt(2.0)) / 2.0; return p_value; } @@ -895,16 +996,20 @@ double GetNormalPValue(const double mu, const double sd, const double variable) /* * A helper function for the Peizer and Pratt approximation below. */ -static double GFunc_PeizerPratt(const double x) { - if (x < 0.0) +static double GFunc_PeizerPratt( const double x ) { + if (x < 0.0) { return NAN; - if (x == 0.0) + } + if (x == 0.0) { return 1.0; - if (x == 1.0) + } + if (x == 1.0) { return 0.0; - if (x > 1.0) - return -GFunc_PeizerPratt(1.0/x); - return (1.0 - x*x + 2*x*log(x))/((1.0 - x)*(1.0 - x)); + } + if (x > 1.0) { + return -GFunc_PeizerPratt(1.0 / x); + } + return (1.0 - x * x + 2 * x * log(x)) / ((1.0 - x) * (1.0 - x)); } /* @@ -929,28 +1034,29 @@ static double GFunc_PeizerPratt(const double x) { * "APPROXIMATIONS TO THE BINOMIAL", by MYRTLE ANNA BRUCE * https://core.ac.uk/download/pdf/33362622.pdf */ -double EstimatedBinomialPValue(const unsigned long nbH, const int nbBits, const int maxColl) -{ - const double s = maxColl + 1; - const double n = nbH; - const double t = nbH - maxColl; - const double p = exp2(-nbBits); - const double q = 1.0 - p; +double EstimatedBinomialPValue( const unsigned long nbH, const int nbBits, const int maxColl ) { + const double s = maxColl + 1; + const double n = nbH; + const double t = nbH - maxColl; + const double p = exp2(-nbBits); + const double q = 1.0 - p; - const double d1 = s + 1.0/6.0 - p * (n + 1.0/3.0); - const double d2 = d1 + 0.02 * (q/(s+0.5) - p/(t+0.5) + (q-0.5)/(n+1)); + const double d1 = s + 1.0 / 6.0 - p * (n + 1.0 / 3.0); + const double d2 = d1 + 0.02 * (q / (s + 0.5) - p / (t + 0.5) + (q - 0.5) / (n + 1)); - const double 
num = 1.0 + q*GFunc_PeizerPratt(s/(n*p)) + p*GFunc_PeizerPratt(t/(n*q)); - const double denom = (n + 1.0/6.0) * p * q; - const double z2 = d2 * sqrt(num/denom); + const double num = 1.0 + q * GFunc_PeizerPratt(s / (n * p)) + p * GFunc_PeizerPratt(t / (n * q)); + const double denom = (n + 1.0 / 6.0) * p * q; + const double z2 = d2 * sqrt(num / denom); // (1.0 - p) for one hash bin double p_value = GetNormalPValue(0.0, 1.0, z2); - //fprintf(stderr, "Pr(Xi > %ld; %d, %d) ~= 1.0 - N(%f)\n", nbH, nbBits, maxColl, z2); + // fprintf(stderr, "Pr(Xi > %ld; %d, %d) ~= 1.0 - N(%f)\n", nbH, nbBits, maxColl, z2); // (1.0 - p) across all 2**nbBits hash bins double pm_value = ScalePValue2N(p_value, nbBits); - //fprintf(stderr,"Pr(Xm > %ld; %d, %d) ~= 1.0-((1.0-%e)**(2**n)) == %.12f\n", nbH, nbBits, maxColl, p_value, pm_value, pm_value); + + // fprintf(stderr,"Pr(Xm > %ld; %d, %d) ~= 1.0-((1.0-%e)**(2**n)) == %.12f\n", nbH, nbBits, maxColl, p_value, + // pm_value, pm_value); return pm_value; } @@ -972,14 +1078,13 @@ double EstimatedBinomialPValue(const unsigned long nbH, const int nbBits, const * 50th-percentile for a given nbBits were computed via linear * regression from Monte Carlo experiments by fwojcik [N ~= 80,000,000]. */ -double EstimateMaxCollisions(const unsigned long nbH, const int nbBits) -{ +double EstimateMaxCollisions( const unsigned long nbH, const int nbBits ) { double alpha = -expm1(-0.128775055 * nbBits - 0.759110989); double m = (double)nbH - 16; double n = exp2(nbBits); double logn = nbBits * log(2); - return (m/n) + alpha * sqrt(2.0 * (m/n) * logn); + return (m / n) + alpha * sqrt(2.0 * (m / n) * logn); } /* @@ -990,7 +1095,7 @@ double EstimateMaxCollisions(const unsigned long nbH, const int nbBits) * p-value using a single calculation. 
This is taken from: * * "Sharp Bounds on Tail Probabilities for Poisson Random Variables", by - * Peter Harremoës + * Peter Harremoës * https://helda.helsinki.fi/bitstream/handle/10138/229679/witmse_proc_17.pdf * * Similar to other places in SMHasher3, this returns 1.0-p, so the @@ -998,13 +1103,13 @@ double EstimateMaxCollisions(const unsigned long nbH, const int nbBits) * computing real p-values for lower-than-expected collision counts, * since that is never a failure condition. */ -double BoundedPoissonPValue(const double expected, const uint64_t collisions) -{ - if (collisions < expected) +double BoundedPoissonPValue( const double expected, const uint64_t collisions ) { + if (collisions < expected) { return 1.0; - double x = (double)collisions - 0.5; + } + double x = (double)collisions - 0.5; double g_over_root2 = sqrt(x * log(x / expected) + expected - x); - double p_lbound = erfc(g_over_root2)/2.0; + double p_lbound = erfc(g_over_root2) / 2.0; return p_lbound; } @@ -1071,32 +1176,32 @@ double BoundedPoissonPValue(const double expected, const uint64_t collisions) // sumN{(Bi**2)} - M * lambda // // NB: bincount must be a non-zero multiple of 8! 
-double calcScore ( const unsigned * bins, const int bincount, const int keycount ) -{ - const double n = bincount; - const double k = keycount; - const double lambda = k/n; +double calcScore( const unsigned * bins, const int bincount, const int keycount ) { + const double n = bincount; + const double k = keycount; + const double lambda = k / n; - uint64_t sumsq = 0; + uint64_t sumsq = 0; - assume(bincount >= 8); - for(int i = 0; i < (bincount>>3)<<3; i++) - sumsq += (uint64_t)bins[i] * (uint64_t)bins[i]; + assume(bincount >= 8); + for (int i = 0; i < (bincount >> 3) << 3; i++) { + sumsq += (uint64_t)bins[i] * (uint64_t)bins[i]; + } - double sumsqe = (double)sumsq - lambda * k; - double rmse = sqrt(sumsqe/n); - double rmse_ratio_m1 = (rmse - sqrt(lambda))/sqrt(lambda); // == rmse/sqrt(lambda) - 1.0 - double score = (rmse_ratio_m1) * sqrt(2.0 * n); + double sumsqe = (double)sumsq - lambda * k; + double rmse = sqrt(sumsqe / n); + double rmse_ratio_m1 = (rmse - sqrt(lambda)) / sqrt(lambda); // == rmse/sqrt(lambda) - 1.0 + double score = (rmse_ratio_m1) * sqrt(2.0 * n); - return score; + return score; } // Convert the score from calcScore back into (rmse/sqrt(lambda) - // 1.0), to show the user something like the previous report. -double normalizeScore ( double score, int scorewidth, int tests ) -{ - if (score <= 0) +double normalizeScore( double score, int scorewidth, int tests ) { + if (score <= 0) { return 0.0; + } // Never return a result higher than this, as a precise value // would be visually cluttered and not really meaningful. 
@@ -1104,8 +1209,9 @@ double normalizeScore ( double score, int scorewidth, int tests ) double result = score / sqrt(2.0 * scorewidth); - if (result > maxresult) + if (result > maxresult) { return maxresult; + } return result; } diff --git a/util/Stats.h b/util/Stats.h index 2dce94dc..35d3acef 100644 --- a/util/Stats.h +++ b/util/Stats.h @@ -50,27 +50,27 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ -double CalcMean ( std::vector & v ); -double CalcMean ( std::vector & v, int a, int b ); -double CalcStdv ( std::vector & v ); -double CalcStdv ( std::vector & v, int a, int b ); -bool ContainsOutlier ( std::vector & v, size_t len ); -void FilterOutliers ( std::vector & v ); +double CalcMean( std::vector & v ); +double CalcMean( std::vector & v, int a, int b ); +double CalcStdv( std::vector & v ); +double CalcStdv( std::vector & v, int a, int b ); +bool ContainsOutlier( std::vector & v, size_t len ); +void FilterOutliers( std::vector & v ); -double chooseK ( int b, int k ); -double chooseUpToK ( int n, int k ); +double chooseK( int b, int k ); +double chooseUpToK( int n, int k ); -double EstimateNbCollisions(const unsigned long nbH, const int nbBits); +double EstimateNbCollisions( const unsigned long nbH, const int nbBits ); void ReportCollisionEstimates( void ); -int GetNLogNBound ( unsigned nbH ); -double ScalePValue ( double p_value, unsigned testcount ); -double ScalePValue2N ( double p_value, unsigned testbits ); -int GetLog2PValue ( double p_value ); -double GetNormalPValue(const double mu, const double sd, const double variable); -double EstimatedBinomialPValue(const unsigned long nbH, const int nbBits, const int maxColl); -double EstimateMaxCollisions(const unsigned long nbH, const int nbBits); -double BoundedPoissonPValue(const double expected, const uint64_t collisions); +int GetNLogNBound( unsigned nbH ); +double ScalePValue( double p_value, unsigned testcount ); +double ScalePValue2N( double p_value, 
unsigned testbits ); +int GetLog2PValue( double p_value ); +double GetNormalPValue( const double mu, const double sd, const double variable ); +double EstimatedBinomialPValue( const unsigned long nbH, const int nbBits, const int maxColl ); +double EstimateMaxCollisions( const unsigned long nbH, const int nbBits ); +double BoundedPoissonPValue( const double expected, const uint64_t collisions ); -double calcScore ( const unsigned * bins, const int bincount, const int ballcount ); -double normalizeScore ( double score, int scorewidth, int tests ); +double calcScore( const unsigned * bins, const int bincount, const int ballcount ); +double normalizeScore( double score, int scorewidth, int tests ); diff --git a/util/TestGlobals.h b/util/TestGlobals.h index 1dc66418..58f92b64 100644 --- a/util/TestGlobals.h +++ b/util/TestGlobals.h @@ -47,42 +47,42 @@ extern HashInfo::endianness g_hashEndian; // Recording test results for final summary printout #define COUNT_MAX_PVALUE 18 -extern uint32_t g_log2pValueCounts[COUNT_MAX_PVALUE+2]; +extern uint32_t g_log2pValueCounts[COUNT_MAX_PVALUE + 2]; -static inline void recordLog2PValue(uint32_t log_pvalue) { - if (log_pvalue <= COUNT_MAX_PVALUE) { - g_log2pValueCounts[log_pvalue]++; - } else { - g_log2pValueCounts[COUNT_MAX_PVALUE+1]++; - } +static inline void recordLog2PValue( uint32_t log_pvalue ) { + if (log_pvalue <= COUNT_MAX_PVALUE) { + g_log2pValueCounts[log_pvalue]++; + } else { + g_log2pValueCounts[COUNT_MAX_PVALUE + 1]++; + } } extern uint32_t g_testPass, g_testFail; -extern std::vector< std::pair > g_testFailures; +extern std::vector> g_testFailures; -static inline void recordTestResult(bool pass, const char * suitename, const char * testname) { - if (pass) { - g_testPass++; - return; - } - g_testFail++; +static inline void recordTestResult( bool pass, const char * suitename, const char * testname ) { + if (pass) { + g_testPass++; + return; + } + g_testFail++; - char * ntestname = NULL; - if (testname != NULL) { - testname 
+= strspn(testname, " "); - ntestname = strdup(testname); - if (!ntestname) { - printf("OOM\n"); - exit(1); + char * ntestname = NULL; + if (testname != NULL) { + testname += strspn(testname, " "); + ntestname = strdup(testname); + if (!ntestname) { + printf("OOM\n"); + exit(1); + } } - } - g_testFailures.push_back(std::pair(suitename, ntestname)); + g_testFailures.push_back(std::pair(suitename, ntestname)); } -static inline void recordTestResult(bool pass, const char * suitename, uint64_t testnum) { - const uint64_t maxlen = sizeof("18446744073709551615"); // UINT64_MAX - char testname[maxlen]; - snprintf(testname, maxlen, "%" PRIu64, testnum); - recordTestResult(pass, suitename, testname); -} +static inline void recordTestResult( bool pass, const char * suitename, uint64_t testnum ) { + const uint64_t maxlen = sizeof("18446744073709551615"); // UINT64_MAX + char testname[maxlen]; + snprintf(testname, maxlen, "%" PRIu64, testnum); + recordTestResult(pass, suitename, testname); +} diff --git a/util/VCode.cpp b/util/VCode.cpp index 25804a1e..4f89d828 100644 --- a/util/VCode.cpp +++ b/util/VCode.cpp @@ -47,9 +47,9 @@ //----------------------------------------------------------------------------- // Full CRC32c implementation // This is based on Mark Adler's implementation. 
-static inline void crc32c_sw_update(uint32_t * const crcptr, const void * const ptr, size_t len) { +static inline void crc32c_sw_update( uint32_t * const crcptr, const void * const ptr, size_t len ) { const uint8_t * next = (const uint8_t *)ptr; - uint64_t crc; + uint64_t crc; crc = *crcptr; while (len && ((uintptr_t)next & 7) != 0) { @@ -66,40 +66,40 @@ static inline void crc32c_sw_update(uint32_t * const crcptr, const void * const crc ^= wd1; if (isBE()) { crc = - crc32c_sw_table[15][ crc & 0xff] ^ - crc32c_sw_table[14][(crc >> 8) & 0xff] ^ - crc32c_sw_table[13][(crc >> 16) & 0xff] ^ - crc32c_sw_table[12][(crc >> 24) & 0xff] ^ - crc32c_sw_table[11][(crc >> 32) & 0xff] ^ - crc32c_sw_table[10][(crc >> 40) & 0xff] ^ - crc32c_sw_table[ 9][(crc >> 48) & 0xff] ^ - crc32c_sw_table[ 8][ crc >> 56] ^ - crc32c_sw_table[ 0][ wd2 & 0xff] ^ - crc32c_sw_table[ 1][(wd2 >> 8) & 0xff] ^ - crc32c_sw_table[ 2][(wd2 >> 16) & 0xff] ^ - crc32c_sw_table[ 3][(wd2 >> 24) & 0xff] ^ - crc32c_sw_table[ 4][(wd2 >> 32) & 0xff] ^ - crc32c_sw_table[ 5][(wd2 >> 40) & 0xff] ^ - crc32c_sw_table[ 6][(wd2 >> 48) & 0xff] ^ - crc32c_sw_table[ 7][ wd2 >> 56] ; + crc32c_sw_table[15][crc & 0xff] ^ + crc32c_sw_table[14][(crc >> 8) & 0xff] ^ + crc32c_sw_table[13][(crc >> 16) & 0xff] ^ + crc32c_sw_table[12][(crc >> 24) & 0xff] ^ + crc32c_sw_table[11][(crc >> 32) & 0xff] ^ + crc32c_sw_table[10][(crc >> 40) & 0xff] ^ + crc32c_sw_table[ 9][(crc >> 48) & 0xff] ^ + crc32c_sw_table[ 8][crc >> 56] ^ + crc32c_sw_table[ 0][wd2 & 0xff] ^ + crc32c_sw_table[ 1][(wd2 >> 8) & 0xff] ^ + crc32c_sw_table[ 2][(wd2 >> 16) & 0xff] ^ + crc32c_sw_table[ 3][(wd2 >> 24) & 0xff] ^ + crc32c_sw_table[ 4][(wd2 >> 32) & 0xff] ^ + crc32c_sw_table[ 5][(wd2 >> 40) & 0xff] ^ + crc32c_sw_table[ 6][(wd2 >> 48) & 0xff] ^ + crc32c_sw_table[ 7][wd2 >> 56]; } else { crc = - crc32c_sw_table[15][ crc & 0xff] ^ - crc32c_sw_table[14][(crc >> 8) & 0xff] ^ - crc32c_sw_table[13][(crc >> 16) & 0xff] ^ - crc32c_sw_table[12][(crc >> 24) & 0xff] ^ - 
crc32c_sw_table[11][(crc >> 32) & 0xff] ^ - crc32c_sw_table[10][(crc >> 40) & 0xff] ^ - crc32c_sw_table[ 9][(crc >> 48) & 0xff] ^ - crc32c_sw_table[ 8][ crc >> 56] ^ - crc32c_sw_table[ 7][ wd2 & 0xff] ^ - crc32c_sw_table[ 6][(wd2 >> 8) & 0xff] ^ - crc32c_sw_table[ 5][(wd2 >> 16) & 0xff] ^ - crc32c_sw_table[ 4][(wd2 >> 24) & 0xff] ^ - crc32c_sw_table[ 3][(wd2 >> 32) & 0xff] ^ - crc32c_sw_table[ 2][(wd2 >> 40) & 0xff] ^ - crc32c_sw_table[ 1][(wd2 >> 48) & 0xff] ^ - crc32c_sw_table[ 0][ wd2 >> 56] ; + crc32c_sw_table[15][crc & 0xff] ^ + crc32c_sw_table[14][(crc >> 8) & 0xff] ^ + crc32c_sw_table[13][(crc >> 16) & 0xff] ^ + crc32c_sw_table[12][(crc >> 24) & 0xff] ^ + crc32c_sw_table[11][(crc >> 32) & 0xff] ^ + crc32c_sw_table[10][(crc >> 40) & 0xff] ^ + crc32c_sw_table[ 9][(crc >> 48) & 0xff] ^ + crc32c_sw_table[ 8][crc >> 56] ^ + crc32c_sw_table[ 7][wd2 & 0xff] ^ + crc32c_sw_table[ 6][(wd2 >> 8) & 0xff] ^ + crc32c_sw_table[ 5][(wd2 >> 16) & 0xff] ^ + crc32c_sw_table[ 4][(wd2 >> 24) & 0xff] ^ + crc32c_sw_table[ 3][(wd2 >> 32) & 0xff] ^ + crc32c_sw_table[ 2][(wd2 >> 40) & 0xff] ^ + crc32c_sw_table[ 1][(wd2 >> 48) & 0xff] ^ + crc32c_sw_table[ 0][wd2 >> 56]; } next += 16; len -= 16; @@ -119,19 +119,19 @@ extern const uint32_t crc32_short[4][256]; // HW_LONGBLOCK_LEN and HW_SHORTBLOCK_LEN must both be powers of // two. Altering these means the crc32_long and crc32_short tables // need to be rebuilt. -const uint32_t HW_LONGBLOCK_LEN = 8192; +const uint32_t HW_LONGBLOCK_LEN = 8192; const uint32_t HW_SHORTBLOCK_LEN = 256; /* Apply the zeros operator table to crc. 
*/ -static inline uint32_t crc32_shift(const uint32_t zeros[][256], uint32_t crc) { +static inline uint32_t crc32_shift( const uint32_t zeros[][256], uint32_t crc ) { return zeros[0][crc & 0xff] ^ zeros[1][(crc >> 8) & 0xff] ^ zeros[2][(crc >> 16) & 0xff] ^ zeros[3][crc >> 24]; } -static inline void crc32c_hw_update(uint32_t * crcptr, const void * ptr, size_t len) { +static inline void crc32c_hw_update( uint32_t * crcptr, const void * ptr, size_t len ) { const uint8_t * next = (const uint8_t *)ptr; const uint8_t * end; - uint64_t crc0, crc1, crc2; /* need to be 64 bits for crc32q */ + uint64_t crc0, crc1, crc2; /* need to be 64 bits for crc32q */ /* Assume CRC is already pre-processed! */ crc0 = *crcptr; @@ -152,48 +152,48 @@ static inline void crc32c_hw_update(uint32_t * crcptr, const void * ptr, size_t * Bridge, and Ivy Bridge architectures, which have a throughput * of one crc per cycle, but a latency of three cycles. */ - while (len >= HW_LONGBLOCK_LEN*3) { + while (len >= HW_LONGBLOCK_LEN * 3) { crc1 = 0; crc2 = 0; - end = next + HW_LONGBLOCK_LEN; + end = next + HW_LONGBLOCK_LEN; do { uint64_t d1, d2, d3; memcpy(&d1, next, 8); memcpy(&d2, next + HW_LONGBLOCK_LEN, 8); memcpy(&d3, next + HW_LONGBLOCK_LEN + HW_LONGBLOCK_LEN, 8); - crc0 = HWCRC_U64(crc0, d1); - crc1 = HWCRC_U64(crc1, d2); - crc2 = HWCRC_U64(crc2, d3); + crc0 = HWCRC_U64(crc0, d1); + crc1 = HWCRC_U64(crc1, d2); + crc2 = HWCRC_U64(crc2, d3); next += 8; } while (next < end); - crc0 = crc32_shift(crc32_long, crc0) ^ crc1; - crc0 = crc32_shift(crc32_long, crc0) ^ crc2; - next += HW_LONGBLOCK_LEN*2; - len -= HW_LONGBLOCK_LEN*3; + crc0 = crc32_shift(crc32_long, crc0) ^ crc1; + crc0 = crc32_shift(crc32_long, crc0) ^ crc2; + next += HW_LONGBLOCK_LEN * 2; + len -= HW_LONGBLOCK_LEN * 3; } /* * Do the same thing, but now on HW_SHORTBLOCK_LEN*3 blocks for * the remaining data less than a HW_LONGBLOCK_LEN*3 block. 
*/ - while (len >= HW_SHORTBLOCK_LEN*3) { + while (len >= HW_SHORTBLOCK_LEN * 3) { crc1 = 0; crc2 = 0; - end = next + HW_SHORTBLOCK_LEN; + end = next + HW_SHORTBLOCK_LEN; do { uint64_t d1, d2, d3; memcpy(&d1, next, 8); memcpy(&d2, next + HW_SHORTBLOCK_LEN, 8); memcpy(&d3, next + HW_SHORTBLOCK_LEN + HW_SHORTBLOCK_LEN, 8); - crc0 = HWCRC_U64(crc0, d1); - crc1 = HWCRC_U64(crc1, d2); - crc2 = HWCRC_U64(crc2, d3); + crc0 = HWCRC_U64(crc0, d1); + crc1 = HWCRC_U64(crc1, d2); + crc2 = HWCRC_U64(crc2, d3); next += 8; } while (next < end); - crc0 = crc32_shift(crc32_short, crc0) ^ crc1; - crc0 = crc32_shift(crc32_short, crc0) ^ crc2; - next += HW_SHORTBLOCK_LEN*2; - len -= HW_SHORTBLOCK_LEN*3; + crc0 = crc32_shift(crc32_short, crc0) ^ crc1; + crc0 = crc32_shift(crc32_short, crc0) ^ crc2; + next += HW_SHORTBLOCK_LEN * 2; + len -= HW_SHORTBLOCK_LEN * 3; } /* @@ -204,7 +204,7 @@ static inline void crc32c_hw_update(uint32_t * crcptr, const void * ptr, size_t while (next < end) { uint64_t data; memcpy(&data, next, 8); - crc0 = HWCRC_U64(crc0, data); + crc0 = HWCRC_U64(crc0, data); next += 8; } len &= 7; @@ -218,9 +218,10 @@ static inline void crc32c_hw_update(uint32_t * crcptr, const void * ptr, size_t /* DON'T post-process the CRC! 
*/ *crcptr = (uint32_t)crc0; } + #endif -static inline void crc32c_update(uint32_t * crc, const void * ptr, size_t len) { +static inline void crc32c_update( uint32_t * crc, const void * ptr, size_t len ) { #if defined(HWCRC_U64) crc32c_hw_update(crc, ptr, len); #else @@ -230,9 +231,10 @@ static inline void crc32c_update(uint32_t * crc, const void * ptr, size_t len) { //----------------------------------------------------------------------------- // CRC implementation self-tests -template < bool use_hw, bool oneshot > -static uint32_t vcode_crc_selftest_40(uint8_t offset) { +template +static uint32_t vcode_crc_selftest_40( uint8_t offset ) { uint8_t buf[40]; + for (int i = 0; i < 40; i++) { buf[i] = offset + i; } @@ -240,61 +242,62 @@ static uint32_t vcode_crc_selftest_40(uint8_t offset) { uint32_t crc = ~0; if (oneshot) { #if defined(HWCRC_U64) - if (use_hw) + if (use_hw) { crc32c_hw_update(&crc, buf, 40); - else + } else #endif - crc32c_sw_update(&crc, buf, 40); + crc32c_sw_update(&crc, buf, 40); } else { #if defined(HWCRC_U64) if (use_hw) { - crc32c_hw_update(&crc, &buf[0], 1); - crc32c_hw_update(&crc, &buf[1], 1); - crc32c_hw_update(&crc, &buf[2], 2); - crc32c_hw_update(&crc, &buf[4], 4); - crc32c_hw_update(&crc, &buf[8], 8); + crc32c_hw_update(&crc, &buf[0] , 1); + crc32c_hw_update(&crc, &buf[1] , 1); + crc32c_hw_update(&crc, &buf[2] , 2); + crc32c_hw_update(&crc, &buf[4] , 4); + crc32c_hw_update(&crc, &buf[8] , 8); crc32c_hw_update(&crc, &buf[16], 16); - crc32c_hw_update(&crc, &buf[32], 1); - crc32c_hw_update(&crc, &buf[33], 1); - crc32c_hw_update(&crc, &buf[34], 2); - crc32c_hw_update(&crc, &buf[36], 4); + crc32c_hw_update(&crc, &buf[32], 1); + crc32c_hw_update(&crc, &buf[33], 1); + crc32c_hw_update(&crc, &buf[34], 2); + crc32c_hw_update(&crc, &buf[36], 4); } else { #endif - crc32c_sw_update(&crc, &buf[0], 1); - crc32c_sw_update(&crc, &buf[1], 1); - crc32c_sw_update(&crc, &buf[2], 2); - crc32c_sw_update(&crc, &buf[4], 4); - crc32c_sw_update(&crc, &buf[8], 8); 
- crc32c_sw_update(&crc, &buf[16], 16); - crc32c_sw_update(&crc, &buf[32], 1); - crc32c_sw_update(&crc, &buf[33], 1); - crc32c_sw_update(&crc, &buf[34], 2); - crc32c_sw_update(&crc, &buf[36], 4); + crc32c_sw_update(&crc, &buf[0] , 1); + crc32c_sw_update(&crc, &buf[1] , 1); + crc32c_sw_update(&crc, &buf[2] , 2); + crc32c_sw_update(&crc, &buf[4] , 4); + crc32c_sw_update(&crc, &buf[8] , 8); + crc32c_sw_update(&crc, &buf[16], 16); + crc32c_sw_update(&crc, &buf[32], 1); + crc32c_sw_update(&crc, &buf[33], 1); + crc32c_sw_update(&crc, &buf[34], 2); + crc32c_sw_update(&crc, &buf[36], 4); #if defined(HWCRC_U64) - } + } #endif - } return ~crc; } -template < bool use_hw > -static bool vcode_crc_selftest(void) { +template +static bool vcode_crc_selftest( void ) { #if !defined(HWCRC_U64) if (use_hw) { return true; } #endif constexpr uint32_t testcnt = 6; - uint8_t offsets[testcnt] = { 0x01, 0x29, 0x51, 0x79, 0xa1, 0xc9 }; - uint32_t crcs[testcnt] = { 0x0e2c157f, 0xe980ebf6, 0xde74bded, - 0xd579c862, 0xba979ad0, 0x2b29d913 }; + uint8_t offsets[testcnt] = { 0x01, 0x29, 0x51, 0x79, 0xa1, 0xc9 }; + uint32_t crcs[testcnt] = { + 0x0e2c157f, 0xe980ebf6, 0xde74bded, + 0xd579c862, 0xba979ad0, 0x2b29d913 + }; uint32_t crc; for (int i = 0; i < testcnt; i++) { - crc = vcode_crc_selftest_40(offsets[i]); + crc = vcode_crc_selftest_40( offsets[i]); if (crc != crcs[i]) { return false; } - crc = vcode_crc_selftest_40(offsets[i]); + crc = vcode_crc_selftest_40(offsets[i]); if (crc != crcs[i]) { return false; } } @@ -304,37 +307,39 @@ static bool vcode_crc_selftest(void) { //----------------------------------------------------------------------------- // VCode internal implementation vcode_state_t vcode_states[VCODE_COUNT]; -uint32_t g_doVCode = 0; -uint32_t g_inputVCode = 1; -uint32_t g_outputVCode = 1; -uint32_t g_resultVCode = 1; +uint32_t g_doVCode = 0; +uint32_t g_inputVCode = 1; +uint32_t g_outputVCode = 1; +uint32_t g_resultVCode = 1; static const uint64_t K1 = 
UINT64_C(0x6A09E667F3BCC909); // sqrt(2)-1 static const uint64_t K2 = UINT64_C(0xBB67AE8584CAA73B); // sqrt(3)-1 -static void resetWithSeed(vcode_state_t * state, uint64_t seed) { +static void resetWithSeed( vcode_state_t * state, uint64_t seed ) { // Arbitrarily mix seed into 2 starting 32-bit "CRC" values, // and then pre-process them. uint64_t v1 = (seed + 1) * K1; uint64_t v2 = (seed + 2) * K2; + state->data_hash = 0xffffffff ^ (v1 - (v1 >> 32)); state->lens_hash = 0xffffffff ^ (v2 - (v2 >> 32)); } -static void update(vcode_state_t * state, const void * ptr, size_t len) { +static void update( vcode_state_t * state, const void * ptr, size_t len ) { crc32c_update(&state->data_hash, ptr, len); crc32c_update_u64(&state->lens_hash, (uint64_t)len); } -static void update_u32(vcode_state_t * state, uint32_t data) { +static void update_u32( vcode_state_t * state, uint32_t data ) { crc32c_update_u64(&state->data_hash, (uint64_t)data); crc32c_update_u64(&state->lens_hash, 4); } -static uint32_t getDigest(vcode_state_t * state) { +static uint32_t getDigest( vcode_state_t * state ) { // Post-process the 2 final 32-bit CRCs, and then arbitrarily mix // them into a 32-bit signature. 
uint64_t combined; + combined = (uint64_t)(0xffffffff ^ state->data_hash); combined *= (uint64_t)(0xffffffff ^ state->lens_hash); combined ^= K1 ^ K2 ^ state->data_hash ^ state->lens_hash; @@ -346,7 +351,7 @@ static uint32_t getDigest(vcode_state_t * state) { // VCode external interface implementation static uint32_t VCODE_MASK = 0x0; -void VCODE_INIT(void) { +void VCODE_INIT( void ) { if (!vcode_crc_selftest()) { printf("VCode CRC32c SW self-test failed!\n"); exit(1); @@ -365,8 +370,8 @@ void VCODE_INIT(void) { VCODE_MASK = VCODE_FINALIZE() ^ 0x1; } -uint32_t VCODE_FINALIZE(void) { - if (!g_doVCode) return 1; +uint32_t VCODE_FINALIZE( void ) { + if (!g_doVCode) { return 1; } g_inputVCode = getDigest(&vcode_states[0]); g_outputVCode = getDigest(&vcode_states[1]); @@ -375,16 +380,17 @@ uint32_t VCODE_FINALIZE(void) { vcode_state_t finalvcode; resetWithSeed(&finalvcode, VCODE_COUNT); - update_u32(&finalvcode, g_inputVCode); + update_u32(&finalvcode, g_inputVCode ); update_u32(&finalvcode, g_outputVCode); update_u32(&finalvcode, g_resultVCode); return VCODE_MASK ^ getDigest(&finalvcode); } -void VCODE_HASH(const void * input, size_t len, unsigned idx) { - if (idx >= VCODE_COUNT) +void VCODE_HASH( const void * input, size_t len, unsigned idx ) { + if (idx >= VCODE_COUNT) { return; + } update(&vcode_states[idx], input, len); } @@ -392,828 +398,828 @@ void VCODE_HASH(const void * input, size_t len, unsigned idx) { // Pre-computed tables for CRC32c #if defined(HWCRC_U64) const uint32_t crc32_long[4][256] = { - { - 0x00000000, 0xe040e0ac, 0xc56db7a9, 0x252d5705, 0x8f3719a3, 0x6f77f90f, 0x4a5aae0a, 0xaa1a4ea6, - 0x1b8245b7, 0xfbc2a51b, 0xdeeff21e, 0x3eaf12b2, 0x94b55c14, 0x74f5bcb8, 0x51d8ebbd, 0xb1980b11, - 0x37048b6e, 0xd7446bc2, 0xf2693cc7, 0x1229dc6b, 0xb83392cd, 0x58737261, 0x7d5e2564, 0x9d1ec5c8, - 0x2c86ced9, 0xccc62e75, 0xe9eb7970, 0x09ab99dc, 0xa3b1d77a, 0x43f137d6, 0x66dc60d3, 0x869c807f, - 0x6e0916dc, 0x8e49f670, 0xab64a175, 0x4b2441d9, 0xe13e0f7f, 0x017eefd3, 
0x2453b8d6, 0xc413587a, - 0x758b536b, 0x95cbb3c7, 0xb0e6e4c2, 0x50a6046e, 0xfabc4ac8, 0x1afcaa64, 0x3fd1fd61, 0xdf911dcd, - 0x590d9db2, 0xb94d7d1e, 0x9c602a1b, 0x7c20cab7, 0xd63a8411, 0x367a64bd, 0x135733b8, 0xf317d314, - 0x428fd805, 0xa2cf38a9, 0x87e26fac, 0x67a28f00, 0xcdb8c1a6, 0x2df8210a, 0x08d5760f, 0xe89596a3, - 0xdc122db8, 0x3c52cd14, 0x197f9a11, 0xf93f7abd, 0x5325341b, 0xb365d4b7, 0x964883b2, 0x7608631e, - 0xc790680f, 0x27d088a3, 0x02fddfa6, 0xe2bd3f0a, 0x48a771ac, 0xa8e79100, 0x8dcac605, 0x6d8a26a9, - 0xeb16a6d6, 0x0b56467a, 0x2e7b117f, 0xce3bf1d3, 0x6421bf75, 0x84615fd9, 0xa14c08dc, 0x410ce870, - 0xf094e361, 0x10d403cd, 0x35f954c8, 0xd5b9b464, 0x7fa3fac2, 0x9fe31a6e, 0xbace4d6b, 0x5a8eadc7, - 0xb21b3b64, 0x525bdbc8, 0x77768ccd, 0x97366c61, 0x3d2c22c7, 0xdd6cc26b, 0xf841956e, 0x180175c2, - 0xa9997ed3, 0x49d99e7f, 0x6cf4c97a, 0x8cb429d6, 0x26ae6770, 0xc6ee87dc, 0xe3c3d0d9, 0x03833075, - 0x851fb00a, 0x655f50a6, 0x407207a3, 0xa032e70f, 0x0a28a9a9, 0xea684905, 0xcf451e00, 0x2f05feac, - 0x9e9df5bd, 0x7edd1511, 0x5bf04214, 0xbbb0a2b8, 0x11aaec1e, 0xf1ea0cb2, 0xd4c75bb7, 0x3487bb1b, - 0xbdc82d81, 0x5d88cd2d, 0x78a59a28, 0x98e57a84, 0x32ff3422, 0xd2bfd48e, 0xf792838b, 0x17d26327, - 0xa64a6836, 0x460a889a, 0x6327df9f, 0x83673f33, 0x297d7195, 0xc93d9139, 0xec10c63c, 0x0c502690, - 0x8acca6ef, 0x6a8c4643, 0x4fa11146, 0xafe1f1ea, 0x05fbbf4c, 0xe5bb5fe0, 0xc09608e5, 0x20d6e849, - 0x914ee358, 0x710e03f4, 0x542354f1, 0xb463b45d, 0x1e79fafb, 0xfe391a57, 0xdb144d52, 0x3b54adfe, - 0xd3c13b5d, 0x3381dbf1, 0x16ac8cf4, 0xf6ec6c58, 0x5cf622fe, 0xbcb6c252, 0x999b9557, 0x79db75fb, - 0xc8437eea, 0x28039e46, 0x0d2ec943, 0xed6e29ef, 0x47746749, 0xa73487e5, 0x8219d0e0, 0x6259304c, - 0xe4c5b033, 0x0485509f, 0x21a8079a, 0xc1e8e736, 0x6bf2a990, 0x8bb2493c, 0xae9f1e39, 0x4edffe95, - 0xff47f584, 0x1f071528, 0x3a2a422d, 0xda6aa281, 0x7070ec27, 0x90300c8b, 0xb51d5b8e, 0x555dbb22, - 0x61da0039, 0x819ae095, 0xa4b7b790, 0x44f7573c, 0xeeed199a, 0x0eadf936, 0x2b80ae33, 0xcbc04e9f, - 0x7a58458e, 
0x9a18a522, 0xbf35f227, 0x5f75128b, 0xf56f5c2d, 0x152fbc81, 0x3002eb84, 0xd0420b28, - 0x56de8b57, 0xb69e6bfb, 0x93b33cfe, 0x73f3dc52, 0xd9e992f4, 0x39a97258, 0x1c84255d, 0xfcc4c5f1, - 0x4d5ccee0, 0xad1c2e4c, 0x88317949, 0x687199e5, 0xc26bd743, 0x222b37ef, 0x070660ea, 0xe7468046, - 0x0fd316e5, 0xef93f649, 0xcabea14c, 0x2afe41e0, 0x80e40f46, 0x60a4efea, 0x4589b8ef, 0xa5c95843, - 0x14515352, 0xf411b3fe, 0xd13ce4fb, 0x317c0457, 0x9b664af1, 0x7b26aa5d, 0x5e0bfd58, 0xbe4b1df4, - 0x38d79d8b, 0xd8977d27, 0xfdba2a22, 0x1dfaca8e, 0xb7e08428, 0x57a06484, 0x728d3381, 0x92cdd32d, - 0x2355d83c, 0xc3153890, 0xe6386f95, 0x06788f39, 0xac62c19f, 0x4c222133, 0x690f7636, 0x894f969a, - }, - { - 0x00000000, 0x7e7c2df3, 0xfcf85be6, 0x82847615, 0xfc1cc13d, 0x8260ecce, 0x00e49adb, 0x7e98b728, - 0xfdd5f48b, 0x83a9d978, 0x012daf6d, 0x7f51829e, 0x01c935b6, 0x7fb51845, 0xfd316e50, 0x834d43a3, - 0xfe479fe7, 0x803bb214, 0x02bfc401, 0x7cc3e9f2, 0x025b5eda, 0x7c277329, 0xfea3053c, 0x80df28cf, - 0x03926b6c, 0x7dee469f, 0xff6a308a, 0x81161d79, 0xff8eaa51, 0x81f287a2, 0x0376f1b7, 0x7d0adc44, - 0xf963493f, 0x871f64cc, 0x059b12d9, 0x7be73f2a, 0x057f8802, 0x7b03a5f1, 0xf987d3e4, 0x87fbfe17, - 0x04b6bdb4, 0x7aca9047, 0xf84ee652, 0x8632cba1, 0xf8aa7c89, 0x86d6517a, 0x0452276f, 0x7a2e0a9c, - 0x0724d6d8, 0x7958fb2b, 0xfbdc8d3e, 0x85a0a0cd, 0xfb3817e5, 0x85443a16, 0x07c04c03, 0x79bc61f0, - 0xfaf12253, 0x848d0fa0, 0x060979b5, 0x78755446, 0x06ede36e, 0x7891ce9d, 0xfa15b888, 0x8469957b, - 0xf72ae48f, 0x8956c97c, 0x0bd2bf69, 0x75ae929a, 0x0b3625b2, 0x754a0841, 0xf7ce7e54, 0x89b253a7, - 0x0aff1004, 0x74833df7, 0xf6074be2, 0x887b6611, 0xf6e3d139, 0x889ffcca, 0x0a1b8adf, 0x7467a72c, - 0x096d7b68, 0x7711569b, 0xf595208e, 0x8be90d7d, 0xf571ba55, 0x8b0d97a6, 0x0989e1b3, 0x77f5cc40, - 0xf4b88fe3, 0x8ac4a210, 0x0840d405, 0x763cf9f6, 0x08a44ede, 0x76d8632d, 0xf45c1538, 0x8a2038cb, - 0x0e49adb0, 0x70358043, 0xf2b1f656, 0x8ccddba5, 0xf2556c8d, 0x8c29417e, 0x0ead376b, 0x70d11a98, - 0xf39c593b, 0x8de074c8, 0x0f6402dd, 
0x71182f2e, 0x0f809806, 0x71fcb5f5, 0xf378c3e0, 0x8d04ee13, - 0xf00e3257, 0x8e721fa4, 0x0cf669b1, 0x728a4442, 0x0c12f36a, 0x726ede99, 0xf0eaa88c, 0x8e96857f, - 0x0ddbc6dc, 0x73a7eb2f, 0xf1239d3a, 0x8f5fb0c9, 0xf1c707e1, 0x8fbb2a12, 0x0d3f5c07, 0x734371f4, - 0xebb9bfef, 0x95c5921c, 0x1741e409, 0x693dc9fa, 0x17a57ed2, 0x69d95321, 0xeb5d2534, 0x952108c7, - 0x166c4b64, 0x68106697, 0xea941082, 0x94e83d71, 0xea708a59, 0x940ca7aa, 0x1688d1bf, 0x68f4fc4c, - 0x15fe2008, 0x6b820dfb, 0xe9067bee, 0x977a561d, 0xe9e2e135, 0x979eccc6, 0x151abad3, 0x6b669720, - 0xe82bd483, 0x9657f970, 0x14d38f65, 0x6aafa296, 0x143715be, 0x6a4b384d, 0xe8cf4e58, 0x96b363ab, - 0x12daf6d0, 0x6ca6db23, 0xee22ad36, 0x905e80c5, 0xeec637ed, 0x90ba1a1e, 0x123e6c0b, 0x6c4241f8, - 0xef0f025b, 0x91732fa8, 0x13f759bd, 0x6d8b744e, 0x1313c366, 0x6d6fee95, 0xefeb9880, 0x9197b573, - 0xec9d6937, 0x92e144c4, 0x106532d1, 0x6e191f22, 0x1081a80a, 0x6efd85f9, 0xec79f3ec, 0x9205de1f, - 0x11489dbc, 0x6f34b04f, 0xedb0c65a, 0x93cceba9, 0xed545c81, 0x93287172, 0x11ac0767, 0x6fd02a94, - 0x1c935b60, 0x62ef7693, 0xe06b0086, 0x9e172d75, 0xe08f9a5d, 0x9ef3b7ae, 0x1c77c1bb, 0x620bec48, - 0xe146afeb, 0x9f3a8218, 0x1dbef40d, 0x63c2d9fe, 0x1d5a6ed6, 0x63264325, 0xe1a23530, 0x9fde18c3, - 0xe2d4c487, 0x9ca8e974, 0x1e2c9f61, 0x6050b292, 0x1ec805ba, 0x60b42849, 0xe2305e5c, 0x9c4c73af, - 0x1f01300c, 0x617d1dff, 0xe3f96bea, 0x9d854619, 0xe31df131, 0x9d61dcc2, 0x1fe5aad7, 0x61998724, - 0xe5f0125f, 0x9b8c3fac, 0x190849b9, 0x6774644a, 0x19ecd362, 0x6790fe91, 0xe5148884, 0x9b68a577, - 0x1825e6d4, 0x6659cb27, 0xe4ddbd32, 0x9aa190c1, 0xe43927e9, 0x9a450a1a, 0x18c17c0f, 0x66bd51fc, - 0x1bb78db8, 0x65cba04b, 0xe74fd65e, 0x9933fbad, 0xe7ab4c85, 0x99d76176, 0x1b531763, 0x652f3a90, - 0xe6627933, 0x981e54c0, 0x1a9a22d5, 0x64e60f26, 0x1a7eb80e, 0x640295fd, 0xe686e3e8, 0x98face1b, - }, - { - 0x00000000, 0xd29f092f, 0xa0d264af, 0x724d6d80, 0x4448bfaf, 0x96d7b680, 0xe49adb00, 0x3605d22f, - 0x88917f5e, 0x5a0e7671, 0x28431bf1, 0xfadc12de, 0xccd9c0f1, 
0x1e46c9de, 0x6c0ba45e, 0xbe94ad71, - 0x14ce884d, 0xc6518162, 0xb41cece2, 0x6683e5cd, 0x508637e2, 0x82193ecd, 0xf054534d, 0x22cb5a62, - 0x9c5ff713, 0x4ec0fe3c, 0x3c8d93bc, 0xee129a93, 0xd81748bc, 0x0a884193, 0x78c52c13, 0xaa5a253c, - 0x299d109a, 0xfb0219b5, 0x894f7435, 0x5bd07d1a, 0x6dd5af35, 0xbf4aa61a, 0xcd07cb9a, 0x1f98c2b5, - 0xa10c6fc4, 0x739366eb, 0x01de0b6b, 0xd3410244, 0xe544d06b, 0x37dbd944, 0x4596b4c4, 0x9709bdeb, - 0x3d5398d7, 0xefcc91f8, 0x9d81fc78, 0x4f1ef557, 0x791b2778, 0xab842e57, 0xd9c943d7, 0x0b564af8, - 0xb5c2e789, 0x675deea6, 0x15108326, 0xc78f8a09, 0xf18a5826, 0x23155109, 0x51583c89, 0x83c735a6, - 0x533a2134, 0x81a5281b, 0xf3e8459b, 0x21774cb4, 0x17729e9b, 0xc5ed97b4, 0xb7a0fa34, 0x653ff31b, - 0xdbab5e6a, 0x09345745, 0x7b793ac5, 0xa9e633ea, 0x9fe3e1c5, 0x4d7ce8ea, 0x3f31856a, 0xedae8c45, - 0x47f4a979, 0x956ba056, 0xe726cdd6, 0x35b9c4f9, 0x03bc16d6, 0xd1231ff9, 0xa36e7279, 0x71f17b56, - 0xcf65d627, 0x1dfadf08, 0x6fb7b288, 0xbd28bba7, 0x8b2d6988, 0x59b260a7, 0x2bff0d27, 0xf9600408, - 0x7aa731ae, 0xa8383881, 0xda755501, 0x08ea5c2e, 0x3eef8e01, 0xec70872e, 0x9e3deaae, 0x4ca2e381, - 0xf2364ef0, 0x20a947df, 0x52e42a5f, 0x807b2370, 0xb67ef15f, 0x64e1f870, 0x16ac95f0, 0xc4339cdf, - 0x6e69b9e3, 0xbcf6b0cc, 0xcebbdd4c, 0x1c24d463, 0x2a21064c, 0xf8be0f63, 0x8af362e3, 0x586c6bcc, - 0xe6f8c6bd, 0x3467cf92, 0x462aa212, 0x94b5ab3d, 0xa2b07912, 0x702f703d, 0x02621dbd, 0xd0fd1492, - 0xa6744268, 0x74eb4b47, 0x06a626c7, 0xd4392fe8, 0xe23cfdc7, 0x30a3f4e8, 0x42ee9968, 0x90719047, - 0x2ee53d36, 0xfc7a3419, 0x8e375999, 0x5ca850b6, 0x6aad8299, 0xb8328bb6, 0xca7fe636, 0x18e0ef19, - 0xb2baca25, 0x6025c30a, 0x1268ae8a, 0xc0f7a7a5, 0xf6f2758a, 0x246d7ca5, 0x56201125, 0x84bf180a, - 0x3a2bb57b, 0xe8b4bc54, 0x9af9d1d4, 0x4866d8fb, 0x7e630ad4, 0xacfc03fb, 0xdeb16e7b, 0x0c2e6754, - 0x8fe952f2, 0x5d765bdd, 0x2f3b365d, 0xfda43f72, 0xcba1ed5d, 0x193ee472, 0x6b7389f2, 0xb9ec80dd, - 0x07782dac, 0xd5e72483, 0xa7aa4903, 0x7535402c, 0x43309203, 0x91af9b2c, 0xe3e2f6ac, 0x317dff83, - 
0x9b27dabf, 0x49b8d390, 0x3bf5be10, 0xe96ab73f, 0xdf6f6510, 0x0df06c3f, 0x7fbd01bf, 0xad220890, - 0x13b6a5e1, 0xc129acce, 0xb364c14e, 0x61fbc861, 0x57fe1a4e, 0x85611361, 0xf72c7ee1, 0x25b377ce, - 0xf54e635c, 0x27d16a73, 0x559c07f3, 0x87030edc, 0xb106dcf3, 0x6399d5dc, 0x11d4b85c, 0xc34bb173, - 0x7ddf1c02, 0xaf40152d, 0xdd0d78ad, 0x0f927182, 0x3997a3ad, 0xeb08aa82, 0x9945c702, 0x4bdace2d, - 0xe180eb11, 0x331fe23e, 0x41528fbe, 0x93cd8691, 0xa5c854be, 0x77575d91, 0x051a3011, 0xd785393e, - 0x6911944f, 0xbb8e9d60, 0xc9c3f0e0, 0x1b5cf9cf, 0x2d592be0, 0xffc622cf, 0x8d8b4f4f, 0x5f144660, - 0xdcd373c6, 0x0e4c7ae9, 0x7c011769, 0xae9e1e46, 0x989bcc69, 0x4a04c546, 0x3849a8c6, 0xead6a1e9, - 0x54420c98, 0x86dd05b7, 0xf4906837, 0x260f6118, 0x100ab337, 0xc295ba18, 0xb0d8d798, 0x6247deb7, - 0xc81dfb8b, 0x1a82f2a4, 0x68cf9f24, 0xba50960b, 0x8c554424, 0x5eca4d0b, 0x2c87208b, 0xfe1829a4, - 0x408c84d5, 0x92138dfa, 0xe05ee07a, 0x32c1e955, 0x04c43b7a, 0xd65b3255, 0xa4165fd5, 0x768956fa, - }, - { - 0x00000000, 0x4904f221, 0x9209e442, 0xdb0d1663, 0x21ffbe75, 0x68fb4c54, 0xb3f65a37, 0xfaf2a816, - 0x43ff7cea, 0x0afb8ecb, 0xd1f698a8, 0x98f26a89, 0x6200c29f, 0x2b0430be, 0xf00926dd, 0xb90dd4fc, - 0x87fef9d4, 0xcefa0bf5, 0x15f71d96, 0x5cf3efb7, 0xa60147a1, 0xef05b580, 0x3408a3e3, 0x7d0c51c2, - 0xc401853e, 0x8d05771f, 0x5608617c, 0x1f0c935d, 0xe5fe3b4b, 0xacfac96a, 0x77f7df09, 0x3ef32d28, - 0x0a118559, 0x43157778, 0x9818611b, 0xd11c933a, 0x2bee3b2c, 0x62eac90d, 0xb9e7df6e, 0xf0e32d4f, - 0x49eef9b3, 0x00ea0b92, 0xdbe71df1, 0x92e3efd0, 0x681147c6, 0x2115b5e7, 0xfa18a384, 0xb31c51a5, - 0x8def7c8d, 0xc4eb8eac, 0x1fe698cf, 0x56e26aee, 0xac10c2f8, 0xe51430d9, 0x3e1926ba, 0x771dd49b, - 0xce100067, 0x8714f246, 0x5c19e425, 0x151d1604, 0xefefbe12, 0xa6eb4c33, 0x7de65a50, 0x34e2a871, - 0x14230ab2, 0x5d27f893, 0x862aeef0, 0xcf2e1cd1, 0x35dcb4c7, 0x7cd846e6, 0xa7d55085, 0xeed1a2a4, - 0x57dc7658, 0x1ed88479, 0xc5d5921a, 0x8cd1603b, 0x7623c82d, 0x3f273a0c, 0xe42a2c6f, 0xad2ede4e, - 0x93ddf366, 0xdad90147, 
0x01d41724, 0x48d0e505, 0xb2224d13, 0xfb26bf32, 0x202ba951, 0x692f5b70, - 0xd0228f8c, 0x99267dad, 0x422b6bce, 0x0b2f99ef, 0xf1dd31f9, 0xb8d9c3d8, 0x63d4d5bb, 0x2ad0279a, - 0x1e328feb, 0x57367dca, 0x8c3b6ba9, 0xc53f9988, 0x3fcd319e, 0x76c9c3bf, 0xadc4d5dc, 0xe4c027fd, - 0x5dcdf301, 0x14c90120, 0xcfc41743, 0x86c0e562, 0x7c324d74, 0x3536bf55, 0xee3ba936, 0xa73f5b17, - 0x99cc763f, 0xd0c8841e, 0x0bc5927d, 0x42c1605c, 0xb833c84a, 0xf1373a6b, 0x2a3a2c08, 0x633ede29, - 0xda330ad5, 0x9337f8f4, 0x483aee97, 0x013e1cb6, 0xfbccb4a0, 0xb2c84681, 0x69c550e2, 0x20c1a2c3, - 0x28461564, 0x6142e745, 0xba4ff126, 0xf34b0307, 0x09b9ab11, 0x40bd5930, 0x9bb04f53, 0xd2b4bd72, - 0x6bb9698e, 0x22bd9baf, 0xf9b08dcc, 0xb0b47fed, 0x4a46d7fb, 0x034225da, 0xd84f33b9, 0x914bc198, - 0xafb8ecb0, 0xe6bc1e91, 0x3db108f2, 0x74b5fad3, 0x8e4752c5, 0xc743a0e4, 0x1c4eb687, 0x554a44a6, - 0xec47905a, 0xa543627b, 0x7e4e7418, 0x374a8639, 0xcdb82e2f, 0x84bcdc0e, 0x5fb1ca6d, 0x16b5384c, - 0x2257903d, 0x6b53621c, 0xb05e747f, 0xf95a865e, 0x03a82e48, 0x4aacdc69, 0x91a1ca0a, 0xd8a5382b, - 0x61a8ecd7, 0x28ac1ef6, 0xf3a10895, 0xbaa5fab4, 0x405752a2, 0x0953a083, 0xd25eb6e0, 0x9b5a44c1, - 0xa5a969e9, 0xecad9bc8, 0x37a08dab, 0x7ea47f8a, 0x8456d79c, 0xcd5225bd, 0x165f33de, 0x5f5bc1ff, - 0xe6561503, 0xaf52e722, 0x745ff141, 0x3d5b0360, 0xc7a9ab76, 0x8ead5957, 0x55a04f34, 0x1ca4bd15, - 0x3c651fd6, 0x7561edf7, 0xae6cfb94, 0xe76809b5, 0x1d9aa1a3, 0x549e5382, 0x8f9345e1, 0xc697b7c0, - 0x7f9a633c, 0x369e911d, 0xed93877e, 0xa497755f, 0x5e65dd49, 0x17612f68, 0xcc6c390b, 0x8568cb2a, - 0xbb9be602, 0xf29f1423, 0x29920240, 0x6096f061, 0x9a645877, 0xd360aa56, 0x086dbc35, 0x41694e14, - 0xf8649ae8, 0xb16068c9, 0x6a6d7eaa, 0x23698c8b, 0xd99b249d, 0x909fd6bc, 0x4b92c0df, 0x029632fe, - 0x36749a8f, 0x7f7068ae, 0xa47d7ecd, 0xed798cec, 0x178b24fa, 0x5e8fd6db, 0x8582c0b8, 0xcc863299, - 0x758be665, 0x3c8f1444, 0xe7820227, 0xae86f006, 0x54745810, 0x1d70aa31, 0xc67dbc52, 0x8f794e73, - 0xb18a635b, 0xf88e917a, 0x23838719, 0x6a877538, 0x9075dd2e, 
0xd9712f0f, 0x027c396c, 0x4b78cb4d, - 0xf2751fb1, 0xbb71ed90, 0x607cfbf3, 0x297809d2, 0xd38aa1c4, 0x9a8e53e5, 0x41834586, 0x0887b7a7, - }, + { + 0x00000000, 0xe040e0ac, 0xc56db7a9, 0x252d5705, 0x8f3719a3, 0x6f77f90f, 0x4a5aae0a, 0xaa1a4ea6, + 0x1b8245b7, 0xfbc2a51b, 0xdeeff21e, 0x3eaf12b2, 0x94b55c14, 0x74f5bcb8, 0x51d8ebbd, 0xb1980b11, + 0x37048b6e, 0xd7446bc2, 0xf2693cc7, 0x1229dc6b, 0xb83392cd, 0x58737261, 0x7d5e2564, 0x9d1ec5c8, + 0x2c86ced9, 0xccc62e75, 0xe9eb7970, 0x09ab99dc, 0xa3b1d77a, 0x43f137d6, 0x66dc60d3, 0x869c807f, + 0x6e0916dc, 0x8e49f670, 0xab64a175, 0x4b2441d9, 0xe13e0f7f, 0x017eefd3, 0x2453b8d6, 0xc413587a, + 0x758b536b, 0x95cbb3c7, 0xb0e6e4c2, 0x50a6046e, 0xfabc4ac8, 0x1afcaa64, 0x3fd1fd61, 0xdf911dcd, + 0x590d9db2, 0xb94d7d1e, 0x9c602a1b, 0x7c20cab7, 0xd63a8411, 0x367a64bd, 0x135733b8, 0xf317d314, + 0x428fd805, 0xa2cf38a9, 0x87e26fac, 0x67a28f00, 0xcdb8c1a6, 0x2df8210a, 0x08d5760f, 0xe89596a3, + 0xdc122db8, 0x3c52cd14, 0x197f9a11, 0xf93f7abd, 0x5325341b, 0xb365d4b7, 0x964883b2, 0x7608631e, + 0xc790680f, 0x27d088a3, 0x02fddfa6, 0xe2bd3f0a, 0x48a771ac, 0xa8e79100, 0x8dcac605, 0x6d8a26a9, + 0xeb16a6d6, 0x0b56467a, 0x2e7b117f, 0xce3bf1d3, 0x6421bf75, 0x84615fd9, 0xa14c08dc, 0x410ce870, + 0xf094e361, 0x10d403cd, 0x35f954c8, 0xd5b9b464, 0x7fa3fac2, 0x9fe31a6e, 0xbace4d6b, 0x5a8eadc7, + 0xb21b3b64, 0x525bdbc8, 0x77768ccd, 0x97366c61, 0x3d2c22c7, 0xdd6cc26b, 0xf841956e, 0x180175c2, + 0xa9997ed3, 0x49d99e7f, 0x6cf4c97a, 0x8cb429d6, 0x26ae6770, 0xc6ee87dc, 0xe3c3d0d9, 0x03833075, + 0x851fb00a, 0x655f50a6, 0x407207a3, 0xa032e70f, 0x0a28a9a9, 0xea684905, 0xcf451e00, 0x2f05feac, + 0x9e9df5bd, 0x7edd1511, 0x5bf04214, 0xbbb0a2b8, 0x11aaec1e, 0xf1ea0cb2, 0xd4c75bb7, 0x3487bb1b, + 0xbdc82d81, 0x5d88cd2d, 0x78a59a28, 0x98e57a84, 0x32ff3422, 0xd2bfd48e, 0xf792838b, 0x17d26327, + 0xa64a6836, 0x460a889a, 0x6327df9f, 0x83673f33, 0x297d7195, 0xc93d9139, 0xec10c63c, 0x0c502690, + 0x8acca6ef, 0x6a8c4643, 0x4fa11146, 0xafe1f1ea, 0x05fbbf4c, 0xe5bb5fe0, 0xc09608e5, 
0x20d6e849, + 0x914ee358, 0x710e03f4, 0x542354f1, 0xb463b45d, 0x1e79fafb, 0xfe391a57, 0xdb144d52, 0x3b54adfe, + 0xd3c13b5d, 0x3381dbf1, 0x16ac8cf4, 0xf6ec6c58, 0x5cf622fe, 0xbcb6c252, 0x999b9557, 0x79db75fb, + 0xc8437eea, 0x28039e46, 0x0d2ec943, 0xed6e29ef, 0x47746749, 0xa73487e5, 0x8219d0e0, 0x6259304c, + 0xe4c5b033, 0x0485509f, 0x21a8079a, 0xc1e8e736, 0x6bf2a990, 0x8bb2493c, 0xae9f1e39, 0x4edffe95, + 0xff47f584, 0x1f071528, 0x3a2a422d, 0xda6aa281, 0x7070ec27, 0x90300c8b, 0xb51d5b8e, 0x555dbb22, + 0x61da0039, 0x819ae095, 0xa4b7b790, 0x44f7573c, 0xeeed199a, 0x0eadf936, 0x2b80ae33, 0xcbc04e9f, + 0x7a58458e, 0x9a18a522, 0xbf35f227, 0x5f75128b, 0xf56f5c2d, 0x152fbc81, 0x3002eb84, 0xd0420b28, + 0x56de8b57, 0xb69e6bfb, 0x93b33cfe, 0x73f3dc52, 0xd9e992f4, 0x39a97258, 0x1c84255d, 0xfcc4c5f1, + 0x4d5ccee0, 0xad1c2e4c, 0x88317949, 0x687199e5, 0xc26bd743, 0x222b37ef, 0x070660ea, 0xe7468046, + 0x0fd316e5, 0xef93f649, 0xcabea14c, 0x2afe41e0, 0x80e40f46, 0x60a4efea, 0x4589b8ef, 0xa5c95843, + 0x14515352, 0xf411b3fe, 0xd13ce4fb, 0x317c0457, 0x9b664af1, 0x7b26aa5d, 0x5e0bfd58, 0xbe4b1df4, + 0x38d79d8b, 0xd8977d27, 0xfdba2a22, 0x1dfaca8e, 0xb7e08428, 0x57a06484, 0x728d3381, 0x92cdd32d, + 0x2355d83c, 0xc3153890, 0xe6386f95, 0x06788f39, 0xac62c19f, 0x4c222133, 0x690f7636, 0x894f969a, + }, + { + 0x00000000, 0x7e7c2df3, 0xfcf85be6, 0x82847615, 0xfc1cc13d, 0x8260ecce, 0x00e49adb, 0x7e98b728, + 0xfdd5f48b, 0x83a9d978, 0x012daf6d, 0x7f51829e, 0x01c935b6, 0x7fb51845, 0xfd316e50, 0x834d43a3, + 0xfe479fe7, 0x803bb214, 0x02bfc401, 0x7cc3e9f2, 0x025b5eda, 0x7c277329, 0xfea3053c, 0x80df28cf, + 0x03926b6c, 0x7dee469f, 0xff6a308a, 0x81161d79, 0xff8eaa51, 0x81f287a2, 0x0376f1b7, 0x7d0adc44, + 0xf963493f, 0x871f64cc, 0x059b12d9, 0x7be73f2a, 0x057f8802, 0x7b03a5f1, 0xf987d3e4, 0x87fbfe17, + 0x04b6bdb4, 0x7aca9047, 0xf84ee652, 0x8632cba1, 0xf8aa7c89, 0x86d6517a, 0x0452276f, 0x7a2e0a9c, + 0x0724d6d8, 0x7958fb2b, 0xfbdc8d3e, 0x85a0a0cd, 0xfb3817e5, 0x85443a16, 0x07c04c03, 0x79bc61f0, + 0xfaf12253, 
0x848d0fa0, 0x060979b5, 0x78755446, 0x06ede36e, 0x7891ce9d, 0xfa15b888, 0x8469957b, + 0xf72ae48f, 0x8956c97c, 0x0bd2bf69, 0x75ae929a, 0x0b3625b2, 0x754a0841, 0xf7ce7e54, 0x89b253a7, + 0x0aff1004, 0x74833df7, 0xf6074be2, 0x887b6611, 0xf6e3d139, 0x889ffcca, 0x0a1b8adf, 0x7467a72c, + 0x096d7b68, 0x7711569b, 0xf595208e, 0x8be90d7d, 0xf571ba55, 0x8b0d97a6, 0x0989e1b3, 0x77f5cc40, + 0xf4b88fe3, 0x8ac4a210, 0x0840d405, 0x763cf9f6, 0x08a44ede, 0x76d8632d, 0xf45c1538, 0x8a2038cb, + 0x0e49adb0, 0x70358043, 0xf2b1f656, 0x8ccddba5, 0xf2556c8d, 0x8c29417e, 0x0ead376b, 0x70d11a98, + 0xf39c593b, 0x8de074c8, 0x0f6402dd, 0x71182f2e, 0x0f809806, 0x71fcb5f5, 0xf378c3e0, 0x8d04ee13, + 0xf00e3257, 0x8e721fa4, 0x0cf669b1, 0x728a4442, 0x0c12f36a, 0x726ede99, 0xf0eaa88c, 0x8e96857f, + 0x0ddbc6dc, 0x73a7eb2f, 0xf1239d3a, 0x8f5fb0c9, 0xf1c707e1, 0x8fbb2a12, 0x0d3f5c07, 0x734371f4, + 0xebb9bfef, 0x95c5921c, 0x1741e409, 0x693dc9fa, 0x17a57ed2, 0x69d95321, 0xeb5d2534, 0x952108c7, + 0x166c4b64, 0x68106697, 0xea941082, 0x94e83d71, 0xea708a59, 0x940ca7aa, 0x1688d1bf, 0x68f4fc4c, + 0x15fe2008, 0x6b820dfb, 0xe9067bee, 0x977a561d, 0xe9e2e135, 0x979eccc6, 0x151abad3, 0x6b669720, + 0xe82bd483, 0x9657f970, 0x14d38f65, 0x6aafa296, 0x143715be, 0x6a4b384d, 0xe8cf4e58, 0x96b363ab, + 0x12daf6d0, 0x6ca6db23, 0xee22ad36, 0x905e80c5, 0xeec637ed, 0x90ba1a1e, 0x123e6c0b, 0x6c4241f8, + 0xef0f025b, 0x91732fa8, 0x13f759bd, 0x6d8b744e, 0x1313c366, 0x6d6fee95, 0xefeb9880, 0x9197b573, + 0xec9d6937, 0x92e144c4, 0x106532d1, 0x6e191f22, 0x1081a80a, 0x6efd85f9, 0xec79f3ec, 0x9205de1f, + 0x11489dbc, 0x6f34b04f, 0xedb0c65a, 0x93cceba9, 0xed545c81, 0x93287172, 0x11ac0767, 0x6fd02a94, + 0x1c935b60, 0x62ef7693, 0xe06b0086, 0x9e172d75, 0xe08f9a5d, 0x9ef3b7ae, 0x1c77c1bb, 0x620bec48, + 0xe146afeb, 0x9f3a8218, 0x1dbef40d, 0x63c2d9fe, 0x1d5a6ed6, 0x63264325, 0xe1a23530, 0x9fde18c3, + 0xe2d4c487, 0x9ca8e974, 0x1e2c9f61, 0x6050b292, 0x1ec805ba, 0x60b42849, 0xe2305e5c, 0x9c4c73af, + 0x1f01300c, 0x617d1dff, 0xe3f96bea, 0x9d854619, 
0xe31df131, 0x9d61dcc2, 0x1fe5aad7, 0x61998724, + 0xe5f0125f, 0x9b8c3fac, 0x190849b9, 0x6774644a, 0x19ecd362, 0x6790fe91, 0xe5148884, 0x9b68a577, + 0x1825e6d4, 0x6659cb27, 0xe4ddbd32, 0x9aa190c1, 0xe43927e9, 0x9a450a1a, 0x18c17c0f, 0x66bd51fc, + 0x1bb78db8, 0x65cba04b, 0xe74fd65e, 0x9933fbad, 0xe7ab4c85, 0x99d76176, 0x1b531763, 0x652f3a90, + 0xe6627933, 0x981e54c0, 0x1a9a22d5, 0x64e60f26, 0x1a7eb80e, 0x640295fd, 0xe686e3e8, 0x98face1b, + }, + { + 0x00000000, 0xd29f092f, 0xa0d264af, 0x724d6d80, 0x4448bfaf, 0x96d7b680, 0xe49adb00, 0x3605d22f, + 0x88917f5e, 0x5a0e7671, 0x28431bf1, 0xfadc12de, 0xccd9c0f1, 0x1e46c9de, 0x6c0ba45e, 0xbe94ad71, + 0x14ce884d, 0xc6518162, 0xb41cece2, 0x6683e5cd, 0x508637e2, 0x82193ecd, 0xf054534d, 0x22cb5a62, + 0x9c5ff713, 0x4ec0fe3c, 0x3c8d93bc, 0xee129a93, 0xd81748bc, 0x0a884193, 0x78c52c13, 0xaa5a253c, + 0x299d109a, 0xfb0219b5, 0x894f7435, 0x5bd07d1a, 0x6dd5af35, 0xbf4aa61a, 0xcd07cb9a, 0x1f98c2b5, + 0xa10c6fc4, 0x739366eb, 0x01de0b6b, 0xd3410244, 0xe544d06b, 0x37dbd944, 0x4596b4c4, 0x9709bdeb, + 0x3d5398d7, 0xefcc91f8, 0x9d81fc78, 0x4f1ef557, 0x791b2778, 0xab842e57, 0xd9c943d7, 0x0b564af8, + 0xb5c2e789, 0x675deea6, 0x15108326, 0xc78f8a09, 0xf18a5826, 0x23155109, 0x51583c89, 0x83c735a6, + 0x533a2134, 0x81a5281b, 0xf3e8459b, 0x21774cb4, 0x17729e9b, 0xc5ed97b4, 0xb7a0fa34, 0x653ff31b, + 0xdbab5e6a, 0x09345745, 0x7b793ac5, 0xa9e633ea, 0x9fe3e1c5, 0x4d7ce8ea, 0x3f31856a, 0xedae8c45, + 0x47f4a979, 0x956ba056, 0xe726cdd6, 0x35b9c4f9, 0x03bc16d6, 0xd1231ff9, 0xa36e7279, 0x71f17b56, + 0xcf65d627, 0x1dfadf08, 0x6fb7b288, 0xbd28bba7, 0x8b2d6988, 0x59b260a7, 0x2bff0d27, 0xf9600408, + 0x7aa731ae, 0xa8383881, 0xda755501, 0x08ea5c2e, 0x3eef8e01, 0xec70872e, 0x9e3deaae, 0x4ca2e381, + 0xf2364ef0, 0x20a947df, 0x52e42a5f, 0x807b2370, 0xb67ef15f, 0x64e1f870, 0x16ac95f0, 0xc4339cdf, + 0x6e69b9e3, 0xbcf6b0cc, 0xcebbdd4c, 0x1c24d463, 0x2a21064c, 0xf8be0f63, 0x8af362e3, 0x586c6bcc, + 0xe6f8c6bd, 0x3467cf92, 0x462aa212, 0x94b5ab3d, 0xa2b07912, 0x702f703d, 
0x02621dbd, 0xd0fd1492, + 0xa6744268, 0x74eb4b47, 0x06a626c7, 0xd4392fe8, 0xe23cfdc7, 0x30a3f4e8, 0x42ee9968, 0x90719047, + 0x2ee53d36, 0xfc7a3419, 0x8e375999, 0x5ca850b6, 0x6aad8299, 0xb8328bb6, 0xca7fe636, 0x18e0ef19, + 0xb2baca25, 0x6025c30a, 0x1268ae8a, 0xc0f7a7a5, 0xf6f2758a, 0x246d7ca5, 0x56201125, 0x84bf180a, + 0x3a2bb57b, 0xe8b4bc54, 0x9af9d1d4, 0x4866d8fb, 0x7e630ad4, 0xacfc03fb, 0xdeb16e7b, 0x0c2e6754, + 0x8fe952f2, 0x5d765bdd, 0x2f3b365d, 0xfda43f72, 0xcba1ed5d, 0x193ee472, 0x6b7389f2, 0xb9ec80dd, + 0x07782dac, 0xd5e72483, 0xa7aa4903, 0x7535402c, 0x43309203, 0x91af9b2c, 0xe3e2f6ac, 0x317dff83, + 0x9b27dabf, 0x49b8d390, 0x3bf5be10, 0xe96ab73f, 0xdf6f6510, 0x0df06c3f, 0x7fbd01bf, 0xad220890, + 0x13b6a5e1, 0xc129acce, 0xb364c14e, 0x61fbc861, 0x57fe1a4e, 0x85611361, 0xf72c7ee1, 0x25b377ce, + 0xf54e635c, 0x27d16a73, 0x559c07f3, 0x87030edc, 0xb106dcf3, 0x6399d5dc, 0x11d4b85c, 0xc34bb173, + 0x7ddf1c02, 0xaf40152d, 0xdd0d78ad, 0x0f927182, 0x3997a3ad, 0xeb08aa82, 0x9945c702, 0x4bdace2d, + 0xe180eb11, 0x331fe23e, 0x41528fbe, 0x93cd8691, 0xa5c854be, 0x77575d91, 0x051a3011, 0xd785393e, + 0x6911944f, 0xbb8e9d60, 0xc9c3f0e0, 0x1b5cf9cf, 0x2d592be0, 0xffc622cf, 0x8d8b4f4f, 0x5f144660, + 0xdcd373c6, 0x0e4c7ae9, 0x7c011769, 0xae9e1e46, 0x989bcc69, 0x4a04c546, 0x3849a8c6, 0xead6a1e9, + 0x54420c98, 0x86dd05b7, 0xf4906837, 0x260f6118, 0x100ab337, 0xc295ba18, 0xb0d8d798, 0x6247deb7, + 0xc81dfb8b, 0x1a82f2a4, 0x68cf9f24, 0xba50960b, 0x8c554424, 0x5eca4d0b, 0x2c87208b, 0xfe1829a4, + 0x408c84d5, 0x92138dfa, 0xe05ee07a, 0x32c1e955, 0x04c43b7a, 0xd65b3255, 0xa4165fd5, 0x768956fa, + }, + { + 0x00000000, 0x4904f221, 0x9209e442, 0xdb0d1663, 0x21ffbe75, 0x68fb4c54, 0xb3f65a37, 0xfaf2a816, + 0x43ff7cea, 0x0afb8ecb, 0xd1f698a8, 0x98f26a89, 0x6200c29f, 0x2b0430be, 0xf00926dd, 0xb90dd4fc, + 0x87fef9d4, 0xcefa0bf5, 0x15f71d96, 0x5cf3efb7, 0xa60147a1, 0xef05b580, 0x3408a3e3, 0x7d0c51c2, + 0xc401853e, 0x8d05771f, 0x5608617c, 0x1f0c935d, 0xe5fe3b4b, 0xacfac96a, 0x77f7df09, 0x3ef32d28, + 
0x0a118559, 0x43157778, 0x9818611b, 0xd11c933a, 0x2bee3b2c, 0x62eac90d, 0xb9e7df6e, 0xf0e32d4f, + 0x49eef9b3, 0x00ea0b92, 0xdbe71df1, 0x92e3efd0, 0x681147c6, 0x2115b5e7, 0xfa18a384, 0xb31c51a5, + 0x8def7c8d, 0xc4eb8eac, 0x1fe698cf, 0x56e26aee, 0xac10c2f8, 0xe51430d9, 0x3e1926ba, 0x771dd49b, + 0xce100067, 0x8714f246, 0x5c19e425, 0x151d1604, 0xefefbe12, 0xa6eb4c33, 0x7de65a50, 0x34e2a871, + 0x14230ab2, 0x5d27f893, 0x862aeef0, 0xcf2e1cd1, 0x35dcb4c7, 0x7cd846e6, 0xa7d55085, 0xeed1a2a4, + 0x57dc7658, 0x1ed88479, 0xc5d5921a, 0x8cd1603b, 0x7623c82d, 0x3f273a0c, 0xe42a2c6f, 0xad2ede4e, + 0x93ddf366, 0xdad90147, 0x01d41724, 0x48d0e505, 0xb2224d13, 0xfb26bf32, 0x202ba951, 0x692f5b70, + 0xd0228f8c, 0x99267dad, 0x422b6bce, 0x0b2f99ef, 0xf1dd31f9, 0xb8d9c3d8, 0x63d4d5bb, 0x2ad0279a, + 0x1e328feb, 0x57367dca, 0x8c3b6ba9, 0xc53f9988, 0x3fcd319e, 0x76c9c3bf, 0xadc4d5dc, 0xe4c027fd, + 0x5dcdf301, 0x14c90120, 0xcfc41743, 0x86c0e562, 0x7c324d74, 0x3536bf55, 0xee3ba936, 0xa73f5b17, + 0x99cc763f, 0xd0c8841e, 0x0bc5927d, 0x42c1605c, 0xb833c84a, 0xf1373a6b, 0x2a3a2c08, 0x633ede29, + 0xda330ad5, 0x9337f8f4, 0x483aee97, 0x013e1cb6, 0xfbccb4a0, 0xb2c84681, 0x69c550e2, 0x20c1a2c3, + 0x28461564, 0x6142e745, 0xba4ff126, 0xf34b0307, 0x09b9ab11, 0x40bd5930, 0x9bb04f53, 0xd2b4bd72, + 0x6bb9698e, 0x22bd9baf, 0xf9b08dcc, 0xb0b47fed, 0x4a46d7fb, 0x034225da, 0xd84f33b9, 0x914bc198, + 0xafb8ecb0, 0xe6bc1e91, 0x3db108f2, 0x74b5fad3, 0x8e4752c5, 0xc743a0e4, 0x1c4eb687, 0x554a44a6, + 0xec47905a, 0xa543627b, 0x7e4e7418, 0x374a8639, 0xcdb82e2f, 0x84bcdc0e, 0x5fb1ca6d, 0x16b5384c, + 0x2257903d, 0x6b53621c, 0xb05e747f, 0xf95a865e, 0x03a82e48, 0x4aacdc69, 0x91a1ca0a, 0xd8a5382b, + 0x61a8ecd7, 0x28ac1ef6, 0xf3a10895, 0xbaa5fab4, 0x405752a2, 0x0953a083, 0xd25eb6e0, 0x9b5a44c1, + 0xa5a969e9, 0xecad9bc8, 0x37a08dab, 0x7ea47f8a, 0x8456d79c, 0xcd5225bd, 0x165f33de, 0x5f5bc1ff, + 0xe6561503, 0xaf52e722, 0x745ff141, 0x3d5b0360, 0xc7a9ab76, 0x8ead5957, 0x55a04f34, 0x1ca4bd15, + 0x3c651fd6, 0x7561edf7, 0xae6cfb94, 
0xe76809b5, 0x1d9aa1a3, 0x549e5382, 0x8f9345e1, 0xc697b7c0, + 0x7f9a633c, 0x369e911d, 0xed93877e, 0xa497755f, 0x5e65dd49, 0x17612f68, 0xcc6c390b, 0x8568cb2a, + 0xbb9be602, 0xf29f1423, 0x29920240, 0x6096f061, 0x9a645877, 0xd360aa56, 0x086dbc35, 0x41694e14, + 0xf8649ae8, 0xb16068c9, 0x6a6d7eaa, 0x23698c8b, 0xd99b249d, 0x909fd6bc, 0x4b92c0df, 0x029632fe, + 0x36749a8f, 0x7f7068ae, 0xa47d7ecd, 0xed798cec, 0x178b24fa, 0x5e8fd6db, 0x8582c0b8, 0xcc863299, + 0x758be665, 0x3c8f1444, 0xe7820227, 0xae86f006, 0x54745810, 0x1d70aa31, 0xc67dbc52, 0x8f794e73, + 0xb18a635b, 0xf88e917a, 0x23838719, 0x6a877538, 0x9075dd2e, 0xd9712f0f, 0x027c396c, 0x4b78cb4d, + 0xf2751fb1, 0xbb71ed90, 0x607cfbf3, 0x297809d2, 0xd38aa1c4, 0x9a8e53e5, 0x41834586, 0x0887b7a7, + }, }; const uint32_t crc32_short[4][256] = { - { - 0x00000000, 0xdcb17aa4, 0xbc8e83b9, 0x603ff91d, 0x7cf17183, 0xa0400b27, 0xc07ff23a, 0x1cce889e, - 0xf9e2e306, 0x255399a2, 0x456c60bf, 0x99dd1a1b, 0x85139285, 0x59a2e821, 0x399d113c, 0xe52c6b98, - 0xf629b0fd, 0x2a98ca59, 0x4aa73344, 0x961649e0, 0x8ad8c17e, 0x5669bbda, 0x365642c7, 0xeae73863, - 0x0fcb53fb, 0xd37a295f, 0xb345d042, 0x6ff4aae6, 0x733a2278, 0xaf8b58dc, 0xcfb4a1c1, 0x1305db65, - 0xe9bf170b, 0x350e6daf, 0x553194b2, 0x8980ee16, 0x954e6688, 0x49ff1c2c, 0x29c0e531, 0xf5719f95, - 0x105df40d, 0xccec8ea9, 0xacd377b4, 0x70620d10, 0x6cac858e, 0xb01dff2a, 0xd0220637, 0x0c937c93, - 0x1f96a7f6, 0xc327dd52, 0xa318244f, 0x7fa95eeb, 0x6367d675, 0xbfd6acd1, 0xdfe955cc, 0x03582f68, - 0xe67444f0, 0x3ac53e54, 0x5afac749, 0x864bbded, 0x9a853573, 0x46344fd7, 0x260bb6ca, 0xfabacc6e, - 0xd69258e7, 0x0a232243, 0x6a1cdb5e, 0xb6ada1fa, 0xaa632964, 0x76d253c0, 0x16edaadd, 0xca5cd079, - 0x2f70bbe1, 0xf3c1c145, 0x93fe3858, 0x4f4f42fc, 0x5381ca62, 0x8f30b0c6, 0xef0f49db, 0x33be337f, - 0x20bbe81a, 0xfc0a92be, 0x9c356ba3, 0x40841107, 0x5c4a9999, 0x80fbe33d, 0xe0c41a20, 0x3c756084, - 0xd9590b1c, 0x05e871b8, 0x65d788a5, 0xb966f201, 0xa5a87a9f, 0x7919003b, 0x1926f926, 0xc5978382, - 0x3f2d4fec, 0xe39c3548, 
0x83a3cc55, 0x5f12b6f1, 0x43dc3e6f, 0x9f6d44cb, 0xff52bdd6, 0x23e3c772, - 0xc6cfacea, 0x1a7ed64e, 0x7a412f53, 0xa6f055f7, 0xba3edd69, 0x668fa7cd, 0x06b05ed0, 0xda012474, - 0xc904ff11, 0x15b585b5, 0x758a7ca8, 0xa93b060c, 0xb5f58e92, 0x6944f436, 0x097b0d2b, 0xd5ca778f, - 0x30e61c17, 0xec5766b3, 0x8c689fae, 0x50d9e50a, 0x4c176d94, 0x90a61730, 0xf099ee2d, 0x2c289489, - 0xa8c8c73f, 0x7479bd9b, 0x14464486, 0xc8f73e22, 0xd439b6bc, 0x0888cc18, 0x68b73505, 0xb4064fa1, - 0x512a2439, 0x8d9b5e9d, 0xeda4a780, 0x3115dd24, 0x2ddb55ba, 0xf16a2f1e, 0x9155d603, 0x4de4aca7, - 0x5ee177c2, 0x82500d66, 0xe26ff47b, 0x3ede8edf, 0x22100641, 0xfea17ce5, 0x9e9e85f8, 0x422fff5c, - 0xa70394c4, 0x7bb2ee60, 0x1b8d177d, 0xc73c6dd9, 0xdbf2e547, 0x07439fe3, 0x677c66fe, 0xbbcd1c5a, - 0x4177d034, 0x9dc6aa90, 0xfdf9538d, 0x21482929, 0x3d86a1b7, 0xe137db13, 0x8108220e, 0x5db958aa, - 0xb8953332, 0x64244996, 0x041bb08b, 0xd8aaca2f, 0xc46442b1, 0x18d53815, 0x78eac108, 0xa45bbbac, - 0xb75e60c9, 0x6bef1a6d, 0x0bd0e370, 0xd76199d4, 0xcbaf114a, 0x171e6bee, 0x772192f3, 0xab90e857, - 0x4ebc83cf, 0x920df96b, 0xf2320076, 0x2e837ad2, 0x324df24c, 0xeefc88e8, 0x8ec371f5, 0x52720b51, - 0x7e5a9fd8, 0xa2ebe57c, 0xc2d41c61, 0x1e6566c5, 0x02abee5b, 0xde1a94ff, 0xbe256de2, 0x62941746, - 0x87b87cde, 0x5b09067a, 0x3b36ff67, 0xe78785c3, 0xfb490d5d, 0x27f877f9, 0x47c78ee4, 0x9b76f440, - 0x88732f25, 0x54c25581, 0x34fdac9c, 0xe84cd638, 0xf4825ea6, 0x28332402, 0x480cdd1f, 0x94bda7bb, - 0x7191cc23, 0xad20b687, 0xcd1f4f9a, 0x11ae353e, 0x0d60bda0, 0xd1d1c704, 0xb1ee3e19, 0x6d5f44bd, - 0x97e588d3, 0x4b54f277, 0x2b6b0b6a, 0xf7da71ce, 0xeb14f950, 0x37a583f4, 0x579a7ae9, 0x8b2b004d, - 0x6e076bd5, 0xb2b61171, 0xd289e86c, 0x0e3892c8, 0x12f61a56, 0xce4760f2, 0xae7899ef, 0x72c9e34b, - 0x61cc382e, 0xbd7d428a, 0xdd42bb97, 0x01f3c133, 0x1d3d49ad, 0xc18c3309, 0xa1b3ca14, 0x7d02b0b0, - 0x982edb28, 0x449fa18c, 0x24a05891, 0xf8112235, 0xe4dfaaab, 0x386ed00f, 0x58512912, 0x84e053b6, - }, - { - 0x00000000, 0x547df88f, 0xa8fbf11e, 0xfc860991, 
0x541b94cd, 0x00666c42, 0xfce065d3, 0xa89d9d5c, - 0xa837299a, 0xfc4ad115, 0x00ccd884, 0x54b1200b, 0xfc2cbd57, 0xa85145d8, 0x54d74c49, 0x00aab4c6, - 0x558225c5, 0x01ffdd4a, 0xfd79d4db, 0xa9042c54, 0x0199b108, 0x55e44987, 0xa9624016, 0xfd1fb899, - 0xfdb50c5f, 0xa9c8f4d0, 0x554efd41, 0x013305ce, 0xa9ae9892, 0xfdd3601d, 0x0155698c, 0x55289103, - 0xab044b8a, 0xff79b305, 0x03ffba94, 0x5782421b, 0xff1fdf47, 0xab6227c8, 0x57e42e59, 0x0399d6d6, - 0x03336210, 0x574e9a9f, 0xabc8930e, 0xffb56b81, 0x5728f6dd, 0x03550e52, 0xffd307c3, 0xabaeff4c, - 0xfe866e4f, 0xaafb96c0, 0x567d9f51, 0x020067de, 0xaa9dfa82, 0xfee0020d, 0x02660b9c, 0x561bf313, - 0x56b147d5, 0x02ccbf5a, 0xfe4ab6cb, 0xaa374e44, 0x02aad318, 0x56d72b97, 0xaa512206, 0xfe2cda89, - 0x53e4e1e5, 0x0799196a, 0xfb1f10fb, 0xaf62e874, 0x07ff7528, 0x53828da7, 0xaf048436, 0xfb797cb9, - 0xfbd3c87f, 0xafae30f0, 0x53283961, 0x0755c1ee, 0xafc85cb2, 0xfbb5a43d, 0x0733adac, 0x534e5523, - 0x0666c420, 0x521b3caf, 0xae9d353e, 0xfae0cdb1, 0x527d50ed, 0x0600a862, 0xfa86a1f3, 0xaefb597c, - 0xae51edba, 0xfa2c1535, 0x06aa1ca4, 0x52d7e42b, 0xfa4a7977, 0xae3781f8, 0x52b18869, 0x06cc70e6, - 0xf8e0aa6f, 0xac9d52e0, 0x501b5b71, 0x0466a3fe, 0xacfb3ea2, 0xf886c62d, 0x0400cfbc, 0x507d3733, - 0x50d783f5, 0x04aa7b7a, 0xf82c72eb, 0xac518a64, 0x04cc1738, 0x50b1efb7, 0xac37e626, 0xf84a1ea9, - 0xad628faa, 0xf91f7725, 0x05997eb4, 0x51e4863b, 0xf9791b67, 0xad04e3e8, 0x5182ea79, 0x05ff12f6, - 0x0555a630, 0x51285ebf, 0xadae572e, 0xf9d3afa1, 0x514e32fd, 0x0533ca72, 0xf9b5c3e3, 0xadc83b6c, - 0xa7c9c3ca, 0xf3b43b45, 0x0f3232d4, 0x5b4fca5b, 0xf3d25707, 0xa7afaf88, 0x5b29a619, 0x0f545e96, - 0x0ffeea50, 0x5b8312df, 0xa7051b4e, 0xf378e3c1, 0x5be57e9d, 0x0f988612, 0xf31e8f83, 0xa763770c, - 0xf24be60f, 0xa6361e80, 0x5ab01711, 0x0ecdef9e, 0xa65072c2, 0xf22d8a4d, 0x0eab83dc, 0x5ad67b53, - 0x5a7ccf95, 0x0e01371a, 0xf2873e8b, 0xa6fac604, 0x0e675b58, 0x5a1aa3d7, 0xa69caa46, 0xf2e152c9, - 0x0ccd8840, 0x58b070cf, 0xa436795e, 0xf04b81d1, 0x58d61c8d, 0x0cabe402, 0xf02ded93, 
0xa450151c, - 0xa4faa1da, 0xf0875955, 0x0c0150c4, 0x587ca84b, 0xf0e13517, 0xa49ccd98, 0x581ac409, 0x0c673c86, - 0x594fad85, 0x0d32550a, 0xf1b45c9b, 0xa5c9a414, 0x0d543948, 0x5929c1c7, 0xa5afc856, 0xf1d230d9, - 0xf178841f, 0xa5057c90, 0x59837501, 0x0dfe8d8e, 0xa56310d2, 0xf11ee85d, 0x0d98e1cc, 0x59e51943, - 0xf42d222f, 0xa050daa0, 0x5cd6d331, 0x08ab2bbe, 0xa036b6e2, 0xf44b4e6d, 0x08cd47fc, 0x5cb0bf73, - 0x5c1a0bb5, 0x0867f33a, 0xf4e1faab, 0xa09c0224, 0x08019f78, 0x5c7c67f7, 0xa0fa6e66, 0xf48796e9, - 0xa1af07ea, 0xf5d2ff65, 0x0954f6f4, 0x5d290e7b, 0xf5b49327, 0xa1c96ba8, 0x5d4f6239, 0x09329ab6, - 0x09982e70, 0x5de5d6ff, 0xa163df6e, 0xf51e27e1, 0x5d83babd, 0x09fe4232, 0xf5784ba3, 0xa105b32c, - 0x5f2969a5, 0x0b54912a, 0xf7d298bb, 0xa3af6034, 0x0b32fd68, 0x5f4f05e7, 0xa3c90c76, 0xf7b4f4f9, - 0xf71e403f, 0xa363b8b0, 0x5fe5b121, 0x0b9849ae, 0xa305d4f2, 0xf7782c7d, 0x0bfe25ec, 0x5f83dd63, - 0x0aab4c60, 0x5ed6b4ef, 0xa250bd7e, 0xf62d45f1, 0x5eb0d8ad, 0x0acd2022, 0xf64b29b3, 0xa236d13c, - 0xa29c65fa, 0xf6e19d75, 0x0a6794e4, 0x5e1a6c6b, 0xf687f137, 0xa2fa09b8, 0x5e7c0029, 0x0a01f8a6, - }, - { - 0x00000000, 0x4a7ff165, 0x94ffe2ca, 0xde8013af, 0x2c13b365, 0x666c4200, 0xb8ec51af, 0xf293a0ca, - 0x582766ca, 0x125897af, 0xccd88400, 0x86a77565, 0x7434d5af, 0x3e4b24ca, 0xe0cb3765, 0xaab4c600, - 0xb04ecd94, 0xfa313cf1, 0x24b12f5e, 0x6ecede3b, 0x9c5d7ef1, 0xd6228f94, 0x08a29c3b, 0x42dd6d5e, - 0xe869ab5e, 0xa2165a3b, 0x7c964994, 0x36e9b8f1, 0xc47a183b, 0x8e05e95e, 0x5085faf1, 0x1afa0b94, - 0x6571edd9, 0x2f0e1cbc, 0xf18e0f13, 0xbbf1fe76, 0x49625ebc, 0x031dafd9, 0xdd9dbc76, 0x97e24d13, - 0x3d568b13, 0x77297a76, 0xa9a969d9, 0xe3d698bc, 0x11453876, 0x5b3ac913, 0x85badabc, 0xcfc52bd9, - 0xd53f204d, 0x9f40d128, 0x41c0c287, 0x0bbf33e2, 0xf92c9328, 0xb353624d, 0x6dd371e2, 0x27ac8087, - 0x8d184687, 0xc767b7e2, 0x19e7a44d, 0x53985528, 0xa10bf5e2, 0xeb740487, 0x35f41728, 0x7f8be64d, - 0xcae3dbb2, 0x809c2ad7, 0x5e1c3978, 0x1463c81d, 0xe6f068d7, 0xac8f99b2, 0x720f8a1d, 0x38707b78, - 0x92c4bd78, 
0xd8bb4c1d, 0x063b5fb2, 0x4c44aed7, 0xbed70e1d, 0xf4a8ff78, 0x2a28ecd7, 0x60571db2, - 0x7aad1626, 0x30d2e743, 0xee52f4ec, 0xa42d0589, 0x56bea543, 0x1cc15426, 0xc2414789, 0x883eb6ec, - 0x228a70ec, 0x68f58189, 0xb6759226, 0xfc0a6343, 0x0e99c389, 0x44e632ec, 0x9a662143, 0xd019d026, - 0xaf92366b, 0xe5edc70e, 0x3b6dd4a1, 0x711225c4, 0x8381850e, 0xc9fe746b, 0x177e67c4, 0x5d0196a1, - 0xf7b550a1, 0xbdcaa1c4, 0x634ab26b, 0x2935430e, 0xdba6e3c4, 0x91d912a1, 0x4f59010e, 0x0526f06b, - 0x1fdcfbff, 0x55a30a9a, 0x8b231935, 0xc15ce850, 0x33cf489a, 0x79b0b9ff, 0xa730aa50, 0xed4f5b35, - 0x47fb9d35, 0x0d846c50, 0xd3047fff, 0x997b8e9a, 0x6be82e50, 0x2197df35, 0xff17cc9a, 0xb5683dff, - 0x902bc195, 0xda5430f0, 0x04d4235f, 0x4eabd23a, 0xbc3872f0, 0xf6478395, 0x28c7903a, 0x62b8615f, - 0xc80ca75f, 0x8273563a, 0x5cf34595, 0x168cb4f0, 0xe41f143a, 0xae60e55f, 0x70e0f6f0, 0x3a9f0795, - 0x20650c01, 0x6a1afd64, 0xb49aeecb, 0xfee51fae, 0x0c76bf64, 0x46094e01, 0x98895dae, 0xd2f6accb, - 0x78426acb, 0x323d9bae, 0xecbd8801, 0xa6c27964, 0x5451d9ae, 0x1e2e28cb, 0xc0ae3b64, 0x8ad1ca01, - 0xf55a2c4c, 0xbf25dd29, 0x61a5ce86, 0x2bda3fe3, 0xd9499f29, 0x93366e4c, 0x4db67de3, 0x07c98c86, - 0xad7d4a86, 0xe702bbe3, 0x3982a84c, 0x73fd5929, 0x816ef9e3, 0xcb110886, 0x15911b29, 0x5feeea4c, - 0x4514e1d8, 0x0f6b10bd, 0xd1eb0312, 0x9b94f277, 0x690752bd, 0x2378a3d8, 0xfdf8b077, 0xb7874112, - 0x1d338712, 0x574c7677, 0x89cc65d8, 0xc3b394bd, 0x31203477, 0x7b5fc512, 0xa5dfd6bd, 0xefa027d8, - 0x5ac81a27, 0x10b7eb42, 0xce37f8ed, 0x84480988, 0x76dba942, 0x3ca45827, 0xe2244b88, 0xa85bbaed, - 0x02ef7ced, 0x48908d88, 0x96109e27, 0xdc6f6f42, 0x2efccf88, 0x64833eed, 0xba032d42, 0xf07cdc27, - 0xea86d7b3, 0xa0f926d6, 0x7e793579, 0x3406c41c, 0xc69564d6, 0x8cea95b3, 0x526a861c, 0x18157779, - 0xb2a1b179, 0xf8de401c, 0x265e53b3, 0x6c21a2d6, 0x9eb2021c, 0xd4cdf379, 0x0a4de0d6, 0x403211b3, - 0x3fb9f7fe, 0x75c6069b, 0xab461534, 0xe139e451, 0x13aa449b, 0x59d5b5fe, 0x8755a651, 0xcd2a5734, - 0x679e9134, 0x2de16051, 0xf36173fe, 0xb91e829b, 
0x4b8d2251, 0x01f2d334, 0xdf72c09b, 0x950d31fe, - 0x8ff73a6a, 0xc588cb0f, 0x1b08d8a0, 0x517729c5, 0xa3e4890f, 0xe99b786a, 0x371b6bc5, 0x7d649aa0, - 0xd7d05ca0, 0x9dafadc5, 0x432fbe6a, 0x09504f0f, 0xfbc3efc5, 0xb1bc1ea0, 0x6f3c0d0f, 0x2543fc6a, - }, - { - 0x00000000, 0x25bbf5db, 0x4b77ebb6, 0x6ecc1e6d, 0x96efd76c, 0xb35422b7, 0xdd983cda, 0xf823c901, - 0x2833d829, 0x0d882df2, 0x6344339f, 0x46ffc644, 0xbedc0f45, 0x9b67fa9e, 0xf5abe4f3, 0xd0101128, - 0x5067b052, 0x75dc4589, 0x1b105be4, 0x3eabae3f, 0xc688673e, 0xe33392e5, 0x8dff8c88, 0xa8447953, - 0x7854687b, 0x5def9da0, 0x332383cd, 0x16987616, 0xeebbbf17, 0xcb004acc, 0xa5cc54a1, 0x8077a17a, - 0xa0cf60a4, 0x8574957f, 0xebb88b12, 0xce037ec9, 0x3620b7c8, 0x139b4213, 0x7d575c7e, 0x58eca9a5, - 0x88fcb88d, 0xad474d56, 0xc38b533b, 0xe630a6e0, 0x1e136fe1, 0x3ba89a3a, 0x55648457, 0x70df718c, - 0xf0a8d0f6, 0xd513252d, 0xbbdf3b40, 0x9e64ce9b, 0x6647079a, 0x43fcf241, 0x2d30ec2c, 0x088b19f7, - 0xd89b08df, 0xfd20fd04, 0x93ece369, 0xb65716b2, 0x4e74dfb3, 0x6bcf2a68, 0x05033405, 0x20b8c1de, - 0x4472b7b9, 0x61c94262, 0x0f055c0f, 0x2abea9d4, 0xd29d60d5, 0xf726950e, 0x99ea8b63, 0xbc517eb8, - 0x6c416f90, 0x49fa9a4b, 0x27368426, 0x028d71fd, 0xfaaeb8fc, 0xdf154d27, 0xb1d9534a, 0x9462a691, - 0x141507eb, 0x31aef230, 0x5f62ec5d, 0x7ad91986, 0x82fad087, 0xa741255c, 0xc98d3b31, 0xec36ceea, - 0x3c26dfc2, 0x199d2a19, 0x77513474, 0x52eac1af, 0xaac908ae, 0x8f72fd75, 0xe1bee318, 0xc40516c3, - 0xe4bdd71d, 0xc10622c6, 0xafca3cab, 0x8a71c970, 0x72520071, 0x57e9f5aa, 0x3925ebc7, 0x1c9e1e1c, - 0xcc8e0f34, 0xe935faef, 0x87f9e482, 0xa2421159, 0x5a61d858, 0x7fda2d83, 0x111633ee, 0x34adc635, - 0xb4da674f, 0x91619294, 0xffad8cf9, 0xda167922, 0x2235b023, 0x078e45f8, 0x69425b95, 0x4cf9ae4e, - 0x9ce9bf66, 0xb9524abd, 0xd79e54d0, 0xf225a10b, 0x0a06680a, 0x2fbd9dd1, 0x417183bc, 0x64ca7667, - 0x88e56f72, 0xad5e9aa9, 0xc39284c4, 0xe629711f, 0x1e0ab81e, 0x3bb14dc5, 0x557d53a8, 0x70c6a673, - 0xa0d6b75b, 0x856d4280, 0xeba15ced, 0xce1aa936, 0x36396037, 0x138295ec, 
0x7d4e8b81, 0x58f57e5a, - 0xd882df20, 0xfd392afb, 0x93f53496, 0xb64ec14d, 0x4e6d084c, 0x6bd6fd97, 0x051ae3fa, 0x20a11621, - 0xf0b10709, 0xd50af2d2, 0xbbc6ecbf, 0x9e7d1964, 0x665ed065, 0x43e525be, 0x2d293bd3, 0x0892ce08, - 0x282a0fd6, 0x0d91fa0d, 0x635de460, 0x46e611bb, 0xbec5d8ba, 0x9b7e2d61, 0xf5b2330c, 0xd009c6d7, - 0x0019d7ff, 0x25a22224, 0x4b6e3c49, 0x6ed5c992, 0x96f60093, 0xb34df548, 0xdd81eb25, 0xf83a1efe, - 0x784dbf84, 0x5df64a5f, 0x333a5432, 0x1681a1e9, 0xeea268e8, 0xcb199d33, 0xa5d5835e, 0x806e7685, - 0x507e67ad, 0x75c59276, 0x1b098c1b, 0x3eb279c0, 0xc691b0c1, 0xe32a451a, 0x8de65b77, 0xa85daeac, - 0xcc97d8cb, 0xe92c2d10, 0x87e0337d, 0xa25bc6a6, 0x5a780fa7, 0x7fc3fa7c, 0x110fe411, 0x34b411ca, - 0xe4a400e2, 0xc11ff539, 0xafd3eb54, 0x8a681e8f, 0x724bd78e, 0x57f02255, 0x393c3c38, 0x1c87c9e3, - 0x9cf06899, 0xb94b9d42, 0xd787832f, 0xf23c76f4, 0x0a1fbff5, 0x2fa44a2e, 0x41685443, 0x64d3a198, - 0xb4c3b0b0, 0x9178456b, 0xffb45b06, 0xda0faedd, 0x222c67dc, 0x07979207, 0x695b8c6a, 0x4ce079b1, - 0x6c58b86f, 0x49e34db4, 0x272f53d9, 0x0294a602, 0xfab76f03, 0xdf0c9ad8, 0xb1c084b5, 0x947b716e, - 0x446b6046, 0x61d0959d, 0x0f1c8bf0, 0x2aa77e2b, 0xd284b72a, 0xf73f42f1, 0x99f35c9c, 0xbc48a947, - 0x3c3f083d, 0x1984fde6, 0x7748e38b, 0x52f31650, 0xaad0df51, 0x8f6b2a8a, 0xe1a734e7, 0xc41cc13c, - 0x140cd014, 0x31b725cf, 0x5f7b3ba2, 0x7ac0ce79, 0x82e30778, 0xa758f2a3, 0xc994ecce, 0xec2f1915, - }, + { + 0x00000000, 0xdcb17aa4, 0xbc8e83b9, 0x603ff91d, 0x7cf17183, 0xa0400b27, 0xc07ff23a, 0x1cce889e, + 0xf9e2e306, 0x255399a2, 0x456c60bf, 0x99dd1a1b, 0x85139285, 0x59a2e821, 0x399d113c, 0xe52c6b98, + 0xf629b0fd, 0x2a98ca59, 0x4aa73344, 0x961649e0, 0x8ad8c17e, 0x5669bbda, 0x365642c7, 0xeae73863, + 0x0fcb53fb, 0xd37a295f, 0xb345d042, 0x6ff4aae6, 0x733a2278, 0xaf8b58dc, 0xcfb4a1c1, 0x1305db65, + 0xe9bf170b, 0x350e6daf, 0x553194b2, 0x8980ee16, 0x954e6688, 0x49ff1c2c, 0x29c0e531, 0xf5719f95, + 0x105df40d, 0xccec8ea9, 0xacd377b4, 0x70620d10, 0x6cac858e, 0xb01dff2a, 0xd0220637, 0x0c937c93, + 
0x1f96a7f6, 0xc327dd52, 0xa318244f, 0x7fa95eeb, 0x6367d675, 0xbfd6acd1, 0xdfe955cc, 0x03582f68, + 0xe67444f0, 0x3ac53e54, 0x5afac749, 0x864bbded, 0x9a853573, 0x46344fd7, 0x260bb6ca, 0xfabacc6e, + 0xd69258e7, 0x0a232243, 0x6a1cdb5e, 0xb6ada1fa, 0xaa632964, 0x76d253c0, 0x16edaadd, 0xca5cd079, + 0x2f70bbe1, 0xf3c1c145, 0x93fe3858, 0x4f4f42fc, 0x5381ca62, 0x8f30b0c6, 0xef0f49db, 0x33be337f, + 0x20bbe81a, 0xfc0a92be, 0x9c356ba3, 0x40841107, 0x5c4a9999, 0x80fbe33d, 0xe0c41a20, 0x3c756084, + 0xd9590b1c, 0x05e871b8, 0x65d788a5, 0xb966f201, 0xa5a87a9f, 0x7919003b, 0x1926f926, 0xc5978382, + 0x3f2d4fec, 0xe39c3548, 0x83a3cc55, 0x5f12b6f1, 0x43dc3e6f, 0x9f6d44cb, 0xff52bdd6, 0x23e3c772, + 0xc6cfacea, 0x1a7ed64e, 0x7a412f53, 0xa6f055f7, 0xba3edd69, 0x668fa7cd, 0x06b05ed0, 0xda012474, + 0xc904ff11, 0x15b585b5, 0x758a7ca8, 0xa93b060c, 0xb5f58e92, 0x6944f436, 0x097b0d2b, 0xd5ca778f, + 0x30e61c17, 0xec5766b3, 0x8c689fae, 0x50d9e50a, 0x4c176d94, 0x90a61730, 0xf099ee2d, 0x2c289489, + 0xa8c8c73f, 0x7479bd9b, 0x14464486, 0xc8f73e22, 0xd439b6bc, 0x0888cc18, 0x68b73505, 0xb4064fa1, + 0x512a2439, 0x8d9b5e9d, 0xeda4a780, 0x3115dd24, 0x2ddb55ba, 0xf16a2f1e, 0x9155d603, 0x4de4aca7, + 0x5ee177c2, 0x82500d66, 0xe26ff47b, 0x3ede8edf, 0x22100641, 0xfea17ce5, 0x9e9e85f8, 0x422fff5c, + 0xa70394c4, 0x7bb2ee60, 0x1b8d177d, 0xc73c6dd9, 0xdbf2e547, 0x07439fe3, 0x677c66fe, 0xbbcd1c5a, + 0x4177d034, 0x9dc6aa90, 0xfdf9538d, 0x21482929, 0x3d86a1b7, 0xe137db13, 0x8108220e, 0x5db958aa, + 0xb8953332, 0x64244996, 0x041bb08b, 0xd8aaca2f, 0xc46442b1, 0x18d53815, 0x78eac108, 0xa45bbbac, + 0xb75e60c9, 0x6bef1a6d, 0x0bd0e370, 0xd76199d4, 0xcbaf114a, 0x171e6bee, 0x772192f3, 0xab90e857, + 0x4ebc83cf, 0x920df96b, 0xf2320076, 0x2e837ad2, 0x324df24c, 0xeefc88e8, 0x8ec371f5, 0x52720b51, + 0x7e5a9fd8, 0xa2ebe57c, 0xc2d41c61, 0x1e6566c5, 0x02abee5b, 0xde1a94ff, 0xbe256de2, 0x62941746, + 0x87b87cde, 0x5b09067a, 0x3b36ff67, 0xe78785c3, 0xfb490d5d, 0x27f877f9, 0x47c78ee4, 0x9b76f440, + 0x88732f25, 0x54c25581, 0x34fdac9c, 
0xe84cd638, 0xf4825ea6, 0x28332402, 0x480cdd1f, 0x94bda7bb, + 0x7191cc23, 0xad20b687, 0xcd1f4f9a, 0x11ae353e, 0x0d60bda0, 0xd1d1c704, 0xb1ee3e19, 0x6d5f44bd, + 0x97e588d3, 0x4b54f277, 0x2b6b0b6a, 0xf7da71ce, 0xeb14f950, 0x37a583f4, 0x579a7ae9, 0x8b2b004d, + 0x6e076bd5, 0xb2b61171, 0xd289e86c, 0x0e3892c8, 0x12f61a56, 0xce4760f2, 0xae7899ef, 0x72c9e34b, + 0x61cc382e, 0xbd7d428a, 0xdd42bb97, 0x01f3c133, 0x1d3d49ad, 0xc18c3309, 0xa1b3ca14, 0x7d02b0b0, + 0x982edb28, 0x449fa18c, 0x24a05891, 0xf8112235, 0xe4dfaaab, 0x386ed00f, 0x58512912, 0x84e053b6, + }, + { + 0x00000000, 0x547df88f, 0xa8fbf11e, 0xfc860991, 0x541b94cd, 0x00666c42, 0xfce065d3, 0xa89d9d5c, + 0xa837299a, 0xfc4ad115, 0x00ccd884, 0x54b1200b, 0xfc2cbd57, 0xa85145d8, 0x54d74c49, 0x00aab4c6, + 0x558225c5, 0x01ffdd4a, 0xfd79d4db, 0xa9042c54, 0x0199b108, 0x55e44987, 0xa9624016, 0xfd1fb899, + 0xfdb50c5f, 0xa9c8f4d0, 0x554efd41, 0x013305ce, 0xa9ae9892, 0xfdd3601d, 0x0155698c, 0x55289103, + 0xab044b8a, 0xff79b305, 0x03ffba94, 0x5782421b, 0xff1fdf47, 0xab6227c8, 0x57e42e59, 0x0399d6d6, + 0x03336210, 0x574e9a9f, 0xabc8930e, 0xffb56b81, 0x5728f6dd, 0x03550e52, 0xffd307c3, 0xabaeff4c, + 0xfe866e4f, 0xaafb96c0, 0x567d9f51, 0x020067de, 0xaa9dfa82, 0xfee0020d, 0x02660b9c, 0x561bf313, + 0x56b147d5, 0x02ccbf5a, 0xfe4ab6cb, 0xaa374e44, 0x02aad318, 0x56d72b97, 0xaa512206, 0xfe2cda89, + 0x53e4e1e5, 0x0799196a, 0xfb1f10fb, 0xaf62e874, 0x07ff7528, 0x53828da7, 0xaf048436, 0xfb797cb9, + 0xfbd3c87f, 0xafae30f0, 0x53283961, 0x0755c1ee, 0xafc85cb2, 0xfbb5a43d, 0x0733adac, 0x534e5523, + 0x0666c420, 0x521b3caf, 0xae9d353e, 0xfae0cdb1, 0x527d50ed, 0x0600a862, 0xfa86a1f3, 0xaefb597c, + 0xae51edba, 0xfa2c1535, 0x06aa1ca4, 0x52d7e42b, 0xfa4a7977, 0xae3781f8, 0x52b18869, 0x06cc70e6, + 0xf8e0aa6f, 0xac9d52e0, 0x501b5b71, 0x0466a3fe, 0xacfb3ea2, 0xf886c62d, 0x0400cfbc, 0x507d3733, + 0x50d783f5, 0x04aa7b7a, 0xf82c72eb, 0xac518a64, 0x04cc1738, 0x50b1efb7, 0xac37e626, 0xf84a1ea9, + 0xad628faa, 0xf91f7725, 0x05997eb4, 0x51e4863b, 0xf9791b67, 
0xad04e3e8, 0x5182ea79, 0x05ff12f6, + 0x0555a630, 0x51285ebf, 0xadae572e, 0xf9d3afa1, 0x514e32fd, 0x0533ca72, 0xf9b5c3e3, 0xadc83b6c, + 0xa7c9c3ca, 0xf3b43b45, 0x0f3232d4, 0x5b4fca5b, 0xf3d25707, 0xa7afaf88, 0x5b29a619, 0x0f545e96, + 0x0ffeea50, 0x5b8312df, 0xa7051b4e, 0xf378e3c1, 0x5be57e9d, 0x0f988612, 0xf31e8f83, 0xa763770c, + 0xf24be60f, 0xa6361e80, 0x5ab01711, 0x0ecdef9e, 0xa65072c2, 0xf22d8a4d, 0x0eab83dc, 0x5ad67b53, + 0x5a7ccf95, 0x0e01371a, 0xf2873e8b, 0xa6fac604, 0x0e675b58, 0x5a1aa3d7, 0xa69caa46, 0xf2e152c9, + 0x0ccd8840, 0x58b070cf, 0xa436795e, 0xf04b81d1, 0x58d61c8d, 0x0cabe402, 0xf02ded93, 0xa450151c, + 0xa4faa1da, 0xf0875955, 0x0c0150c4, 0x587ca84b, 0xf0e13517, 0xa49ccd98, 0x581ac409, 0x0c673c86, + 0x594fad85, 0x0d32550a, 0xf1b45c9b, 0xa5c9a414, 0x0d543948, 0x5929c1c7, 0xa5afc856, 0xf1d230d9, + 0xf178841f, 0xa5057c90, 0x59837501, 0x0dfe8d8e, 0xa56310d2, 0xf11ee85d, 0x0d98e1cc, 0x59e51943, + 0xf42d222f, 0xa050daa0, 0x5cd6d331, 0x08ab2bbe, 0xa036b6e2, 0xf44b4e6d, 0x08cd47fc, 0x5cb0bf73, + 0x5c1a0bb5, 0x0867f33a, 0xf4e1faab, 0xa09c0224, 0x08019f78, 0x5c7c67f7, 0xa0fa6e66, 0xf48796e9, + 0xa1af07ea, 0xf5d2ff65, 0x0954f6f4, 0x5d290e7b, 0xf5b49327, 0xa1c96ba8, 0x5d4f6239, 0x09329ab6, + 0x09982e70, 0x5de5d6ff, 0xa163df6e, 0xf51e27e1, 0x5d83babd, 0x09fe4232, 0xf5784ba3, 0xa105b32c, + 0x5f2969a5, 0x0b54912a, 0xf7d298bb, 0xa3af6034, 0x0b32fd68, 0x5f4f05e7, 0xa3c90c76, 0xf7b4f4f9, + 0xf71e403f, 0xa363b8b0, 0x5fe5b121, 0x0b9849ae, 0xa305d4f2, 0xf7782c7d, 0x0bfe25ec, 0x5f83dd63, + 0x0aab4c60, 0x5ed6b4ef, 0xa250bd7e, 0xf62d45f1, 0x5eb0d8ad, 0x0acd2022, 0xf64b29b3, 0xa236d13c, + 0xa29c65fa, 0xf6e19d75, 0x0a6794e4, 0x5e1a6c6b, 0xf687f137, 0xa2fa09b8, 0x5e7c0029, 0x0a01f8a6, + }, + { + 0x00000000, 0x4a7ff165, 0x94ffe2ca, 0xde8013af, 0x2c13b365, 0x666c4200, 0xb8ec51af, 0xf293a0ca, + 0x582766ca, 0x125897af, 0xccd88400, 0x86a77565, 0x7434d5af, 0x3e4b24ca, 0xe0cb3765, 0xaab4c600, + 0xb04ecd94, 0xfa313cf1, 0x24b12f5e, 0x6ecede3b, 0x9c5d7ef1, 0xd6228f94, 0x08a29c3b, 
0x42dd6d5e, + 0xe869ab5e, 0xa2165a3b, 0x7c964994, 0x36e9b8f1, 0xc47a183b, 0x8e05e95e, 0x5085faf1, 0x1afa0b94, + 0x6571edd9, 0x2f0e1cbc, 0xf18e0f13, 0xbbf1fe76, 0x49625ebc, 0x031dafd9, 0xdd9dbc76, 0x97e24d13, + 0x3d568b13, 0x77297a76, 0xa9a969d9, 0xe3d698bc, 0x11453876, 0x5b3ac913, 0x85badabc, 0xcfc52bd9, + 0xd53f204d, 0x9f40d128, 0x41c0c287, 0x0bbf33e2, 0xf92c9328, 0xb353624d, 0x6dd371e2, 0x27ac8087, + 0x8d184687, 0xc767b7e2, 0x19e7a44d, 0x53985528, 0xa10bf5e2, 0xeb740487, 0x35f41728, 0x7f8be64d, + 0xcae3dbb2, 0x809c2ad7, 0x5e1c3978, 0x1463c81d, 0xe6f068d7, 0xac8f99b2, 0x720f8a1d, 0x38707b78, + 0x92c4bd78, 0xd8bb4c1d, 0x063b5fb2, 0x4c44aed7, 0xbed70e1d, 0xf4a8ff78, 0x2a28ecd7, 0x60571db2, + 0x7aad1626, 0x30d2e743, 0xee52f4ec, 0xa42d0589, 0x56bea543, 0x1cc15426, 0xc2414789, 0x883eb6ec, + 0x228a70ec, 0x68f58189, 0xb6759226, 0xfc0a6343, 0x0e99c389, 0x44e632ec, 0x9a662143, 0xd019d026, + 0xaf92366b, 0xe5edc70e, 0x3b6dd4a1, 0x711225c4, 0x8381850e, 0xc9fe746b, 0x177e67c4, 0x5d0196a1, + 0xf7b550a1, 0xbdcaa1c4, 0x634ab26b, 0x2935430e, 0xdba6e3c4, 0x91d912a1, 0x4f59010e, 0x0526f06b, + 0x1fdcfbff, 0x55a30a9a, 0x8b231935, 0xc15ce850, 0x33cf489a, 0x79b0b9ff, 0xa730aa50, 0xed4f5b35, + 0x47fb9d35, 0x0d846c50, 0xd3047fff, 0x997b8e9a, 0x6be82e50, 0x2197df35, 0xff17cc9a, 0xb5683dff, + 0x902bc195, 0xda5430f0, 0x04d4235f, 0x4eabd23a, 0xbc3872f0, 0xf6478395, 0x28c7903a, 0x62b8615f, + 0xc80ca75f, 0x8273563a, 0x5cf34595, 0x168cb4f0, 0xe41f143a, 0xae60e55f, 0x70e0f6f0, 0x3a9f0795, + 0x20650c01, 0x6a1afd64, 0xb49aeecb, 0xfee51fae, 0x0c76bf64, 0x46094e01, 0x98895dae, 0xd2f6accb, + 0x78426acb, 0x323d9bae, 0xecbd8801, 0xa6c27964, 0x5451d9ae, 0x1e2e28cb, 0xc0ae3b64, 0x8ad1ca01, + 0xf55a2c4c, 0xbf25dd29, 0x61a5ce86, 0x2bda3fe3, 0xd9499f29, 0x93366e4c, 0x4db67de3, 0x07c98c86, + 0xad7d4a86, 0xe702bbe3, 0x3982a84c, 0x73fd5929, 0x816ef9e3, 0xcb110886, 0x15911b29, 0x5feeea4c, + 0x4514e1d8, 0x0f6b10bd, 0xd1eb0312, 0x9b94f277, 0x690752bd, 0x2378a3d8, 0xfdf8b077, 0xb7874112, + 0x1d338712, 0x574c7677, 
0x89cc65d8, 0xc3b394bd, 0x31203477, 0x7b5fc512, 0xa5dfd6bd, 0xefa027d8, + 0x5ac81a27, 0x10b7eb42, 0xce37f8ed, 0x84480988, 0x76dba942, 0x3ca45827, 0xe2244b88, 0xa85bbaed, + 0x02ef7ced, 0x48908d88, 0x96109e27, 0xdc6f6f42, 0x2efccf88, 0x64833eed, 0xba032d42, 0xf07cdc27, + 0xea86d7b3, 0xa0f926d6, 0x7e793579, 0x3406c41c, 0xc69564d6, 0x8cea95b3, 0x526a861c, 0x18157779, + 0xb2a1b179, 0xf8de401c, 0x265e53b3, 0x6c21a2d6, 0x9eb2021c, 0xd4cdf379, 0x0a4de0d6, 0x403211b3, + 0x3fb9f7fe, 0x75c6069b, 0xab461534, 0xe139e451, 0x13aa449b, 0x59d5b5fe, 0x8755a651, 0xcd2a5734, + 0x679e9134, 0x2de16051, 0xf36173fe, 0xb91e829b, 0x4b8d2251, 0x01f2d334, 0xdf72c09b, 0x950d31fe, + 0x8ff73a6a, 0xc588cb0f, 0x1b08d8a0, 0x517729c5, 0xa3e4890f, 0xe99b786a, 0x371b6bc5, 0x7d649aa0, + 0xd7d05ca0, 0x9dafadc5, 0x432fbe6a, 0x09504f0f, 0xfbc3efc5, 0xb1bc1ea0, 0x6f3c0d0f, 0x2543fc6a, + }, + { + 0x00000000, 0x25bbf5db, 0x4b77ebb6, 0x6ecc1e6d, 0x96efd76c, 0xb35422b7, 0xdd983cda, 0xf823c901, + 0x2833d829, 0x0d882df2, 0x6344339f, 0x46ffc644, 0xbedc0f45, 0x9b67fa9e, 0xf5abe4f3, 0xd0101128, + 0x5067b052, 0x75dc4589, 0x1b105be4, 0x3eabae3f, 0xc688673e, 0xe33392e5, 0x8dff8c88, 0xa8447953, + 0x7854687b, 0x5def9da0, 0x332383cd, 0x16987616, 0xeebbbf17, 0xcb004acc, 0xa5cc54a1, 0x8077a17a, + 0xa0cf60a4, 0x8574957f, 0xebb88b12, 0xce037ec9, 0x3620b7c8, 0x139b4213, 0x7d575c7e, 0x58eca9a5, + 0x88fcb88d, 0xad474d56, 0xc38b533b, 0xe630a6e0, 0x1e136fe1, 0x3ba89a3a, 0x55648457, 0x70df718c, + 0xf0a8d0f6, 0xd513252d, 0xbbdf3b40, 0x9e64ce9b, 0x6647079a, 0x43fcf241, 0x2d30ec2c, 0x088b19f7, + 0xd89b08df, 0xfd20fd04, 0x93ece369, 0xb65716b2, 0x4e74dfb3, 0x6bcf2a68, 0x05033405, 0x20b8c1de, + 0x4472b7b9, 0x61c94262, 0x0f055c0f, 0x2abea9d4, 0xd29d60d5, 0xf726950e, 0x99ea8b63, 0xbc517eb8, + 0x6c416f90, 0x49fa9a4b, 0x27368426, 0x028d71fd, 0xfaaeb8fc, 0xdf154d27, 0xb1d9534a, 0x9462a691, + 0x141507eb, 0x31aef230, 0x5f62ec5d, 0x7ad91986, 0x82fad087, 0xa741255c, 0xc98d3b31, 0xec36ceea, + 0x3c26dfc2, 0x199d2a19, 0x77513474, 0x52eac1af, 
0xaac908ae, 0x8f72fd75, 0xe1bee318, 0xc40516c3, + 0xe4bdd71d, 0xc10622c6, 0xafca3cab, 0x8a71c970, 0x72520071, 0x57e9f5aa, 0x3925ebc7, 0x1c9e1e1c, + 0xcc8e0f34, 0xe935faef, 0x87f9e482, 0xa2421159, 0x5a61d858, 0x7fda2d83, 0x111633ee, 0x34adc635, + 0xb4da674f, 0x91619294, 0xffad8cf9, 0xda167922, 0x2235b023, 0x078e45f8, 0x69425b95, 0x4cf9ae4e, + 0x9ce9bf66, 0xb9524abd, 0xd79e54d0, 0xf225a10b, 0x0a06680a, 0x2fbd9dd1, 0x417183bc, 0x64ca7667, + 0x88e56f72, 0xad5e9aa9, 0xc39284c4, 0xe629711f, 0x1e0ab81e, 0x3bb14dc5, 0x557d53a8, 0x70c6a673, + 0xa0d6b75b, 0x856d4280, 0xeba15ced, 0xce1aa936, 0x36396037, 0x138295ec, 0x7d4e8b81, 0x58f57e5a, + 0xd882df20, 0xfd392afb, 0x93f53496, 0xb64ec14d, 0x4e6d084c, 0x6bd6fd97, 0x051ae3fa, 0x20a11621, + 0xf0b10709, 0xd50af2d2, 0xbbc6ecbf, 0x9e7d1964, 0x665ed065, 0x43e525be, 0x2d293bd3, 0x0892ce08, + 0x282a0fd6, 0x0d91fa0d, 0x635de460, 0x46e611bb, 0xbec5d8ba, 0x9b7e2d61, 0xf5b2330c, 0xd009c6d7, + 0x0019d7ff, 0x25a22224, 0x4b6e3c49, 0x6ed5c992, 0x96f60093, 0xb34df548, 0xdd81eb25, 0xf83a1efe, + 0x784dbf84, 0x5df64a5f, 0x333a5432, 0x1681a1e9, 0xeea268e8, 0xcb199d33, 0xa5d5835e, 0x806e7685, + 0x507e67ad, 0x75c59276, 0x1b098c1b, 0x3eb279c0, 0xc691b0c1, 0xe32a451a, 0x8de65b77, 0xa85daeac, + 0xcc97d8cb, 0xe92c2d10, 0x87e0337d, 0xa25bc6a6, 0x5a780fa7, 0x7fc3fa7c, 0x110fe411, 0x34b411ca, + 0xe4a400e2, 0xc11ff539, 0xafd3eb54, 0x8a681e8f, 0x724bd78e, 0x57f02255, 0x393c3c38, 0x1c87c9e3, + 0x9cf06899, 0xb94b9d42, 0xd787832f, 0xf23c76f4, 0x0a1fbff5, 0x2fa44a2e, 0x41685443, 0x64d3a198, + 0xb4c3b0b0, 0x9178456b, 0xffb45b06, 0xda0faedd, 0x222c67dc, 0x07979207, 0x695b8c6a, 0x4ce079b1, + 0x6c58b86f, 0x49e34db4, 0x272f53d9, 0x0294a602, 0xfab76f03, 0xdf0c9ad8, 0xb1c084b5, 0x947b716e, + 0x446b6046, 0x61d0959d, 0x0f1c8bf0, 0x2aa77e2b, 0xd284b72a, 0xf73f42f1, 0x99f35c9c, 0xbc48a947, + 0x3c3f083d, 0x1984fde6, 0x7748e38b, 0x52f31650, 0xaad0df51, 0x8f6b2a8a, 0xe1a734e7, 0xc41cc13c, + 0x140cd014, 0x31b725cf, 0x5f7b3ba2, 0x7ac0ce79, 0x82e30778, 0xa758f2a3, 0xc994ecce, 
0xec2f1915, + }, }; #endif const uint32_t crc32c_sw_table[16][256] = { - { - 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, - 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, - 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, - 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, - 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, - 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, - 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, - 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, - 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, - 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, - 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, - 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, - 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, - 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, - 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, - 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, - 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, - 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, - 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, - 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 
0x2d80b0c4, 0x3ed04330, 0xccbbc033, - 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, - 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, - 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, - 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, - 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, - 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, - 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, - 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, - 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, - 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, - 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, - 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351, - }, - { - 0x00000000, 0x13a29877, 0x274530ee, 0x34e7a899, 0x4e8a61dc, 0x5d28f9ab, 0x69cf5132, 0x7a6dc945, - 0x9d14c3b8, 0x8eb65bcf, 0xba51f356, 0xa9f36b21, 0xd39ea264, 0xc03c3a13, 0xf4db928a, 0xe7790afd, - 0x3fc5f181, 0x2c6769f6, 0x1880c16f, 0x0b225918, 0x714f905d, 0x62ed082a, 0x560aa0b3, 0x45a838c4, - 0xa2d13239, 0xb173aa4e, 0x859402d7, 0x96369aa0, 0xec5b53e5, 0xfff9cb92, 0xcb1e630b, 0xd8bcfb7c, - 0x7f8be302, 0x6c297b75, 0x58ced3ec, 0x4b6c4b9b, 0x310182de, 0x22a31aa9, 0x1644b230, 0x05e62a47, - 0xe29f20ba, 0xf13db8cd, 0xc5da1054, 0xd6788823, 0xac154166, 0xbfb7d911, 0x8b507188, 0x98f2e9ff, - 0x404e1283, 0x53ec8af4, 0x670b226d, 0x74a9ba1a, 0x0ec4735f, 0x1d66eb28, 0x298143b1, 0x3a23dbc6, - 0xdd5ad13b, 0xcef8494c, 0xfa1fe1d5, 0xe9bd79a2, 0x93d0b0e7, 0x80722890, 0xb4958009, 
0xa737187e, - 0xff17c604, 0xecb55e73, 0xd852f6ea, 0xcbf06e9d, 0xb19da7d8, 0xa23f3faf, 0x96d89736, 0x857a0f41, - 0x620305bc, 0x71a19dcb, 0x45463552, 0x56e4ad25, 0x2c896460, 0x3f2bfc17, 0x0bcc548e, 0x186eccf9, - 0xc0d23785, 0xd370aff2, 0xe797076b, 0xf4359f1c, 0x8e585659, 0x9dface2e, 0xa91d66b7, 0xbabffec0, - 0x5dc6f43d, 0x4e646c4a, 0x7a83c4d3, 0x69215ca4, 0x134c95e1, 0x00ee0d96, 0x3409a50f, 0x27ab3d78, - 0x809c2506, 0x933ebd71, 0xa7d915e8, 0xb47b8d9f, 0xce1644da, 0xddb4dcad, 0xe9537434, 0xfaf1ec43, - 0x1d88e6be, 0x0e2a7ec9, 0x3acdd650, 0x296f4e27, 0x53028762, 0x40a01f15, 0x7447b78c, 0x67e52ffb, - 0xbf59d487, 0xacfb4cf0, 0x981ce469, 0x8bbe7c1e, 0xf1d3b55b, 0xe2712d2c, 0xd69685b5, 0xc5341dc2, - 0x224d173f, 0x31ef8f48, 0x050827d1, 0x16aabfa6, 0x6cc776e3, 0x7f65ee94, 0x4b82460d, 0x5820de7a, - 0xfbc3faf9, 0xe861628e, 0xdc86ca17, 0xcf245260, 0xb5499b25, 0xa6eb0352, 0x920cabcb, 0x81ae33bc, - 0x66d73941, 0x7575a136, 0x419209af, 0x523091d8, 0x285d589d, 0x3bffc0ea, 0x0f186873, 0x1cbaf004, - 0xc4060b78, 0xd7a4930f, 0xe3433b96, 0xf0e1a3e1, 0x8a8c6aa4, 0x992ef2d3, 0xadc95a4a, 0xbe6bc23d, - 0x5912c8c0, 0x4ab050b7, 0x7e57f82e, 0x6df56059, 0x1798a91c, 0x043a316b, 0x30dd99f2, 0x237f0185, - 0x844819fb, 0x97ea818c, 0xa30d2915, 0xb0afb162, 0xcac27827, 0xd960e050, 0xed8748c9, 0xfe25d0be, - 0x195cda43, 0x0afe4234, 0x3e19eaad, 0x2dbb72da, 0x57d6bb9f, 0x447423e8, 0x70938b71, 0x63311306, - 0xbb8de87a, 0xa82f700d, 0x9cc8d894, 0x8f6a40e3, 0xf50789a6, 0xe6a511d1, 0xd242b948, 0xc1e0213f, - 0x26992bc2, 0x353bb3b5, 0x01dc1b2c, 0x127e835b, 0x68134a1e, 0x7bb1d269, 0x4f567af0, 0x5cf4e287, - 0x04d43cfd, 0x1776a48a, 0x23910c13, 0x30339464, 0x4a5e5d21, 0x59fcc556, 0x6d1b6dcf, 0x7eb9f5b8, - 0x99c0ff45, 0x8a626732, 0xbe85cfab, 0xad2757dc, 0xd74a9e99, 0xc4e806ee, 0xf00fae77, 0xe3ad3600, - 0x3b11cd7c, 0x28b3550b, 0x1c54fd92, 0x0ff665e5, 0x759baca0, 0x663934d7, 0x52de9c4e, 0x417c0439, - 0xa6050ec4, 0xb5a796b3, 0x81403e2a, 0x92e2a65d, 0xe88f6f18, 0xfb2df76f, 0xcfca5ff6, 0xdc68c781, - 0x7b5fdfff, 0x68fd4788, 
0x5c1aef11, 0x4fb87766, 0x35d5be23, 0x26772654, 0x12908ecd, 0x013216ba, - 0xe64b1c47, 0xf5e98430, 0xc10e2ca9, 0xd2acb4de, 0xa8c17d9b, 0xbb63e5ec, 0x8f844d75, 0x9c26d502, - 0x449a2e7e, 0x5738b609, 0x63df1e90, 0x707d86e7, 0x0a104fa2, 0x19b2d7d5, 0x2d557f4c, 0x3ef7e73b, - 0xd98eedc6, 0xca2c75b1, 0xfecbdd28, 0xed69455f, 0x97048c1a, 0x84a6146d, 0xb041bcf4, 0xa3e32483, - }, - { - 0x00000000, 0xa541927e, 0x4f6f520d, 0xea2ec073, 0x9edea41a, 0x3b9f3664, 0xd1b1f617, 0x74f06469, - 0x38513ec5, 0x9d10acbb, 0x773e6cc8, 0xd27ffeb6, 0xa68f9adf, 0x03ce08a1, 0xe9e0c8d2, 0x4ca15aac, - 0x70a27d8a, 0xd5e3eff4, 0x3fcd2f87, 0x9a8cbdf9, 0xee7cd990, 0x4b3d4bee, 0xa1138b9d, 0x045219e3, - 0x48f3434f, 0xedb2d131, 0x079c1142, 0xa2dd833c, 0xd62de755, 0x736c752b, 0x9942b558, 0x3c032726, - 0xe144fb14, 0x4405696a, 0xae2ba919, 0x0b6a3b67, 0x7f9a5f0e, 0xdadbcd70, 0x30f50d03, 0x95b49f7d, - 0xd915c5d1, 0x7c5457af, 0x967a97dc, 0x333b05a2, 0x47cb61cb, 0xe28af3b5, 0x08a433c6, 0xade5a1b8, - 0x91e6869e, 0x34a714e0, 0xde89d493, 0x7bc846ed, 0x0f382284, 0xaa79b0fa, 0x40577089, 0xe516e2f7, - 0xa9b7b85b, 0x0cf62a25, 0xe6d8ea56, 0x43997828, 0x37691c41, 0x92288e3f, 0x78064e4c, 0xdd47dc32, - 0xc76580d9, 0x622412a7, 0x880ad2d4, 0x2d4b40aa, 0x59bb24c3, 0xfcfab6bd, 0x16d476ce, 0xb395e4b0, - 0xff34be1c, 0x5a752c62, 0xb05bec11, 0x151a7e6f, 0x61ea1a06, 0xc4ab8878, 0x2e85480b, 0x8bc4da75, - 0xb7c7fd53, 0x12866f2d, 0xf8a8af5e, 0x5de93d20, 0x29195949, 0x8c58cb37, 0x66760b44, 0xc337993a, - 0x8f96c396, 0x2ad751e8, 0xc0f9919b, 0x65b803e5, 0x1148678c, 0xb409f5f2, 0x5e273581, 0xfb66a7ff, - 0x26217bcd, 0x8360e9b3, 0x694e29c0, 0xcc0fbbbe, 0xb8ffdfd7, 0x1dbe4da9, 0xf7908dda, 0x52d11fa4, - 0x1e704508, 0xbb31d776, 0x511f1705, 0xf45e857b, 0x80aee112, 0x25ef736c, 0xcfc1b31f, 0x6a802161, - 0x56830647, 0xf3c29439, 0x19ec544a, 0xbcadc634, 0xc85da25d, 0x6d1c3023, 0x8732f050, 0x2273622e, - 0x6ed23882, 0xcb93aafc, 0x21bd6a8f, 0x84fcf8f1, 0xf00c9c98, 0x554d0ee6, 0xbf63ce95, 0x1a225ceb, - 0x8b277743, 0x2e66e53d, 0xc448254e, 0x6109b730, 
0x15f9d359, 0xb0b84127, 0x5a968154, 0xffd7132a, - 0xb3764986, 0x1637dbf8, 0xfc191b8b, 0x595889f5, 0x2da8ed9c, 0x88e97fe2, 0x62c7bf91, 0xc7862def, - 0xfb850ac9, 0x5ec498b7, 0xb4ea58c4, 0x11abcaba, 0x655baed3, 0xc01a3cad, 0x2a34fcde, 0x8f756ea0, - 0xc3d4340c, 0x6695a672, 0x8cbb6601, 0x29faf47f, 0x5d0a9016, 0xf84b0268, 0x1265c21b, 0xb7245065, - 0x6a638c57, 0xcf221e29, 0x250cde5a, 0x804d4c24, 0xf4bd284d, 0x51fcba33, 0xbbd27a40, 0x1e93e83e, - 0x5232b292, 0xf77320ec, 0x1d5de09f, 0xb81c72e1, 0xccec1688, 0x69ad84f6, 0x83834485, 0x26c2d6fb, - 0x1ac1f1dd, 0xbf8063a3, 0x55aea3d0, 0xf0ef31ae, 0x841f55c7, 0x215ec7b9, 0xcb7007ca, 0x6e3195b4, - 0x2290cf18, 0x87d15d66, 0x6dff9d15, 0xc8be0f6b, 0xbc4e6b02, 0x190ff97c, 0xf321390f, 0x5660ab71, - 0x4c42f79a, 0xe90365e4, 0x032da597, 0xa66c37e9, 0xd29c5380, 0x77ddc1fe, 0x9df3018d, 0x38b293f3, - 0x7413c95f, 0xd1525b21, 0x3b7c9b52, 0x9e3d092c, 0xeacd6d45, 0x4f8cff3b, 0xa5a23f48, 0x00e3ad36, - 0x3ce08a10, 0x99a1186e, 0x738fd81d, 0xd6ce4a63, 0xa23e2e0a, 0x077fbc74, 0xed517c07, 0x4810ee79, - 0x04b1b4d5, 0xa1f026ab, 0x4bdee6d8, 0xee9f74a6, 0x9a6f10cf, 0x3f2e82b1, 0xd50042c2, 0x7041d0bc, - 0xad060c8e, 0x08479ef0, 0xe2695e83, 0x4728ccfd, 0x33d8a894, 0x96993aea, 0x7cb7fa99, 0xd9f668e7, - 0x9557324b, 0x3016a035, 0xda386046, 0x7f79f238, 0x0b899651, 0xaec8042f, 0x44e6c45c, 0xe1a75622, - 0xdda47104, 0x78e5e37a, 0x92cb2309, 0x378ab177, 0x437ad51e, 0xe63b4760, 0x0c158713, 0xa954156d, - 0xe5f54fc1, 0x40b4ddbf, 0xaa9a1dcc, 0x0fdb8fb2, 0x7b2bebdb, 0xde6a79a5, 0x3444b9d6, 0x91052ba8, - }, - { - 0x00000000, 0xdd45aab8, 0xbf672381, 0x62228939, 0x7b2231f3, 0xa6679b4b, 0xc4451272, 0x1900b8ca, - 0xf64463e6, 0x2b01c95e, 0x49234067, 0x9466eadf, 0x8d665215, 0x5023f8ad, 0x32017194, 0xef44db2c, - 0xe964b13d, 0x34211b85, 0x560392bc, 0x8b463804, 0x924680ce, 0x4f032a76, 0x2d21a34f, 0xf06409f7, - 0x1f20d2db, 0xc2657863, 0xa047f15a, 0x7d025be2, 0x6402e328, 0xb9474990, 0xdb65c0a9, 0x06206a11, - 0xd725148b, 0x0a60be33, 0x6842370a, 0xb5079db2, 0xac072578, 0x71428fc0, 
0x136006f9, 0xce25ac41, - 0x2161776d, 0xfc24ddd5, 0x9e0654ec, 0x4343fe54, 0x5a43469e, 0x8706ec26, 0xe524651f, 0x3861cfa7, - 0x3e41a5b6, 0xe3040f0e, 0x81268637, 0x5c632c8f, 0x45639445, 0x98263efd, 0xfa04b7c4, 0x27411d7c, - 0xc805c650, 0x15406ce8, 0x7762e5d1, 0xaa274f69, 0xb327f7a3, 0x6e625d1b, 0x0c40d422, 0xd1057e9a, - 0xaba65fe7, 0x76e3f55f, 0x14c17c66, 0xc984d6de, 0xd0846e14, 0x0dc1c4ac, 0x6fe34d95, 0xb2a6e72d, - 0x5de23c01, 0x80a796b9, 0xe2851f80, 0x3fc0b538, 0x26c00df2, 0xfb85a74a, 0x99a72e73, 0x44e284cb, - 0x42c2eeda, 0x9f874462, 0xfda5cd5b, 0x20e067e3, 0x39e0df29, 0xe4a57591, 0x8687fca8, 0x5bc25610, - 0xb4868d3c, 0x69c32784, 0x0be1aebd, 0xd6a40405, 0xcfa4bccf, 0x12e11677, 0x70c39f4e, 0xad8635f6, - 0x7c834b6c, 0xa1c6e1d4, 0xc3e468ed, 0x1ea1c255, 0x07a17a9f, 0xdae4d027, 0xb8c6591e, 0x6583f3a6, - 0x8ac7288a, 0x57828232, 0x35a00b0b, 0xe8e5a1b3, 0xf1e51979, 0x2ca0b3c1, 0x4e823af8, 0x93c79040, - 0x95e7fa51, 0x48a250e9, 0x2a80d9d0, 0xf7c57368, 0xeec5cba2, 0x3380611a, 0x51a2e823, 0x8ce7429b, - 0x63a399b7, 0xbee6330f, 0xdcc4ba36, 0x0181108e, 0x1881a844, 0xc5c402fc, 0xa7e68bc5, 0x7aa3217d, - 0x52a0c93f, 0x8fe56387, 0xedc7eabe, 0x30824006, 0x2982f8cc, 0xf4c75274, 0x96e5db4d, 0x4ba071f5, - 0xa4e4aad9, 0x79a10061, 0x1b838958, 0xc6c623e0, 0xdfc69b2a, 0x02833192, 0x60a1b8ab, 0xbde41213, - 0xbbc47802, 0x6681d2ba, 0x04a35b83, 0xd9e6f13b, 0xc0e649f1, 0x1da3e349, 0x7f816a70, 0xa2c4c0c8, - 0x4d801be4, 0x90c5b15c, 0xf2e73865, 0x2fa292dd, 0x36a22a17, 0xebe780af, 0x89c50996, 0x5480a32e, - 0x8585ddb4, 0x58c0770c, 0x3ae2fe35, 0xe7a7548d, 0xfea7ec47, 0x23e246ff, 0x41c0cfc6, 0x9c85657e, - 0x73c1be52, 0xae8414ea, 0xcca69dd3, 0x11e3376b, 0x08e38fa1, 0xd5a62519, 0xb784ac20, 0x6ac10698, - 0x6ce16c89, 0xb1a4c631, 0xd3864f08, 0x0ec3e5b0, 0x17c35d7a, 0xca86f7c2, 0xa8a47efb, 0x75e1d443, - 0x9aa50f6f, 0x47e0a5d7, 0x25c22cee, 0xf8878656, 0xe1873e9c, 0x3cc29424, 0x5ee01d1d, 0x83a5b7a5, - 0xf90696d8, 0x24433c60, 0x4661b559, 0x9b241fe1, 0x8224a72b, 0x5f610d93, 0x3d4384aa, 0xe0062e12, - 0x0f42f53e, 
0xd2075f86, 0xb025d6bf, 0x6d607c07, 0x7460c4cd, 0xa9256e75, 0xcb07e74c, 0x16424df4, - 0x106227e5, 0xcd278d5d, 0xaf050464, 0x7240aedc, 0x6b401616, 0xb605bcae, 0xd4273597, 0x09629f2f, - 0xe6264403, 0x3b63eebb, 0x59416782, 0x8404cd3a, 0x9d0475f0, 0x4041df48, 0x22635671, 0xff26fcc9, - 0x2e238253, 0xf36628eb, 0x9144a1d2, 0x4c010b6a, 0x5501b3a0, 0x88441918, 0xea669021, 0x37233a99, - 0xd867e1b5, 0x05224b0d, 0x6700c234, 0xba45688c, 0xa345d046, 0x7e007afe, 0x1c22f3c7, 0xc167597f, - 0xc747336e, 0x1a0299d6, 0x782010ef, 0xa565ba57, 0xbc65029d, 0x6120a825, 0x0302211c, 0xde478ba4, - 0x31035088, 0xec46fa30, 0x8e647309, 0x5321d9b1, 0x4a21617b, 0x9764cbc3, 0xf54642fa, 0x2803e842, - }, - { - 0x00000000, 0x38116fac, 0x7022df58, 0x4833b0f4, 0xe045beb0, 0xd854d11c, 0x906761e8, 0xa8760e44, - 0xc5670b91, 0xfd76643d, 0xb545d4c9, 0x8d54bb65, 0x2522b521, 0x1d33da8d, 0x55006a79, 0x6d1105d5, - 0x8f2261d3, 0xb7330e7f, 0xff00be8b, 0xc711d127, 0x6f67df63, 0x5776b0cf, 0x1f45003b, 0x27546f97, - 0x4a456a42, 0x725405ee, 0x3a67b51a, 0x0276dab6, 0xaa00d4f2, 0x9211bb5e, 0xda220baa, 0xe2336406, - 0x1ba8b557, 0x23b9dafb, 0x6b8a6a0f, 0x539b05a3, 0xfbed0be7, 0xc3fc644b, 0x8bcfd4bf, 0xb3debb13, - 0xdecfbec6, 0xe6ded16a, 0xaeed619e, 0x96fc0e32, 0x3e8a0076, 0x069b6fda, 0x4ea8df2e, 0x76b9b082, - 0x948ad484, 0xac9bbb28, 0xe4a80bdc, 0xdcb96470, 0x74cf6a34, 0x4cde0598, 0x04edb56c, 0x3cfcdac0, - 0x51eddf15, 0x69fcb0b9, 0x21cf004d, 0x19de6fe1, 0xb1a861a5, 0x89b90e09, 0xc18abefd, 0xf99bd151, - 0x37516aae, 0x0f400502, 0x4773b5f6, 0x7f62da5a, 0xd714d41e, 0xef05bbb2, 0xa7360b46, 0x9f2764ea, - 0xf236613f, 0xca270e93, 0x8214be67, 0xba05d1cb, 0x1273df8f, 0x2a62b023, 0x625100d7, 0x5a406f7b, - 0xb8730b7d, 0x806264d1, 0xc851d425, 0xf040bb89, 0x5836b5cd, 0x6027da61, 0x28146a95, 0x10050539, - 0x7d1400ec, 0x45056f40, 0x0d36dfb4, 0x3527b018, 0x9d51be5c, 0xa540d1f0, 0xed736104, 0xd5620ea8, - 0x2cf9dff9, 0x14e8b055, 0x5cdb00a1, 0x64ca6f0d, 0xccbc6149, 0xf4ad0ee5, 0xbc9ebe11, 0x848fd1bd, - 0xe99ed468, 0xd18fbbc4, 0x99bc0b30, 
0xa1ad649c, 0x09db6ad8, 0x31ca0574, 0x79f9b580, 0x41e8da2c, - 0xa3dbbe2a, 0x9bcad186, 0xd3f96172, 0xebe80ede, 0x439e009a, 0x7b8f6f36, 0x33bcdfc2, 0x0badb06e, - 0x66bcb5bb, 0x5eadda17, 0x169e6ae3, 0x2e8f054f, 0x86f90b0b, 0xbee864a7, 0xf6dbd453, 0xcecabbff, - 0x6ea2d55c, 0x56b3baf0, 0x1e800a04, 0x269165a8, 0x8ee76bec, 0xb6f60440, 0xfec5b4b4, 0xc6d4db18, - 0xabc5decd, 0x93d4b161, 0xdbe70195, 0xe3f66e39, 0x4b80607d, 0x73910fd1, 0x3ba2bf25, 0x03b3d089, - 0xe180b48f, 0xd991db23, 0x91a26bd7, 0xa9b3047b, 0x01c50a3f, 0x39d46593, 0x71e7d567, 0x49f6bacb, - 0x24e7bf1e, 0x1cf6d0b2, 0x54c56046, 0x6cd40fea, 0xc4a201ae, 0xfcb36e02, 0xb480def6, 0x8c91b15a, - 0x750a600b, 0x4d1b0fa7, 0x0528bf53, 0x3d39d0ff, 0x954fdebb, 0xad5eb117, 0xe56d01e3, 0xdd7c6e4f, - 0xb06d6b9a, 0x887c0436, 0xc04fb4c2, 0xf85edb6e, 0x5028d52a, 0x6839ba86, 0x200a0a72, 0x181b65de, - 0xfa2801d8, 0xc2396e74, 0x8a0ade80, 0xb21bb12c, 0x1a6dbf68, 0x227cd0c4, 0x6a4f6030, 0x525e0f9c, - 0x3f4f0a49, 0x075e65e5, 0x4f6dd511, 0x777cbabd, 0xdf0ab4f9, 0xe71bdb55, 0xaf286ba1, 0x9739040d, - 0x59f3bff2, 0x61e2d05e, 0x29d160aa, 0x11c00f06, 0xb9b60142, 0x81a76eee, 0xc994de1a, 0xf185b1b6, - 0x9c94b463, 0xa485dbcf, 0xecb66b3b, 0xd4a70497, 0x7cd10ad3, 0x44c0657f, 0x0cf3d58b, 0x34e2ba27, - 0xd6d1de21, 0xeec0b18d, 0xa6f30179, 0x9ee26ed5, 0x36946091, 0x0e850f3d, 0x46b6bfc9, 0x7ea7d065, - 0x13b6d5b0, 0x2ba7ba1c, 0x63940ae8, 0x5b856544, 0xf3f36b00, 0xcbe204ac, 0x83d1b458, 0xbbc0dbf4, - 0x425b0aa5, 0x7a4a6509, 0x3279d5fd, 0x0a68ba51, 0xa21eb415, 0x9a0fdbb9, 0xd23c6b4d, 0xea2d04e1, - 0x873c0134, 0xbf2d6e98, 0xf71ede6c, 0xcf0fb1c0, 0x6779bf84, 0x5f68d028, 0x175b60dc, 0x2f4a0f70, - 0xcd796b76, 0xf56804da, 0xbd5bb42e, 0x854adb82, 0x2d3cd5c6, 0x152dba6a, 0x5d1e0a9e, 0x650f6532, - 0x081e60e7, 0x300f0f4b, 0x783cbfbf, 0x402dd013, 0xe85bde57, 0xd04ab1fb, 0x9879010f, 0xa0686ea3, - }, - { - 0x00000000, 0xef306b19, 0xdb8ca0c3, 0x34bccbda, 0xb2f53777, 0x5dc55c6e, 0x697997b4, 0x8649fcad, - 0x6006181f, 0x8f367306, 0xbb8ab8dc, 0x54bad3c5, 0xd2f32f68, 
0x3dc34471, 0x097f8fab, 0xe64fe4b2, - 0xc00c303e, 0x2f3c5b27, 0x1b8090fd, 0xf4b0fbe4, 0x72f90749, 0x9dc96c50, 0xa975a78a, 0x4645cc93, - 0xa00a2821, 0x4f3a4338, 0x7b8688e2, 0x94b6e3fb, 0x12ff1f56, 0xfdcf744f, 0xc973bf95, 0x2643d48c, - 0x85f4168d, 0x6ac47d94, 0x5e78b64e, 0xb148dd57, 0x370121fa, 0xd8314ae3, 0xec8d8139, 0x03bdea20, - 0xe5f20e92, 0x0ac2658b, 0x3e7eae51, 0xd14ec548, 0x570739e5, 0xb83752fc, 0x8c8b9926, 0x63bbf23f, - 0x45f826b3, 0xaac84daa, 0x9e748670, 0x7144ed69, 0xf70d11c4, 0x183d7add, 0x2c81b107, 0xc3b1da1e, - 0x25fe3eac, 0xcace55b5, 0xfe729e6f, 0x1142f576, 0x970b09db, 0x783b62c2, 0x4c87a918, 0xa3b7c201, - 0x0e045beb, 0xe13430f2, 0xd588fb28, 0x3ab89031, 0xbcf16c9c, 0x53c10785, 0x677dcc5f, 0x884da746, - 0x6e0243f4, 0x813228ed, 0xb58ee337, 0x5abe882e, 0xdcf77483, 0x33c71f9a, 0x077bd440, 0xe84bbf59, - 0xce086bd5, 0x213800cc, 0x1584cb16, 0xfab4a00f, 0x7cfd5ca2, 0x93cd37bb, 0xa771fc61, 0x48419778, - 0xae0e73ca, 0x413e18d3, 0x7582d309, 0x9ab2b810, 0x1cfb44bd, 0xf3cb2fa4, 0xc777e47e, 0x28478f67, - 0x8bf04d66, 0x64c0267f, 0x507ceda5, 0xbf4c86bc, 0x39057a11, 0xd6351108, 0xe289dad2, 0x0db9b1cb, - 0xebf65579, 0x04c63e60, 0x307af5ba, 0xdf4a9ea3, 0x5903620e, 0xb6330917, 0x828fc2cd, 0x6dbfa9d4, - 0x4bfc7d58, 0xa4cc1641, 0x9070dd9b, 0x7f40b682, 0xf9094a2f, 0x16392136, 0x2285eaec, 0xcdb581f5, - 0x2bfa6547, 0xc4ca0e5e, 0xf076c584, 0x1f46ae9d, 0x990f5230, 0x763f3929, 0x4283f2f3, 0xadb399ea, - 0x1c08b7d6, 0xf338dccf, 0xc7841715, 0x28b47c0c, 0xaefd80a1, 0x41cdebb8, 0x75712062, 0x9a414b7b, - 0x7c0eafc9, 0x933ec4d0, 0xa7820f0a, 0x48b26413, 0xcefb98be, 0x21cbf3a7, 0x1577387d, 0xfa475364, - 0xdc0487e8, 0x3334ecf1, 0x0788272b, 0xe8b84c32, 0x6ef1b09f, 0x81c1db86, 0xb57d105c, 0x5a4d7b45, - 0xbc029ff7, 0x5332f4ee, 0x678e3f34, 0x88be542d, 0x0ef7a880, 0xe1c7c399, 0xd57b0843, 0x3a4b635a, - 0x99fca15b, 0x76ccca42, 0x42700198, 0xad406a81, 0x2b09962c, 0xc439fd35, 0xf08536ef, 0x1fb55df6, - 0xf9fab944, 0x16cad25d, 0x22761987, 0xcd46729e, 0x4b0f8e33, 0xa43fe52a, 0x90832ef0, 0x7fb345e9, - 
0x59f09165, 0xb6c0fa7c, 0x827c31a6, 0x6d4c5abf, 0xeb05a612, 0x0435cd0b, 0x308906d1, 0xdfb96dc8, - 0x39f6897a, 0xd6c6e263, 0xe27a29b9, 0x0d4a42a0, 0x8b03be0d, 0x6433d514, 0x508f1ece, 0xbfbf75d7, - 0x120cec3d, 0xfd3c8724, 0xc9804cfe, 0x26b027e7, 0xa0f9db4a, 0x4fc9b053, 0x7b757b89, 0x94451090, - 0x720af422, 0x9d3a9f3b, 0xa98654e1, 0x46b63ff8, 0xc0ffc355, 0x2fcfa84c, 0x1b736396, 0xf443088f, - 0xd200dc03, 0x3d30b71a, 0x098c7cc0, 0xe6bc17d9, 0x60f5eb74, 0x8fc5806d, 0xbb794bb7, 0x544920ae, - 0xb206c41c, 0x5d36af05, 0x698a64df, 0x86ba0fc6, 0x00f3f36b, 0xefc39872, 0xdb7f53a8, 0x344f38b1, - 0x97f8fab0, 0x78c891a9, 0x4c745a73, 0xa344316a, 0x250dcdc7, 0xca3da6de, 0xfe816d04, 0x11b1061d, - 0xf7fee2af, 0x18ce89b6, 0x2c72426c, 0xc3422975, 0x450bd5d8, 0xaa3bbec1, 0x9e87751b, 0x71b71e02, - 0x57f4ca8e, 0xb8c4a197, 0x8c786a4d, 0x63480154, 0xe501fdf9, 0x0a3196e0, 0x3e8d5d3a, 0xd1bd3623, - 0x37f2d291, 0xd8c2b988, 0xec7e7252, 0x034e194b, 0x8507e5e6, 0x6a378eff, 0x5e8b4525, 0xb1bb2e3c, - }, - { - 0x00000000, 0x68032cc8, 0xd0065990, 0xb8057558, 0xa5e0c5d1, 0xcde3e919, 0x75e69c41, 0x1de5b089, - 0x4e2dfd53, 0x262ed19b, 0x9e2ba4c3, 0xf628880b, 0xebcd3882, 0x83ce144a, 0x3bcb6112, 0x53c84dda, - 0x9c5bfaa6, 0xf458d66e, 0x4c5da336, 0x245e8ffe, 0x39bb3f77, 0x51b813bf, 0xe9bd66e7, 0x81be4a2f, - 0xd27607f5, 0xba752b3d, 0x02705e65, 0x6a7372ad, 0x7796c224, 0x1f95eeec, 0xa7909bb4, 0xcf93b77c, - 0x3d5b83bd, 0x5558af75, 0xed5dda2d, 0x855ef6e5, 0x98bb466c, 0xf0b86aa4, 0x48bd1ffc, 0x20be3334, - 0x73767eee, 0x1b755226, 0xa370277e, 0xcb730bb6, 0xd696bb3f, 0xbe9597f7, 0x0690e2af, 0x6e93ce67, - 0xa100791b, 0xc90355d3, 0x7106208b, 0x19050c43, 0x04e0bcca, 0x6ce39002, 0xd4e6e55a, 0xbce5c992, - 0xef2d8448, 0x872ea880, 0x3f2bddd8, 0x5728f110, 0x4acd4199, 0x22ce6d51, 0x9acb1809, 0xf2c834c1, - 0x7ab7077a, 0x12b42bb2, 0xaab15eea, 0xc2b27222, 0xdf57c2ab, 0xb754ee63, 0x0f519b3b, 0x6752b7f3, - 0x349afa29, 0x5c99d6e1, 0xe49ca3b9, 0x8c9f8f71, 0x917a3ff8, 0xf9791330, 0x417c6668, 0x297f4aa0, - 0xe6ecfddc, 0x8eefd114, 
0x36eaa44c, 0x5ee98884, 0x430c380d, 0x2b0f14c5, 0x930a619d, 0xfb094d55, - 0xa8c1008f, 0xc0c22c47, 0x78c7591f, 0x10c475d7, 0x0d21c55e, 0x6522e996, 0xdd279cce, 0xb524b006, - 0x47ec84c7, 0x2fefa80f, 0x97eadd57, 0xffe9f19f, 0xe20c4116, 0x8a0f6dde, 0x320a1886, 0x5a09344e, - 0x09c17994, 0x61c2555c, 0xd9c72004, 0xb1c40ccc, 0xac21bc45, 0xc422908d, 0x7c27e5d5, 0x1424c91d, - 0xdbb77e61, 0xb3b452a9, 0x0bb127f1, 0x63b20b39, 0x7e57bbb0, 0x16549778, 0xae51e220, 0xc652cee8, - 0x959a8332, 0xfd99affa, 0x459cdaa2, 0x2d9ff66a, 0x307a46e3, 0x58796a2b, 0xe07c1f73, 0x887f33bb, - 0xf56e0ef4, 0x9d6d223c, 0x25685764, 0x4d6b7bac, 0x508ecb25, 0x388de7ed, 0x808892b5, 0xe88bbe7d, - 0xbb43f3a7, 0xd340df6f, 0x6b45aa37, 0x034686ff, 0x1ea33676, 0x76a01abe, 0xcea56fe6, 0xa6a6432e, - 0x6935f452, 0x0136d89a, 0xb933adc2, 0xd130810a, 0xccd53183, 0xa4d61d4b, 0x1cd36813, 0x74d044db, - 0x27180901, 0x4f1b25c9, 0xf71e5091, 0x9f1d7c59, 0x82f8ccd0, 0xeafbe018, 0x52fe9540, 0x3afdb988, - 0xc8358d49, 0xa036a181, 0x1833d4d9, 0x7030f811, 0x6dd54898, 0x05d66450, 0xbdd31108, 0xd5d03dc0, - 0x8618701a, 0xee1b5cd2, 0x561e298a, 0x3e1d0542, 0x23f8b5cb, 0x4bfb9903, 0xf3feec5b, 0x9bfdc093, - 0x546e77ef, 0x3c6d5b27, 0x84682e7f, 0xec6b02b7, 0xf18eb23e, 0x998d9ef6, 0x2188ebae, 0x498bc766, - 0x1a438abc, 0x7240a674, 0xca45d32c, 0xa246ffe4, 0xbfa34f6d, 0xd7a063a5, 0x6fa516fd, 0x07a63a35, - 0x8fd9098e, 0xe7da2546, 0x5fdf501e, 0x37dc7cd6, 0x2a39cc5f, 0x423ae097, 0xfa3f95cf, 0x923cb907, - 0xc1f4f4dd, 0xa9f7d815, 0x11f2ad4d, 0x79f18185, 0x6414310c, 0x0c171dc4, 0xb412689c, 0xdc114454, - 0x1382f328, 0x7b81dfe0, 0xc384aab8, 0xab878670, 0xb66236f9, 0xde611a31, 0x66646f69, 0x0e6743a1, - 0x5daf0e7b, 0x35ac22b3, 0x8da957eb, 0xe5aa7b23, 0xf84fcbaa, 0x904ce762, 0x2849923a, 0x404abef2, - 0xb2828a33, 0xda81a6fb, 0x6284d3a3, 0x0a87ff6b, 0x17624fe2, 0x7f61632a, 0xc7641672, 0xaf673aba, - 0xfcaf7760, 0x94ac5ba8, 0x2ca92ef0, 0x44aa0238, 0x594fb2b1, 0x314c9e79, 0x8949eb21, 0xe14ac7e9, - 0x2ed97095, 0x46da5c5d, 0xfedf2905, 0x96dc05cd, 0x8b39b544, 
0xe33a998c, 0x5b3fecd4, 0x333cc01c, - 0x60f48dc6, 0x08f7a10e, 0xb0f2d456, 0xd8f1f89e, 0xc5144817, 0xad1764df, 0x15121187, 0x7d113d4f, - }, - { - 0x00000000, 0x493c7d27, 0x9278fa4e, 0xdb448769, 0x211d826d, 0x6821ff4a, 0xb3657823, 0xfa590504, - 0x423b04da, 0x0b0779fd, 0xd043fe94, 0x997f83b3, 0x632686b7, 0x2a1afb90, 0xf15e7cf9, 0xb86201de, - 0x847609b4, 0xcd4a7493, 0x160ef3fa, 0x5f328edd, 0xa56b8bd9, 0xec57f6fe, 0x37137197, 0x7e2f0cb0, - 0xc64d0d6e, 0x8f717049, 0x5435f720, 0x1d098a07, 0xe7508f03, 0xae6cf224, 0x7528754d, 0x3c14086a, - 0x0d006599, 0x443c18be, 0x9f789fd7, 0xd644e2f0, 0x2c1de7f4, 0x65219ad3, 0xbe651dba, 0xf759609d, - 0x4f3b6143, 0x06071c64, 0xdd439b0d, 0x947fe62a, 0x6e26e32e, 0x271a9e09, 0xfc5e1960, 0xb5626447, - 0x89766c2d, 0xc04a110a, 0x1b0e9663, 0x5232eb44, 0xa86bee40, 0xe1579367, 0x3a13140e, 0x732f6929, - 0xcb4d68f7, 0x827115d0, 0x593592b9, 0x1009ef9e, 0xea50ea9a, 0xa36c97bd, 0x782810d4, 0x31146df3, - 0x1a00cb32, 0x533cb615, 0x8878317c, 0xc1444c5b, 0x3b1d495f, 0x72213478, 0xa965b311, 0xe059ce36, - 0x583bcfe8, 0x1107b2cf, 0xca4335a6, 0x837f4881, 0x79264d85, 0x301a30a2, 0xeb5eb7cb, 0xa262caec, - 0x9e76c286, 0xd74abfa1, 0x0c0e38c8, 0x453245ef, 0xbf6b40eb, 0xf6573dcc, 0x2d13baa5, 0x642fc782, - 0xdc4dc65c, 0x9571bb7b, 0x4e353c12, 0x07094135, 0xfd504431, 0xb46c3916, 0x6f28be7f, 0x2614c358, - 0x1700aeab, 0x5e3cd38c, 0x857854e5, 0xcc4429c2, 0x361d2cc6, 0x7f2151e1, 0xa465d688, 0xed59abaf, - 0x553baa71, 0x1c07d756, 0xc743503f, 0x8e7f2d18, 0x7426281c, 0x3d1a553b, 0xe65ed252, 0xaf62af75, - 0x9376a71f, 0xda4ada38, 0x010e5d51, 0x48322076, 0xb26b2572, 0xfb575855, 0x2013df3c, 0x692fa21b, - 0xd14da3c5, 0x9871dee2, 0x4335598b, 0x0a0924ac, 0xf05021a8, 0xb96c5c8f, 0x6228dbe6, 0x2b14a6c1, - 0x34019664, 0x7d3deb43, 0xa6796c2a, 0xef45110d, 0x151c1409, 0x5c20692e, 0x8764ee47, 0xce589360, - 0x763a92be, 0x3f06ef99, 0xe44268f0, 0xad7e15d7, 0x572710d3, 0x1e1b6df4, 0xc55fea9d, 0x8c6397ba, - 0xb0779fd0, 0xf94be2f7, 0x220f659e, 0x6b3318b9, 0x916a1dbd, 0xd856609a, 0x0312e7f3, 
0x4a2e9ad4, - 0xf24c9b0a, 0xbb70e62d, 0x60346144, 0x29081c63, 0xd3511967, 0x9a6d6440, 0x4129e329, 0x08159e0e, - 0x3901f3fd, 0x703d8eda, 0xab7909b3, 0xe2457494, 0x181c7190, 0x51200cb7, 0x8a648bde, 0xc358f6f9, - 0x7b3af727, 0x32068a00, 0xe9420d69, 0xa07e704e, 0x5a27754a, 0x131b086d, 0xc85f8f04, 0x8163f223, - 0xbd77fa49, 0xf44b876e, 0x2f0f0007, 0x66337d20, 0x9c6a7824, 0xd5560503, 0x0e12826a, 0x472eff4d, - 0xff4cfe93, 0xb67083b4, 0x6d3404dd, 0x240879fa, 0xde517cfe, 0x976d01d9, 0x4c2986b0, 0x0515fb97, - 0x2e015d56, 0x673d2071, 0xbc79a718, 0xf545da3f, 0x0f1cdf3b, 0x4620a21c, 0x9d642575, 0xd4585852, - 0x6c3a598c, 0x250624ab, 0xfe42a3c2, 0xb77edee5, 0x4d27dbe1, 0x041ba6c6, 0xdf5f21af, 0x96635c88, - 0xaa7754e2, 0xe34b29c5, 0x380faeac, 0x7133d38b, 0x8b6ad68f, 0xc256aba8, 0x19122cc1, 0x502e51e6, - 0xe84c5038, 0xa1702d1f, 0x7a34aa76, 0x3308d751, 0xc951d255, 0x806daf72, 0x5b29281b, 0x1215553c, - 0x230138cf, 0x6a3d45e8, 0xb179c281, 0xf845bfa6, 0x021cbaa2, 0x4b20c785, 0x906440ec, 0xd9583dcb, - 0x613a3c15, 0x28064132, 0xf342c65b, 0xba7ebb7c, 0x4027be78, 0x091bc35f, 0xd25f4436, 0x9b633911, - 0xa777317b, 0xee4b4c5c, 0x350fcb35, 0x7c33b612, 0x866ab316, 0xcf56ce31, 0x14124958, 0x5d2e347f, - 0xe54c35a1, 0xac704886, 0x7734cfef, 0x3e08b2c8, 0xc451b7cc, 0x8d6dcaeb, 0x56294d82, 0x1f1530a5, - }, - { - 0x00000000, 0xf43ed648, 0xed91da61, 0x19af0c29, 0xdecfc233, 0x2af1147b, 0x335e1852, 0xc760ce1a, - 0xb873f297, 0x4c4d24df, 0x55e228f6, 0xa1dcfebe, 0x66bc30a4, 0x9282e6ec, 0x8b2deac5, 0x7f133c8d, - 0x750b93df, 0x81354597, 0x989a49be, 0x6ca49ff6, 0xabc451ec, 0x5ffa87a4, 0x46558b8d, 0xb26b5dc5, - 0xcd786148, 0x3946b700, 0x20e9bb29, 0xd4d76d61, 0x13b7a37b, 0xe7897533, 0xfe26791a, 0x0a18af52, - 0xea1727be, 0x1e29f1f6, 0x0786fddf, 0xf3b82b97, 0x34d8e58d, 0xc0e633c5, 0xd9493fec, 0x2d77e9a4, - 0x5264d529, 0xa65a0361, 0xbff50f48, 0x4bcbd900, 0x8cab171a, 0x7895c152, 0x613acd7b, 0x95041b33, - 0x9f1cb461, 0x6b226229, 0x728d6e00, 0x86b3b848, 0x41d37652, 0xb5eda01a, 0xac42ac33, 0x587c7a7b, - 0x276f46f6, 
0xd35190be, 0xcafe9c97, 0x3ec04adf, 0xf9a084c5, 0x0d9e528d, 0x14315ea4, 0xe00f88ec, - 0xd1c2398d, 0x25fcefc5, 0x3c53e3ec, 0xc86d35a4, 0x0f0dfbbe, 0xfb332df6, 0xe29c21df, 0x16a2f797, - 0x69b1cb1a, 0x9d8f1d52, 0x8420117b, 0x701ec733, 0xb77e0929, 0x4340df61, 0x5aefd348, 0xaed10500, - 0xa4c9aa52, 0x50f77c1a, 0x49587033, 0xbd66a67b, 0x7a066861, 0x8e38be29, 0x9797b200, 0x63a96448, - 0x1cba58c5, 0xe8848e8d, 0xf12b82a4, 0x051554ec, 0xc2759af6, 0x364b4cbe, 0x2fe44097, 0xdbda96df, - 0x3bd51e33, 0xcfebc87b, 0xd644c452, 0x227a121a, 0xe51adc00, 0x11240a48, 0x088b0661, 0xfcb5d029, - 0x83a6eca4, 0x77983aec, 0x6e3736c5, 0x9a09e08d, 0x5d692e97, 0xa957f8df, 0xb0f8f4f6, 0x44c622be, - 0x4ede8dec, 0xbae05ba4, 0xa34f578d, 0x577181c5, 0x90114fdf, 0x642f9997, 0x7d8095be, 0x89be43f6, - 0xf6ad7f7b, 0x0293a933, 0x1b3ca51a, 0xef027352, 0x2862bd48, 0xdc5c6b00, 0xc5f36729, 0x31cdb161, - 0xa66805eb, 0x5256d3a3, 0x4bf9df8a, 0xbfc709c2, 0x78a7c7d8, 0x8c991190, 0x95361db9, 0x6108cbf1, - 0x1e1bf77c, 0xea252134, 0xf38a2d1d, 0x07b4fb55, 0xc0d4354f, 0x34eae307, 0x2d45ef2e, 0xd97b3966, - 0xd3639634, 0x275d407c, 0x3ef24c55, 0xcacc9a1d, 0x0dac5407, 0xf992824f, 0xe03d8e66, 0x1403582e, - 0x6b1064a3, 0x9f2eb2eb, 0x8681bec2, 0x72bf688a, 0xb5dfa690, 0x41e170d8, 0x584e7cf1, 0xac70aab9, - 0x4c7f2255, 0xb841f41d, 0xa1eef834, 0x55d02e7c, 0x92b0e066, 0x668e362e, 0x7f213a07, 0x8b1fec4f, - 0xf40cd0c2, 0x0032068a, 0x199d0aa3, 0xeda3dceb, 0x2ac312f1, 0xdefdc4b9, 0xc752c890, 0x336c1ed8, - 0x3974b18a, 0xcd4a67c2, 0xd4e56beb, 0x20dbbda3, 0xe7bb73b9, 0x1385a5f1, 0x0a2aa9d8, 0xfe147f90, - 0x8107431d, 0x75399555, 0x6c96997c, 0x98a84f34, 0x5fc8812e, 0xabf65766, 0xb2595b4f, 0x46678d07, - 0x77aa3c66, 0x8394ea2e, 0x9a3be607, 0x6e05304f, 0xa965fe55, 0x5d5b281d, 0x44f42434, 0xb0caf27c, - 0xcfd9cef1, 0x3be718b9, 0x22481490, 0xd676c2d8, 0x11160cc2, 0xe528da8a, 0xfc87d6a3, 0x08b900eb, - 0x02a1afb9, 0xf69f79f1, 0xef3075d8, 0x1b0ea390, 0xdc6e6d8a, 0x2850bbc2, 0x31ffb7eb, 0xc5c161a3, - 0xbad25d2e, 0x4eec8b66, 0x5743874f, 0xa37d5107, 
0x641d9f1d, 0x90234955, 0x898c457c, 0x7db29334, - 0x9dbd1bd8, 0x6983cd90, 0x702cc1b9, 0x841217f1, 0x4372d9eb, 0xb74c0fa3, 0xaee3038a, 0x5addd5c2, - 0x25cee94f, 0xd1f03f07, 0xc85f332e, 0x3c61e566, 0xfb012b7c, 0x0f3ffd34, 0x1690f11d, 0xe2ae2755, - 0xe8b68807, 0x1c885e4f, 0x05275266, 0xf119842e, 0x36794a34, 0xc2479c7c, 0xdbe89055, 0x2fd6461d, - 0x50c57a90, 0xa4fbacd8, 0xbd54a0f1, 0x496a76b9, 0x8e0ab8a3, 0x7a346eeb, 0x639b62c2, 0x97a5b48a, - }, - { - 0x00000000, 0xcb567ba5, 0x934081bb, 0x5816fa1e, 0x236d7587, 0xe83b0e22, 0xb02df43c, 0x7b7b8f99, - 0x46daeb0e, 0x8d8c90ab, 0xd59a6ab5, 0x1ecc1110, 0x65b79e89, 0xaee1e52c, 0xf6f71f32, 0x3da16497, - 0x8db5d61c, 0x46e3adb9, 0x1ef557a7, 0xd5a32c02, 0xaed8a39b, 0x658ed83e, 0x3d982220, 0xf6ce5985, - 0xcb6f3d12, 0x003946b7, 0x582fbca9, 0x9379c70c, 0xe8024895, 0x23543330, 0x7b42c92e, 0xb014b28b, - 0x1e87dac9, 0xd5d1a16c, 0x8dc75b72, 0x469120d7, 0x3deaaf4e, 0xf6bcd4eb, 0xaeaa2ef5, 0x65fc5550, - 0x585d31c7, 0x930b4a62, 0xcb1db07c, 0x004bcbd9, 0x7b304440, 0xb0663fe5, 0xe870c5fb, 0x2326be5e, - 0x93320cd5, 0x58647770, 0x00728d6e, 0xcb24f6cb, 0xb05f7952, 0x7b0902f7, 0x231ff8e9, 0xe849834c, - 0xd5e8e7db, 0x1ebe9c7e, 0x46a86660, 0x8dfe1dc5, 0xf685925c, 0x3dd3e9f9, 0x65c513e7, 0xae936842, - 0x3d0fb592, 0xf659ce37, 0xae4f3429, 0x65194f8c, 0x1e62c015, 0xd534bbb0, 0x8d2241ae, 0x46743a0b, - 0x7bd55e9c, 0xb0832539, 0xe895df27, 0x23c3a482, 0x58b82b1b, 0x93ee50be, 0xcbf8aaa0, 0x00aed105, - 0xb0ba638e, 0x7bec182b, 0x23fae235, 0xe8ac9990, 0x93d71609, 0x58816dac, 0x009797b2, 0xcbc1ec17, - 0xf6608880, 0x3d36f325, 0x6520093b, 0xae76729e, 0xd50dfd07, 0x1e5b86a2, 0x464d7cbc, 0x8d1b0719, - 0x23886f5b, 0xe8de14fe, 0xb0c8eee0, 0x7b9e9545, 0x00e51adc, 0xcbb36179, 0x93a59b67, 0x58f3e0c2, - 0x65528455, 0xae04fff0, 0xf61205ee, 0x3d447e4b, 0x463ff1d2, 0x8d698a77, 0xd57f7069, 0x1e290bcc, - 0xae3db947, 0x656bc2e2, 0x3d7d38fc, 0xf62b4359, 0x8d50ccc0, 0x4606b765, 0x1e104d7b, 0xd54636de, - 0xe8e75249, 0x23b129ec, 0x7ba7d3f2, 0xb0f1a857, 0xcb8a27ce, 0x00dc5c6b, 
0x58caa675, 0x939cddd0, - 0x7a1f6b24, 0xb1491081, 0xe95fea9f, 0x2209913a, 0x59721ea3, 0x92246506, 0xca329f18, 0x0164e4bd, - 0x3cc5802a, 0xf793fb8f, 0xaf850191, 0x64d37a34, 0x1fa8f5ad, 0xd4fe8e08, 0x8ce87416, 0x47be0fb3, - 0xf7aabd38, 0x3cfcc69d, 0x64ea3c83, 0xafbc4726, 0xd4c7c8bf, 0x1f91b31a, 0x47874904, 0x8cd132a1, - 0xb1705636, 0x7a262d93, 0x2230d78d, 0xe966ac28, 0x921d23b1, 0x594b5814, 0x015da20a, 0xca0bd9af, - 0x6498b1ed, 0xafceca48, 0xf7d83056, 0x3c8e4bf3, 0x47f5c46a, 0x8ca3bfcf, 0xd4b545d1, 0x1fe33e74, - 0x22425ae3, 0xe9142146, 0xb102db58, 0x7a54a0fd, 0x012f2f64, 0xca7954c1, 0x926faedf, 0x5939d57a, - 0xe92d67f1, 0x227b1c54, 0x7a6de64a, 0xb13b9def, 0xca401276, 0x011669d3, 0x590093cd, 0x9256e868, - 0xaff78cff, 0x64a1f75a, 0x3cb70d44, 0xf7e176e1, 0x8c9af978, 0x47cc82dd, 0x1fda78c3, 0xd48c0366, - 0x4710deb6, 0x8c46a513, 0xd4505f0d, 0x1f0624a8, 0x647dab31, 0xaf2bd094, 0xf73d2a8a, 0x3c6b512f, - 0x01ca35b8, 0xca9c4e1d, 0x928ab403, 0x59dccfa6, 0x22a7403f, 0xe9f13b9a, 0xb1e7c184, 0x7ab1ba21, - 0xcaa508aa, 0x01f3730f, 0x59e58911, 0x92b3f2b4, 0xe9c87d2d, 0x229e0688, 0x7a88fc96, 0xb1de8733, - 0x8c7fe3a4, 0x47299801, 0x1f3f621f, 0xd46919ba, 0xaf129623, 0x6444ed86, 0x3c521798, 0xf7046c3d, - 0x5997047f, 0x92c17fda, 0xcad785c4, 0x0181fe61, 0x7afa71f8, 0xb1ac0a5d, 0xe9baf043, 0x22ec8be6, - 0x1f4def71, 0xd41b94d4, 0x8c0d6eca, 0x475b156f, 0x3c209af6, 0xf776e153, 0xaf601b4d, 0x643660e8, - 0xd422d263, 0x1f74a9c6, 0x476253d8, 0x8c34287d, 0xf74fa7e4, 0x3c19dc41, 0x640f265f, 0xaf595dfa, - 0x92f8396d, 0x59ae42c8, 0x01b8b8d6, 0xcaeec373, 0xb1954cea, 0x7ac3374f, 0x22d5cd51, 0xe983b6f4, - }, - { - 0x00000000, 0x9771f7c1, 0x2b0f9973, 0xbc7e6eb2, 0x561f32e6, 0xc16ec527, 0x7d10ab95, 0xea615c54, - 0xac3e65cc, 0x3b4f920d, 0x8731fcbf, 0x10400b7e, 0xfa21572a, 0x6d50a0eb, 0xd12ece59, 0x465f3998, - 0x5d90bd69, 0xcae14aa8, 0x769f241a, 0xe1eed3db, 0x0b8f8f8f, 0x9cfe784e, 0x208016fc, 0xb7f1e13d, - 0xf1aed8a5, 0x66df2f64, 0xdaa141d6, 0x4dd0b617, 0xa7b1ea43, 0x30c01d82, 0x8cbe7330, 0x1bcf84f1, - 
0xbb217ad2, 0x2c508d13, 0x902ee3a1, 0x075f1460, 0xed3e4834, 0x7a4fbff5, 0xc631d147, 0x51402686, - 0x171f1f1e, 0x806ee8df, 0x3c10866d, 0xab6171ac, 0x41002df8, 0xd671da39, 0x6a0fb48b, 0xfd7e434a, - 0xe6b1c7bb, 0x71c0307a, 0xcdbe5ec8, 0x5acfa909, 0xb0aef55d, 0x27df029c, 0x9ba16c2e, 0x0cd09bef, - 0x4a8fa277, 0xddfe55b6, 0x61803b04, 0xf6f1ccc5, 0x1c909091, 0x8be16750, 0x379f09e2, 0xa0eefe23, - 0x73ae8355, 0xe4df7494, 0x58a11a26, 0xcfd0ede7, 0x25b1b1b3, 0xb2c04672, 0x0ebe28c0, 0x99cfdf01, - 0xdf90e699, 0x48e11158, 0xf49f7fea, 0x63ee882b, 0x898fd47f, 0x1efe23be, 0xa2804d0c, 0x35f1bacd, - 0x2e3e3e3c, 0xb94fc9fd, 0x0531a74f, 0x9240508e, 0x78210cda, 0xef50fb1b, 0x532e95a9, 0xc45f6268, - 0x82005bf0, 0x1571ac31, 0xa90fc283, 0x3e7e3542, 0xd41f6916, 0x436e9ed7, 0xff10f065, 0x686107a4, - 0xc88ff987, 0x5ffe0e46, 0xe38060f4, 0x74f19735, 0x9e90cb61, 0x09e13ca0, 0xb59f5212, 0x22eea5d3, - 0x64b19c4b, 0xf3c06b8a, 0x4fbe0538, 0xd8cff2f9, 0x32aeaead, 0xa5df596c, 0x19a137de, 0x8ed0c01f, - 0x951f44ee, 0x026eb32f, 0xbe10dd9d, 0x29612a5c, 0xc3007608, 0x547181c9, 0xe80fef7b, 0x7f7e18ba, - 0x39212122, 0xae50d6e3, 0x122eb851, 0x855f4f90, 0x6f3e13c4, 0xf84fe405, 0x44318ab7, 0xd3407d76, - 0xe75d06aa, 0x702cf16b, 0xcc529fd9, 0x5b236818, 0xb142344c, 0x2633c38d, 0x9a4dad3f, 0x0d3c5afe, - 0x4b636366, 0xdc1294a7, 0x606cfa15, 0xf71d0dd4, 0x1d7c5180, 0x8a0da641, 0x3673c8f3, 0xa1023f32, - 0xbacdbbc3, 0x2dbc4c02, 0x91c222b0, 0x06b3d571, 0xecd28925, 0x7ba37ee4, 0xc7dd1056, 0x50ace797, - 0x16f3de0f, 0x818229ce, 0x3dfc477c, 0xaa8db0bd, 0x40ecece9, 0xd79d1b28, 0x6be3759a, 0xfc92825b, - 0x5c7c7c78, 0xcb0d8bb9, 0x7773e50b, 0xe00212ca, 0x0a634e9e, 0x9d12b95f, 0x216cd7ed, 0xb61d202c, - 0xf04219b4, 0x6733ee75, 0xdb4d80c7, 0x4c3c7706, 0xa65d2b52, 0x312cdc93, 0x8d52b221, 0x1a2345e0, - 0x01ecc111, 0x969d36d0, 0x2ae35862, 0xbd92afa3, 0x57f3f3f7, 0xc0820436, 0x7cfc6a84, 0xeb8d9d45, - 0xadd2a4dd, 0x3aa3531c, 0x86dd3dae, 0x11acca6f, 0xfbcd963b, 0x6cbc61fa, 0xd0c20f48, 0x47b3f889, - 0x94f385ff, 0x0382723e, 0xbffc1c8c, 
0x288deb4d, 0xc2ecb719, 0x559d40d8, 0xe9e32e6a, 0x7e92d9ab, - 0x38cde033, 0xafbc17f2, 0x13c27940, 0x84b38e81, 0x6ed2d2d5, 0xf9a32514, 0x45dd4ba6, 0xd2acbc67, - 0xc9633896, 0x5e12cf57, 0xe26ca1e5, 0x751d5624, 0x9f7c0a70, 0x080dfdb1, 0xb4739303, 0x230264c2, - 0x655d5d5a, 0xf22caa9b, 0x4e52c429, 0xd92333e8, 0x33426fbc, 0xa433987d, 0x184df6cf, 0x8f3c010e, - 0x2fd2ff2d, 0xb8a308ec, 0x04dd665e, 0x93ac919f, 0x79cdcdcb, 0xeebc3a0a, 0x52c254b8, 0xc5b3a379, - 0x83ec9ae1, 0x149d6d20, 0xa8e30392, 0x3f92f453, 0xd5f3a807, 0x42825fc6, 0xfefc3174, 0x698dc6b5, - 0x72424244, 0xe533b585, 0x594ddb37, 0xce3c2cf6, 0x245d70a2, 0xb32c8763, 0x0f52e9d1, 0x98231e10, - 0xde7c2788, 0x490dd049, 0xf573befb, 0x6202493a, 0x8863156e, 0x1f12e2af, 0xa36c8c1d, 0x341d7bdc, - }, - { - 0x00000000, 0x3171d430, 0x62e3a860, 0x53927c50, 0xc5c750c0, 0xf4b684f0, 0xa724f8a0, 0x96552c90, - 0x8e62d771, 0xbf130341, 0xec817f11, 0xddf0ab21, 0x4ba587b1, 0x7ad45381, 0x29462fd1, 0x1837fbe1, - 0x1929d813, 0x28580c23, 0x7bca7073, 0x4abba443, 0xdcee88d3, 0xed9f5ce3, 0xbe0d20b3, 0x8f7cf483, - 0x974b0f62, 0xa63adb52, 0xf5a8a702, 0xc4d97332, 0x528c5fa2, 0x63fd8b92, 0x306ff7c2, 0x011e23f2, - 0x3253b026, 0x03226416, 0x50b01846, 0x61c1cc76, 0xf794e0e6, 0xc6e534d6, 0x95774886, 0xa4069cb6, - 0xbc316757, 0x8d40b367, 0xded2cf37, 0xefa31b07, 0x79f63797, 0x4887e3a7, 0x1b159ff7, 0x2a644bc7, - 0x2b7a6835, 0x1a0bbc05, 0x4999c055, 0x78e81465, 0xeebd38f5, 0xdfccecc5, 0x8c5e9095, 0xbd2f44a5, - 0xa518bf44, 0x94696b74, 0xc7fb1724, 0xf68ac314, 0x60dfef84, 0x51ae3bb4, 0x023c47e4, 0x334d93d4, - 0x64a7604c, 0x55d6b47c, 0x0644c82c, 0x37351c1c, 0xa160308c, 0x9011e4bc, 0xc38398ec, 0xf2f24cdc, - 0xeac5b73d, 0xdbb4630d, 0x88261f5d, 0xb957cb6d, 0x2f02e7fd, 0x1e7333cd, 0x4de14f9d, 0x7c909bad, - 0x7d8eb85f, 0x4cff6c6f, 0x1f6d103f, 0x2e1cc40f, 0xb849e89f, 0x89383caf, 0xdaaa40ff, 0xebdb94cf, - 0xf3ec6f2e, 0xc29dbb1e, 0x910fc74e, 0xa07e137e, 0x362b3fee, 0x075aebde, 0x54c8978e, 0x65b943be, - 0x56f4d06a, 0x6785045a, 0x3417780a, 0x0566ac3a, 0x933380aa, 
0xa242549a, 0xf1d028ca, 0xc0a1fcfa, - 0xd896071b, 0xe9e7d32b, 0xba75af7b, 0x8b047b4b, 0x1d5157db, 0x2c2083eb, 0x7fb2ffbb, 0x4ec32b8b, - 0x4fdd0879, 0x7eacdc49, 0x2d3ea019, 0x1c4f7429, 0x8a1a58b9, 0xbb6b8c89, 0xe8f9f0d9, 0xd98824e9, - 0xc1bfdf08, 0xf0ce0b38, 0xa35c7768, 0x922da358, 0x04788fc8, 0x35095bf8, 0x669b27a8, 0x57eaf398, - 0xc94ec098, 0xf83f14a8, 0xabad68f8, 0x9adcbcc8, 0x0c899058, 0x3df84468, 0x6e6a3838, 0x5f1bec08, - 0x472c17e9, 0x765dc3d9, 0x25cfbf89, 0x14be6bb9, 0x82eb4729, 0xb39a9319, 0xe008ef49, 0xd1793b79, - 0xd067188b, 0xe116ccbb, 0xb284b0eb, 0x83f564db, 0x15a0484b, 0x24d19c7b, 0x7743e02b, 0x4632341b, - 0x5e05cffa, 0x6f741bca, 0x3ce6679a, 0x0d97b3aa, 0x9bc29f3a, 0xaab34b0a, 0xf921375a, 0xc850e36a, - 0xfb1d70be, 0xca6ca48e, 0x99fed8de, 0xa88f0cee, 0x3eda207e, 0x0fabf44e, 0x5c39881e, 0x6d485c2e, - 0x757fa7cf, 0x440e73ff, 0x179c0faf, 0x26eddb9f, 0xb0b8f70f, 0x81c9233f, 0xd25b5f6f, 0xe32a8b5f, - 0xe234a8ad, 0xd3457c9d, 0x80d700cd, 0xb1a6d4fd, 0x27f3f86d, 0x16822c5d, 0x4510500d, 0x7461843d, - 0x6c567fdc, 0x5d27abec, 0x0eb5d7bc, 0x3fc4038c, 0xa9912f1c, 0x98e0fb2c, 0xcb72877c, 0xfa03534c, - 0xade9a0d4, 0x9c9874e4, 0xcf0a08b4, 0xfe7bdc84, 0x682ef014, 0x595f2424, 0x0acd5874, 0x3bbc8c44, - 0x238b77a5, 0x12faa395, 0x4168dfc5, 0x70190bf5, 0xe64c2765, 0xd73df355, 0x84af8f05, 0xb5de5b35, - 0xb4c078c7, 0x85b1acf7, 0xd623d0a7, 0xe7520497, 0x71072807, 0x4076fc37, 0x13e48067, 0x22955457, - 0x3aa2afb6, 0x0bd37b86, 0x584107d6, 0x6930d3e6, 0xff65ff76, 0xce142b46, 0x9d865716, 0xacf78326, - 0x9fba10f2, 0xaecbc4c2, 0xfd59b892, 0xcc286ca2, 0x5a7d4032, 0x6b0c9402, 0x389ee852, 0x09ef3c62, - 0x11d8c783, 0x20a913b3, 0x733b6fe3, 0x424abbd3, 0xd41f9743, 0xe56e4373, 0xb6fc3f23, 0x878deb13, - 0x8693c8e1, 0xb7e21cd1, 0xe4706081, 0xd501b4b1, 0x43549821, 0x72254c11, 0x21b73041, 0x10c6e471, - 0x08f11f90, 0x3980cba0, 0x6a12b7f0, 0x5b6363c0, 0xcd364f50, 0xfc479b60, 0xafd5e730, 0x9ea43300, - }, - { - 0x00000000, 0x30d23865, 0x61a470ca, 0x517648af, 0xc348e194, 0xf39ad9f1, 0xa2ec915e, 
0x923ea93b, - 0x837db5d9, 0xb3af8dbc, 0xe2d9c513, 0xd20bfd76, 0x4035544d, 0x70e76c28, 0x21912487, 0x11431ce2, - 0x03171d43, 0x33c52526, 0x62b36d89, 0x526155ec, 0xc05ffcd7, 0xf08dc4b2, 0xa1fb8c1d, 0x9129b478, - 0x806aa89a, 0xb0b890ff, 0xe1ced850, 0xd11ce035, 0x4322490e, 0x73f0716b, 0x228639c4, 0x125401a1, - 0x062e3a86, 0x36fc02e3, 0x678a4a4c, 0x57587229, 0xc566db12, 0xf5b4e377, 0xa4c2abd8, 0x941093bd, - 0x85538f5f, 0xb581b73a, 0xe4f7ff95, 0xd425c7f0, 0x461b6ecb, 0x76c956ae, 0x27bf1e01, 0x176d2664, - 0x053927c5, 0x35eb1fa0, 0x649d570f, 0x544f6f6a, 0xc671c651, 0xf6a3fe34, 0xa7d5b69b, 0x97078efe, - 0x8644921c, 0xb696aa79, 0xe7e0e2d6, 0xd732dab3, 0x450c7388, 0x75de4bed, 0x24a80342, 0x147a3b27, - 0x0c5c750c, 0x3c8e4d69, 0x6df805c6, 0x5d2a3da3, 0xcf149498, 0xffc6acfd, 0xaeb0e452, 0x9e62dc37, - 0x8f21c0d5, 0xbff3f8b0, 0xee85b01f, 0xde57887a, 0x4c692141, 0x7cbb1924, 0x2dcd518b, 0x1d1f69ee, - 0x0f4b684f, 0x3f99502a, 0x6eef1885, 0x5e3d20e0, 0xcc0389db, 0xfcd1b1be, 0xada7f911, 0x9d75c174, - 0x8c36dd96, 0xbce4e5f3, 0xed92ad5c, 0xdd409539, 0x4f7e3c02, 0x7fac0467, 0x2eda4cc8, 0x1e0874ad, - 0x0a724f8a, 0x3aa077ef, 0x6bd63f40, 0x5b040725, 0xc93aae1e, 0xf9e8967b, 0xa89eded4, 0x984ce6b1, - 0x890ffa53, 0xb9ddc236, 0xe8ab8a99, 0xd879b2fc, 0x4a471bc7, 0x7a9523a2, 0x2be36b0d, 0x1b315368, - 0x096552c9, 0x39b76aac, 0x68c12203, 0x58131a66, 0xca2db35d, 0xfaff8b38, 0xab89c397, 0x9b5bfbf2, - 0x8a18e710, 0xbacadf75, 0xebbc97da, 0xdb6eafbf, 0x49500684, 0x79823ee1, 0x28f4764e, 0x18264e2b, - 0x18b8ea18, 0x286ad27d, 0x791c9ad2, 0x49cea2b7, 0xdbf00b8c, 0xeb2233e9, 0xba547b46, 0x8a864323, - 0x9bc55fc1, 0xab1767a4, 0xfa612f0b, 0xcab3176e, 0x588dbe55, 0x685f8630, 0x3929ce9f, 0x09fbf6fa, - 0x1baff75b, 0x2b7dcf3e, 0x7a0b8791, 0x4ad9bff4, 0xd8e716cf, 0xe8352eaa, 0xb9436605, 0x89915e60, - 0x98d24282, 0xa8007ae7, 0xf9763248, 0xc9a40a2d, 0x5b9aa316, 0x6b489b73, 0x3a3ed3dc, 0x0aecebb9, - 0x1e96d09e, 0x2e44e8fb, 0x7f32a054, 0x4fe09831, 0xddde310a, 0xed0c096f, 0xbc7a41c0, 0x8ca879a5, - 0x9deb6547, 0xad395d22, 
0xfc4f158d, 0xcc9d2de8, 0x5ea384d3, 0x6e71bcb6, 0x3f07f419, 0x0fd5cc7c, - 0x1d81cddd, 0x2d53f5b8, 0x7c25bd17, 0x4cf78572, 0xdec92c49, 0xee1b142c, 0xbf6d5c83, 0x8fbf64e6, - 0x9efc7804, 0xae2e4061, 0xff5808ce, 0xcf8a30ab, 0x5db49990, 0x6d66a1f5, 0x3c10e95a, 0x0cc2d13f, - 0x14e49f14, 0x2436a771, 0x7540efde, 0x4592d7bb, 0xd7ac7e80, 0xe77e46e5, 0xb6080e4a, 0x86da362f, - 0x97992acd, 0xa74b12a8, 0xf63d5a07, 0xc6ef6262, 0x54d1cb59, 0x6403f33c, 0x3575bb93, 0x05a783f6, - 0x17f38257, 0x2721ba32, 0x7657f29d, 0x4685caf8, 0xd4bb63c3, 0xe4695ba6, 0xb51f1309, 0x85cd2b6c, - 0x948e378e, 0xa45c0feb, 0xf52a4744, 0xc5f87f21, 0x57c6d61a, 0x6714ee7f, 0x3662a6d0, 0x06b09eb5, - 0x12caa592, 0x22189df7, 0x736ed558, 0x43bced3d, 0xd1824406, 0xe1507c63, 0xb02634cc, 0x80f40ca9, - 0x91b7104b, 0xa165282e, 0xf0136081, 0xc0c158e4, 0x52fff1df, 0x622dc9ba, 0x335b8115, 0x0389b970, - 0x11ddb8d1, 0x210f80b4, 0x7079c81b, 0x40abf07e, 0xd2955945, 0xe2476120, 0xb331298f, 0x83e311ea, - 0x92a00d08, 0xa272356d, 0xf3047dc2, 0xc3d645a7, 0x51e8ec9c, 0x613ad4f9, 0x304c9c56, 0x009ea433, - }, - { - 0x00000000, 0x54075546, 0xa80eaa8c, 0xfc09ffca, 0x55f123e9, 0x01f676af, 0xfdff8965, 0xa9f8dc23, - 0xabe247d2, 0xffe51294, 0x03eced5e, 0x57ebb818, 0xfe13643b, 0xaa14317d, 0x561dceb7, 0x021a9bf1, - 0x5228f955, 0x062fac13, 0xfa2653d9, 0xae21069f, 0x07d9dabc, 0x53de8ffa, 0xafd77030, 0xfbd02576, - 0xf9cabe87, 0xadcdebc1, 0x51c4140b, 0x05c3414d, 0xac3b9d6e, 0xf83cc828, 0x043537e2, 0x503262a4, - 0xa451f2aa, 0xf056a7ec, 0x0c5f5826, 0x58580d60, 0xf1a0d143, 0xa5a78405, 0x59ae7bcf, 0x0da92e89, - 0x0fb3b578, 0x5bb4e03e, 0xa7bd1ff4, 0xf3ba4ab2, 0x5a429691, 0x0e45c3d7, 0xf24c3c1d, 0xa64b695b, - 0xf6790bff, 0xa27e5eb9, 0x5e77a173, 0x0a70f435, 0xa3882816, 0xf78f7d50, 0x0b86829a, 0x5f81d7dc, - 0x5d9b4c2d, 0x099c196b, 0xf595e6a1, 0xa192b3e7, 0x086a6fc4, 0x5c6d3a82, 0xa064c548, 0xf463900e, - 0x4d4f93a5, 0x1948c6e3, 0xe5413929, 0xb1466c6f, 0x18beb04c, 0x4cb9e50a, 0xb0b01ac0, 0xe4b74f86, - 0xe6add477, 0xb2aa8131, 0x4ea37efb, 0x1aa42bbd, 
0xb35cf79e, 0xe75ba2d8, 0x1b525d12, 0x4f550854, - 0x1f676af0, 0x4b603fb6, 0xb769c07c, 0xe36e953a, 0x4a964919, 0x1e911c5f, 0xe298e395, 0xb69fb6d3, - 0xb4852d22, 0xe0827864, 0x1c8b87ae, 0x488cd2e8, 0xe1740ecb, 0xb5735b8d, 0x497aa447, 0x1d7df101, - 0xe91e610f, 0xbd193449, 0x4110cb83, 0x15179ec5, 0xbcef42e6, 0xe8e817a0, 0x14e1e86a, 0x40e6bd2c, - 0x42fc26dd, 0x16fb739b, 0xeaf28c51, 0xbef5d917, 0x170d0534, 0x430a5072, 0xbf03afb8, 0xeb04fafe, - 0xbb36985a, 0xef31cd1c, 0x133832d6, 0x473f6790, 0xeec7bbb3, 0xbac0eef5, 0x46c9113f, 0x12ce4479, - 0x10d4df88, 0x44d38ace, 0xb8da7504, 0xecdd2042, 0x4525fc61, 0x1122a927, 0xed2b56ed, 0xb92c03ab, - 0x9a9f274a, 0xce98720c, 0x32918dc6, 0x6696d880, 0xcf6e04a3, 0x9b6951e5, 0x6760ae2f, 0x3367fb69, - 0x317d6098, 0x657a35de, 0x9973ca14, 0xcd749f52, 0x648c4371, 0x308b1637, 0xcc82e9fd, 0x9885bcbb, - 0xc8b7de1f, 0x9cb08b59, 0x60b97493, 0x34be21d5, 0x9d46fdf6, 0xc941a8b0, 0x3548577a, 0x614f023c, - 0x635599cd, 0x3752cc8b, 0xcb5b3341, 0x9f5c6607, 0x36a4ba24, 0x62a3ef62, 0x9eaa10a8, 0xcaad45ee, - 0x3eced5e0, 0x6ac980a6, 0x96c07f6c, 0xc2c72a2a, 0x6b3ff609, 0x3f38a34f, 0xc3315c85, 0x973609c3, - 0x952c9232, 0xc12bc774, 0x3d2238be, 0x69256df8, 0xc0ddb1db, 0x94dae49d, 0x68d31b57, 0x3cd44e11, - 0x6ce62cb5, 0x38e179f3, 0xc4e88639, 0x90efd37f, 0x39170f5c, 0x6d105a1a, 0x9119a5d0, 0xc51ef096, - 0xc7046b67, 0x93033e21, 0x6f0ac1eb, 0x3b0d94ad, 0x92f5488e, 0xc6f21dc8, 0x3afbe202, 0x6efcb744, - 0xd7d0b4ef, 0x83d7e1a9, 0x7fde1e63, 0x2bd94b25, 0x82219706, 0xd626c240, 0x2a2f3d8a, 0x7e2868cc, - 0x7c32f33d, 0x2835a67b, 0xd43c59b1, 0x803b0cf7, 0x29c3d0d4, 0x7dc48592, 0x81cd7a58, 0xd5ca2f1e, - 0x85f84dba, 0xd1ff18fc, 0x2df6e736, 0x79f1b270, 0xd0096e53, 0x840e3b15, 0x7807c4df, 0x2c009199, - 0x2e1a0a68, 0x7a1d5f2e, 0x8614a0e4, 0xd213f5a2, 0x7beb2981, 0x2fec7cc7, 0xd3e5830d, 0x87e2d64b, - 0x73814645, 0x27861303, 0xdb8fecc9, 0x8f88b98f, 0x267065ac, 0x727730ea, 0x8e7ecf20, 0xda799a66, - 0xd8630197, 0x8c6454d1, 0x706dab1b, 0x246afe5d, 0x8d92227e, 0xd9957738, 0x259c88f2, 
0x719bddb4, - 0x21a9bf10, 0x75aeea56, 0x89a7159c, 0xdda040da, 0x74589cf9, 0x205fc9bf, 0xdc563675, 0x88516333, - 0x8a4bf8c2, 0xde4cad84, 0x2245524e, 0x76420708, 0xdfbadb2b, 0x8bbd8e6d, 0x77b471a7, 0x23b324e1, - }, - { - 0x00000000, 0x678efd01, 0xcf1dfa02, 0xa8930703, 0x9bd782f5, 0xfc597ff4, 0x54ca78f7, 0x334485f6, - 0x3243731b, 0x55cd8e1a, 0xfd5e8919, 0x9ad07418, 0xa994f1ee, 0xce1a0cef, 0x66890bec, 0x0107f6ed, - 0x6486e636, 0x03081b37, 0xab9b1c34, 0xcc15e135, 0xff5164c3, 0x98df99c2, 0x304c9ec1, 0x57c263c0, - 0x56c5952d, 0x314b682c, 0x99d86f2f, 0xfe56922e, 0xcd1217d8, 0xaa9cead9, 0x020fedda, 0x658110db, - 0xc90dcc6c, 0xae83316d, 0x0610366e, 0x619ecb6f, 0x52da4e99, 0x3554b398, 0x9dc7b49b, 0xfa49499a, - 0xfb4ebf77, 0x9cc04276, 0x34534575, 0x53ddb874, 0x60993d82, 0x0717c083, 0xaf84c780, 0xc80a3a81, - 0xad8b2a5a, 0xca05d75b, 0x6296d058, 0x05182d59, 0x365ca8af, 0x51d255ae, 0xf94152ad, 0x9ecfafac, - 0x9fc85941, 0xf846a440, 0x50d5a343, 0x375b5e42, 0x041fdbb4, 0x639126b5, 0xcb0221b6, 0xac8cdcb7, - 0x97f7ee29, 0xf0791328, 0x58ea142b, 0x3f64e92a, 0x0c206cdc, 0x6bae91dd, 0xc33d96de, 0xa4b36bdf, - 0xa5b49d32, 0xc23a6033, 0x6aa96730, 0x0d279a31, 0x3e631fc7, 0x59ede2c6, 0xf17ee5c5, 0x96f018c4, - 0xf371081f, 0x94fff51e, 0x3c6cf21d, 0x5be20f1c, 0x68a68aea, 0x0f2877eb, 0xa7bb70e8, 0xc0358de9, - 0xc1327b04, 0xa6bc8605, 0x0e2f8106, 0x69a17c07, 0x5ae5f9f1, 0x3d6b04f0, 0x95f803f3, 0xf276fef2, - 0x5efa2245, 0x3974df44, 0x91e7d847, 0xf6692546, 0xc52da0b0, 0xa2a35db1, 0x0a305ab2, 0x6dbea7b3, - 0x6cb9515e, 0x0b37ac5f, 0xa3a4ab5c, 0xc42a565d, 0xf76ed3ab, 0x90e02eaa, 0x387329a9, 0x5ffdd4a8, - 0x3a7cc473, 0x5df23972, 0xf5613e71, 0x92efc370, 0xa1ab4686, 0xc625bb87, 0x6eb6bc84, 0x09384185, - 0x083fb768, 0x6fb14a69, 0xc7224d6a, 0xa0acb06b, 0x93e8359d, 0xf466c89c, 0x5cf5cf9f, 0x3b7b329e, - 0x2a03aaa3, 0x4d8d57a2, 0xe51e50a1, 0x8290ada0, 0xb1d42856, 0xd65ad557, 0x7ec9d254, 0x19472f55, - 0x1840d9b8, 0x7fce24b9, 0xd75d23ba, 0xb0d3debb, 0x83975b4d, 0xe419a64c, 0x4c8aa14f, 0x2b045c4e, - 0x4e854c95, 
0x290bb194, 0x8198b697, 0xe6164b96, 0xd552ce60, 0xb2dc3361, 0x1a4f3462, 0x7dc1c963, - 0x7cc63f8e, 0x1b48c28f, 0xb3dbc58c, 0xd455388d, 0xe711bd7b, 0x809f407a, 0x280c4779, 0x4f82ba78, - 0xe30e66cf, 0x84809bce, 0x2c139ccd, 0x4b9d61cc, 0x78d9e43a, 0x1f57193b, 0xb7c41e38, 0xd04ae339, - 0xd14d15d4, 0xb6c3e8d5, 0x1e50efd6, 0x79de12d7, 0x4a9a9721, 0x2d146a20, 0x85876d23, 0xe2099022, - 0x878880f9, 0xe0067df8, 0x48957afb, 0x2f1b87fa, 0x1c5f020c, 0x7bd1ff0d, 0xd342f80e, 0xb4cc050f, - 0xb5cbf3e2, 0xd2450ee3, 0x7ad609e0, 0x1d58f4e1, 0x2e1c7117, 0x49928c16, 0xe1018b15, 0x868f7614, - 0xbdf4448a, 0xda7ab98b, 0x72e9be88, 0x15674389, 0x2623c67f, 0x41ad3b7e, 0xe93e3c7d, 0x8eb0c17c, - 0x8fb73791, 0xe839ca90, 0x40aacd93, 0x27243092, 0x1460b564, 0x73ee4865, 0xdb7d4f66, 0xbcf3b267, - 0xd972a2bc, 0xbefc5fbd, 0x166f58be, 0x71e1a5bf, 0x42a52049, 0x252bdd48, 0x8db8da4b, 0xea36274a, - 0xeb31d1a7, 0x8cbf2ca6, 0x242c2ba5, 0x43a2d6a4, 0x70e65352, 0x1768ae53, 0xbffba950, 0xd8755451, - 0x74f988e6, 0x137775e7, 0xbbe472e4, 0xdc6a8fe5, 0xef2e0a13, 0x88a0f712, 0x2033f011, 0x47bd0d10, - 0x46bafbfd, 0x213406fc, 0x89a701ff, 0xee29fcfe, 0xdd6d7908, 0xbae38409, 0x1270830a, 0x75fe7e0b, - 0x107f6ed0, 0x77f193d1, 0xdf6294d2, 0xb8ec69d3, 0x8ba8ec25, 0xec261124, 0x44b51627, 0x233beb26, - 0x223c1dcb, 0x45b2e0ca, 0xed21e7c9, 0x8aaf1ac8, 0xb9eb9f3e, 0xde65623f, 0x76f6653c, 0x1178983d, - }, - { - 0x00000000, 0xf20c0dfe, 0xe1f46d0d, 0x13f860f3, 0xc604aceb, 0x3408a115, 0x27f0c1e6, 0xd5fccc18, - 0x89e52f27, 0x7be922d9, 0x6811422a, 0x9a1d4fd4, 0x4fe183cc, 0xbded8e32, 0xae15eec1, 0x5c19e33f, - 0x162628bf, 0xe42a2541, 0xf7d245b2, 0x05de484c, 0xd0228454, 0x222e89aa, 0x31d6e959, 0xc3dae4a7, - 0x9fc30798, 0x6dcf0a66, 0x7e376a95, 0x8c3b676b, 0x59c7ab73, 0xabcba68d, 0xb833c67e, 0x4a3fcb80, - 0x2c4c517e, 0xde405c80, 0xcdb83c73, 0x3fb4318d, 0xea48fd95, 0x1844f06b, 0x0bbc9098, 0xf9b09d66, - 0xa5a97e59, 0x57a573a7, 0x445d1354, 0xb6511eaa, 0x63add2b2, 0x91a1df4c, 0x8259bfbf, 0x7055b241, - 0x3a6a79c1, 0xc866743f, 0xdb9e14cc, 
0x29921932, 0xfc6ed52a, 0x0e62d8d4, 0x1d9ab827, 0xef96b5d9, - 0xb38f56e6, 0x41835b18, 0x527b3beb, 0xa0773615, 0x758bfa0d, 0x8787f7f3, 0x947f9700, 0x66739afe, - 0x5898a2fc, 0xaa94af02, 0xb96ccff1, 0x4b60c20f, 0x9e9c0e17, 0x6c9003e9, 0x7f68631a, 0x8d646ee4, - 0xd17d8ddb, 0x23718025, 0x3089e0d6, 0xc285ed28, 0x17792130, 0xe5752cce, 0xf68d4c3d, 0x048141c3, - 0x4ebe8a43, 0xbcb287bd, 0xaf4ae74e, 0x5d46eab0, 0x88ba26a8, 0x7ab62b56, 0x694e4ba5, 0x9b42465b, - 0xc75ba564, 0x3557a89a, 0x26afc869, 0xd4a3c597, 0x015f098f, 0xf3530471, 0xe0ab6482, 0x12a7697c, - 0x74d4f382, 0x86d8fe7c, 0x95209e8f, 0x672c9371, 0xb2d05f69, 0x40dc5297, 0x53243264, 0xa1283f9a, - 0xfd31dca5, 0x0f3dd15b, 0x1cc5b1a8, 0xeec9bc56, 0x3b35704e, 0xc9397db0, 0xdac11d43, 0x28cd10bd, - 0x62f2db3d, 0x90fed6c3, 0x8306b630, 0x710abbce, 0xa4f677d6, 0x56fa7a28, 0x45021adb, 0xb70e1725, - 0xeb17f41a, 0x191bf9e4, 0x0ae39917, 0xf8ef94e9, 0x2d1358f1, 0xdf1f550f, 0xcce735fc, 0x3eeb3802, - 0xb13145f8, 0x433d4806, 0x50c528f5, 0xa2c9250b, 0x7735e913, 0x8539e4ed, 0x96c1841e, 0x64cd89e0, - 0x38d46adf, 0xcad86721, 0xd92007d2, 0x2b2c0a2c, 0xfed0c634, 0x0cdccbca, 0x1f24ab39, 0xed28a6c7, - 0xa7176d47, 0x551b60b9, 0x46e3004a, 0xb4ef0db4, 0x6113c1ac, 0x931fcc52, 0x80e7aca1, 0x72eba15f, - 0x2ef24260, 0xdcfe4f9e, 0xcf062f6d, 0x3d0a2293, 0xe8f6ee8b, 0x1afae375, 0x09028386, 0xfb0e8e78, - 0x9d7d1486, 0x6f711978, 0x7c89798b, 0x8e857475, 0x5b79b86d, 0xa975b593, 0xba8dd560, 0x4881d89e, - 0x14983ba1, 0xe694365f, 0xf56c56ac, 0x07605b52, 0xd29c974a, 0x20909ab4, 0x3368fa47, 0xc164f7b9, - 0x8b5b3c39, 0x795731c7, 0x6aaf5134, 0x98a35cca, 0x4d5f90d2, 0xbf539d2c, 0xacabfddf, 0x5ea7f021, - 0x02be131e, 0xf0b21ee0, 0xe34a7e13, 0x114673ed, 0xc4babff5, 0x36b6b20b, 0x254ed2f8, 0xd742df06, - 0xe9a9e704, 0x1ba5eafa, 0x085d8a09, 0xfa5187f7, 0x2fad4bef, 0xdda14611, 0xce5926e2, 0x3c552b1c, - 0x604cc823, 0x9240c5dd, 0x81b8a52e, 0x73b4a8d0, 0xa64864c8, 0x54446936, 0x47bc09c5, 0xb5b0043b, - 0xff8fcfbb, 0x0d83c245, 0x1e7ba2b6, 0xec77af48, 0x398b6350, 0xcb876eae, 
0xd87f0e5d, 0x2a7303a3, - 0x766ae09c, 0x8466ed62, 0x979e8d91, 0x6592806f, 0xb06e4c77, 0x42624189, 0x519a217a, 0xa3962c84, - 0xc5e5b67a, 0x37e9bb84, 0x2411db77, 0xd61dd689, 0x03e11a91, 0xf1ed176f, 0xe215779c, 0x10197a62, - 0x4c00995d, 0xbe0c94a3, 0xadf4f450, 0x5ff8f9ae, 0x8a0435b6, 0x78083848, 0x6bf058bb, 0x99fc5545, - 0xd3c39ec5, 0x21cf933b, 0x3237f3c8, 0xc03bfe36, 0x15c7322e, 0xe7cb3fd0, 0xf4335f23, 0x063f52dd, - 0x5a26b1e2, 0xa82abc1c, 0xbbd2dcef, 0x49ded111, 0x9c221d09, 0x6e2e10f7, 0x7dd67004, 0x8fda7dfa, - }, + { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 
0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351, + }, + { + 0x00000000, 0x13a29877, 0x274530ee, 0x34e7a899, 0x4e8a61dc, 0x5d28f9ab, 0x69cf5132, 0x7a6dc945, + 0x9d14c3b8, 0x8eb65bcf, 0xba51f356, 0xa9f36b21, 0xd39ea264, 0xc03c3a13, 0xf4db928a, 0xe7790afd, + 0x3fc5f181, 0x2c6769f6, 0x1880c16f, 0x0b225918, 0x714f905d, 0x62ed082a, 0x560aa0b3, 0x45a838c4, + 0xa2d13239, 0xb173aa4e, 
0x859402d7, 0x96369aa0, 0xec5b53e5, 0xfff9cb92, 0xcb1e630b, 0xd8bcfb7c, + 0x7f8be302, 0x6c297b75, 0x58ced3ec, 0x4b6c4b9b, 0x310182de, 0x22a31aa9, 0x1644b230, 0x05e62a47, + 0xe29f20ba, 0xf13db8cd, 0xc5da1054, 0xd6788823, 0xac154166, 0xbfb7d911, 0x8b507188, 0x98f2e9ff, + 0x404e1283, 0x53ec8af4, 0x670b226d, 0x74a9ba1a, 0x0ec4735f, 0x1d66eb28, 0x298143b1, 0x3a23dbc6, + 0xdd5ad13b, 0xcef8494c, 0xfa1fe1d5, 0xe9bd79a2, 0x93d0b0e7, 0x80722890, 0xb4958009, 0xa737187e, + 0xff17c604, 0xecb55e73, 0xd852f6ea, 0xcbf06e9d, 0xb19da7d8, 0xa23f3faf, 0x96d89736, 0x857a0f41, + 0x620305bc, 0x71a19dcb, 0x45463552, 0x56e4ad25, 0x2c896460, 0x3f2bfc17, 0x0bcc548e, 0x186eccf9, + 0xc0d23785, 0xd370aff2, 0xe797076b, 0xf4359f1c, 0x8e585659, 0x9dface2e, 0xa91d66b7, 0xbabffec0, + 0x5dc6f43d, 0x4e646c4a, 0x7a83c4d3, 0x69215ca4, 0x134c95e1, 0x00ee0d96, 0x3409a50f, 0x27ab3d78, + 0x809c2506, 0x933ebd71, 0xa7d915e8, 0xb47b8d9f, 0xce1644da, 0xddb4dcad, 0xe9537434, 0xfaf1ec43, + 0x1d88e6be, 0x0e2a7ec9, 0x3acdd650, 0x296f4e27, 0x53028762, 0x40a01f15, 0x7447b78c, 0x67e52ffb, + 0xbf59d487, 0xacfb4cf0, 0x981ce469, 0x8bbe7c1e, 0xf1d3b55b, 0xe2712d2c, 0xd69685b5, 0xc5341dc2, + 0x224d173f, 0x31ef8f48, 0x050827d1, 0x16aabfa6, 0x6cc776e3, 0x7f65ee94, 0x4b82460d, 0x5820de7a, + 0xfbc3faf9, 0xe861628e, 0xdc86ca17, 0xcf245260, 0xb5499b25, 0xa6eb0352, 0x920cabcb, 0x81ae33bc, + 0x66d73941, 0x7575a136, 0x419209af, 0x523091d8, 0x285d589d, 0x3bffc0ea, 0x0f186873, 0x1cbaf004, + 0xc4060b78, 0xd7a4930f, 0xe3433b96, 0xf0e1a3e1, 0x8a8c6aa4, 0x992ef2d3, 0xadc95a4a, 0xbe6bc23d, + 0x5912c8c0, 0x4ab050b7, 0x7e57f82e, 0x6df56059, 0x1798a91c, 0x043a316b, 0x30dd99f2, 0x237f0185, + 0x844819fb, 0x97ea818c, 0xa30d2915, 0xb0afb162, 0xcac27827, 0xd960e050, 0xed8748c9, 0xfe25d0be, + 0x195cda43, 0x0afe4234, 0x3e19eaad, 0x2dbb72da, 0x57d6bb9f, 0x447423e8, 0x70938b71, 0x63311306, + 0xbb8de87a, 0xa82f700d, 0x9cc8d894, 0x8f6a40e3, 0xf50789a6, 0xe6a511d1, 0xd242b948, 0xc1e0213f, + 0x26992bc2, 0x353bb3b5, 0x01dc1b2c, 0x127e835b, 0x68134a1e, 
0x7bb1d269, 0x4f567af0, 0x5cf4e287, + 0x04d43cfd, 0x1776a48a, 0x23910c13, 0x30339464, 0x4a5e5d21, 0x59fcc556, 0x6d1b6dcf, 0x7eb9f5b8, + 0x99c0ff45, 0x8a626732, 0xbe85cfab, 0xad2757dc, 0xd74a9e99, 0xc4e806ee, 0xf00fae77, 0xe3ad3600, + 0x3b11cd7c, 0x28b3550b, 0x1c54fd92, 0x0ff665e5, 0x759baca0, 0x663934d7, 0x52de9c4e, 0x417c0439, + 0xa6050ec4, 0xb5a796b3, 0x81403e2a, 0x92e2a65d, 0xe88f6f18, 0xfb2df76f, 0xcfca5ff6, 0xdc68c781, + 0x7b5fdfff, 0x68fd4788, 0x5c1aef11, 0x4fb87766, 0x35d5be23, 0x26772654, 0x12908ecd, 0x013216ba, + 0xe64b1c47, 0xf5e98430, 0xc10e2ca9, 0xd2acb4de, 0xa8c17d9b, 0xbb63e5ec, 0x8f844d75, 0x9c26d502, + 0x449a2e7e, 0x5738b609, 0x63df1e90, 0x707d86e7, 0x0a104fa2, 0x19b2d7d5, 0x2d557f4c, 0x3ef7e73b, + 0xd98eedc6, 0xca2c75b1, 0xfecbdd28, 0xed69455f, 0x97048c1a, 0x84a6146d, 0xb041bcf4, 0xa3e32483, + }, + { + 0x00000000, 0xa541927e, 0x4f6f520d, 0xea2ec073, 0x9edea41a, 0x3b9f3664, 0xd1b1f617, 0x74f06469, + 0x38513ec5, 0x9d10acbb, 0x773e6cc8, 0xd27ffeb6, 0xa68f9adf, 0x03ce08a1, 0xe9e0c8d2, 0x4ca15aac, + 0x70a27d8a, 0xd5e3eff4, 0x3fcd2f87, 0x9a8cbdf9, 0xee7cd990, 0x4b3d4bee, 0xa1138b9d, 0x045219e3, + 0x48f3434f, 0xedb2d131, 0x079c1142, 0xa2dd833c, 0xd62de755, 0x736c752b, 0x9942b558, 0x3c032726, + 0xe144fb14, 0x4405696a, 0xae2ba919, 0x0b6a3b67, 0x7f9a5f0e, 0xdadbcd70, 0x30f50d03, 0x95b49f7d, + 0xd915c5d1, 0x7c5457af, 0x967a97dc, 0x333b05a2, 0x47cb61cb, 0xe28af3b5, 0x08a433c6, 0xade5a1b8, + 0x91e6869e, 0x34a714e0, 0xde89d493, 0x7bc846ed, 0x0f382284, 0xaa79b0fa, 0x40577089, 0xe516e2f7, + 0xa9b7b85b, 0x0cf62a25, 0xe6d8ea56, 0x43997828, 0x37691c41, 0x92288e3f, 0x78064e4c, 0xdd47dc32, + 0xc76580d9, 0x622412a7, 0x880ad2d4, 0x2d4b40aa, 0x59bb24c3, 0xfcfab6bd, 0x16d476ce, 0xb395e4b0, + 0xff34be1c, 0x5a752c62, 0xb05bec11, 0x151a7e6f, 0x61ea1a06, 0xc4ab8878, 0x2e85480b, 0x8bc4da75, + 0xb7c7fd53, 0x12866f2d, 0xf8a8af5e, 0x5de93d20, 0x29195949, 0x8c58cb37, 0x66760b44, 0xc337993a, + 0x8f96c396, 0x2ad751e8, 0xc0f9919b, 0x65b803e5, 0x1148678c, 0xb409f5f2, 0x5e273581, 
0xfb66a7ff, + 0x26217bcd, 0x8360e9b3, 0x694e29c0, 0xcc0fbbbe, 0xb8ffdfd7, 0x1dbe4da9, 0xf7908dda, 0x52d11fa4, + 0x1e704508, 0xbb31d776, 0x511f1705, 0xf45e857b, 0x80aee112, 0x25ef736c, 0xcfc1b31f, 0x6a802161, + 0x56830647, 0xf3c29439, 0x19ec544a, 0xbcadc634, 0xc85da25d, 0x6d1c3023, 0x8732f050, 0x2273622e, + 0x6ed23882, 0xcb93aafc, 0x21bd6a8f, 0x84fcf8f1, 0xf00c9c98, 0x554d0ee6, 0xbf63ce95, 0x1a225ceb, + 0x8b277743, 0x2e66e53d, 0xc448254e, 0x6109b730, 0x15f9d359, 0xb0b84127, 0x5a968154, 0xffd7132a, + 0xb3764986, 0x1637dbf8, 0xfc191b8b, 0x595889f5, 0x2da8ed9c, 0x88e97fe2, 0x62c7bf91, 0xc7862def, + 0xfb850ac9, 0x5ec498b7, 0xb4ea58c4, 0x11abcaba, 0x655baed3, 0xc01a3cad, 0x2a34fcde, 0x8f756ea0, + 0xc3d4340c, 0x6695a672, 0x8cbb6601, 0x29faf47f, 0x5d0a9016, 0xf84b0268, 0x1265c21b, 0xb7245065, + 0x6a638c57, 0xcf221e29, 0x250cde5a, 0x804d4c24, 0xf4bd284d, 0x51fcba33, 0xbbd27a40, 0x1e93e83e, + 0x5232b292, 0xf77320ec, 0x1d5de09f, 0xb81c72e1, 0xccec1688, 0x69ad84f6, 0x83834485, 0x26c2d6fb, + 0x1ac1f1dd, 0xbf8063a3, 0x55aea3d0, 0xf0ef31ae, 0x841f55c7, 0x215ec7b9, 0xcb7007ca, 0x6e3195b4, + 0x2290cf18, 0x87d15d66, 0x6dff9d15, 0xc8be0f6b, 0xbc4e6b02, 0x190ff97c, 0xf321390f, 0x5660ab71, + 0x4c42f79a, 0xe90365e4, 0x032da597, 0xa66c37e9, 0xd29c5380, 0x77ddc1fe, 0x9df3018d, 0x38b293f3, + 0x7413c95f, 0xd1525b21, 0x3b7c9b52, 0x9e3d092c, 0xeacd6d45, 0x4f8cff3b, 0xa5a23f48, 0x00e3ad36, + 0x3ce08a10, 0x99a1186e, 0x738fd81d, 0xd6ce4a63, 0xa23e2e0a, 0x077fbc74, 0xed517c07, 0x4810ee79, + 0x04b1b4d5, 0xa1f026ab, 0x4bdee6d8, 0xee9f74a6, 0x9a6f10cf, 0x3f2e82b1, 0xd50042c2, 0x7041d0bc, + 0xad060c8e, 0x08479ef0, 0xe2695e83, 0x4728ccfd, 0x33d8a894, 0x96993aea, 0x7cb7fa99, 0xd9f668e7, + 0x9557324b, 0x3016a035, 0xda386046, 0x7f79f238, 0x0b899651, 0xaec8042f, 0x44e6c45c, 0xe1a75622, + 0xdda47104, 0x78e5e37a, 0x92cb2309, 0x378ab177, 0x437ad51e, 0xe63b4760, 0x0c158713, 0xa954156d, + 0xe5f54fc1, 0x40b4ddbf, 0xaa9a1dcc, 0x0fdb8fb2, 0x7b2bebdb, 0xde6a79a5, 0x3444b9d6, 0x91052ba8, + }, + { + 0x00000000, 
0xdd45aab8, 0xbf672381, 0x62228939, 0x7b2231f3, 0xa6679b4b, 0xc4451272, 0x1900b8ca, + 0xf64463e6, 0x2b01c95e, 0x49234067, 0x9466eadf, 0x8d665215, 0x5023f8ad, 0x32017194, 0xef44db2c, + 0xe964b13d, 0x34211b85, 0x560392bc, 0x8b463804, 0x924680ce, 0x4f032a76, 0x2d21a34f, 0xf06409f7, + 0x1f20d2db, 0xc2657863, 0xa047f15a, 0x7d025be2, 0x6402e328, 0xb9474990, 0xdb65c0a9, 0x06206a11, + 0xd725148b, 0x0a60be33, 0x6842370a, 0xb5079db2, 0xac072578, 0x71428fc0, 0x136006f9, 0xce25ac41, + 0x2161776d, 0xfc24ddd5, 0x9e0654ec, 0x4343fe54, 0x5a43469e, 0x8706ec26, 0xe524651f, 0x3861cfa7, + 0x3e41a5b6, 0xe3040f0e, 0x81268637, 0x5c632c8f, 0x45639445, 0x98263efd, 0xfa04b7c4, 0x27411d7c, + 0xc805c650, 0x15406ce8, 0x7762e5d1, 0xaa274f69, 0xb327f7a3, 0x6e625d1b, 0x0c40d422, 0xd1057e9a, + 0xaba65fe7, 0x76e3f55f, 0x14c17c66, 0xc984d6de, 0xd0846e14, 0x0dc1c4ac, 0x6fe34d95, 0xb2a6e72d, + 0x5de23c01, 0x80a796b9, 0xe2851f80, 0x3fc0b538, 0x26c00df2, 0xfb85a74a, 0x99a72e73, 0x44e284cb, + 0x42c2eeda, 0x9f874462, 0xfda5cd5b, 0x20e067e3, 0x39e0df29, 0xe4a57591, 0x8687fca8, 0x5bc25610, + 0xb4868d3c, 0x69c32784, 0x0be1aebd, 0xd6a40405, 0xcfa4bccf, 0x12e11677, 0x70c39f4e, 0xad8635f6, + 0x7c834b6c, 0xa1c6e1d4, 0xc3e468ed, 0x1ea1c255, 0x07a17a9f, 0xdae4d027, 0xb8c6591e, 0x6583f3a6, + 0x8ac7288a, 0x57828232, 0x35a00b0b, 0xe8e5a1b3, 0xf1e51979, 0x2ca0b3c1, 0x4e823af8, 0x93c79040, + 0x95e7fa51, 0x48a250e9, 0x2a80d9d0, 0xf7c57368, 0xeec5cba2, 0x3380611a, 0x51a2e823, 0x8ce7429b, + 0x63a399b7, 0xbee6330f, 0xdcc4ba36, 0x0181108e, 0x1881a844, 0xc5c402fc, 0xa7e68bc5, 0x7aa3217d, + 0x52a0c93f, 0x8fe56387, 0xedc7eabe, 0x30824006, 0x2982f8cc, 0xf4c75274, 0x96e5db4d, 0x4ba071f5, + 0xa4e4aad9, 0x79a10061, 0x1b838958, 0xc6c623e0, 0xdfc69b2a, 0x02833192, 0x60a1b8ab, 0xbde41213, + 0xbbc47802, 0x6681d2ba, 0x04a35b83, 0xd9e6f13b, 0xc0e649f1, 0x1da3e349, 0x7f816a70, 0xa2c4c0c8, + 0x4d801be4, 0x90c5b15c, 0xf2e73865, 0x2fa292dd, 0x36a22a17, 0xebe780af, 0x89c50996, 0x5480a32e, + 0x8585ddb4, 0x58c0770c, 0x3ae2fe35, 0xe7a7548d, 
0xfea7ec47, 0x23e246ff, 0x41c0cfc6, 0x9c85657e, + 0x73c1be52, 0xae8414ea, 0xcca69dd3, 0x11e3376b, 0x08e38fa1, 0xd5a62519, 0xb784ac20, 0x6ac10698, + 0x6ce16c89, 0xb1a4c631, 0xd3864f08, 0x0ec3e5b0, 0x17c35d7a, 0xca86f7c2, 0xa8a47efb, 0x75e1d443, + 0x9aa50f6f, 0x47e0a5d7, 0x25c22cee, 0xf8878656, 0xe1873e9c, 0x3cc29424, 0x5ee01d1d, 0x83a5b7a5, + 0xf90696d8, 0x24433c60, 0x4661b559, 0x9b241fe1, 0x8224a72b, 0x5f610d93, 0x3d4384aa, 0xe0062e12, + 0x0f42f53e, 0xd2075f86, 0xb025d6bf, 0x6d607c07, 0x7460c4cd, 0xa9256e75, 0xcb07e74c, 0x16424df4, + 0x106227e5, 0xcd278d5d, 0xaf050464, 0x7240aedc, 0x6b401616, 0xb605bcae, 0xd4273597, 0x09629f2f, + 0xe6264403, 0x3b63eebb, 0x59416782, 0x8404cd3a, 0x9d0475f0, 0x4041df48, 0x22635671, 0xff26fcc9, + 0x2e238253, 0xf36628eb, 0x9144a1d2, 0x4c010b6a, 0x5501b3a0, 0x88441918, 0xea669021, 0x37233a99, + 0xd867e1b5, 0x05224b0d, 0x6700c234, 0xba45688c, 0xa345d046, 0x7e007afe, 0x1c22f3c7, 0xc167597f, + 0xc747336e, 0x1a0299d6, 0x782010ef, 0xa565ba57, 0xbc65029d, 0x6120a825, 0x0302211c, 0xde478ba4, + 0x31035088, 0xec46fa30, 0x8e647309, 0x5321d9b1, 0x4a21617b, 0x9764cbc3, 0xf54642fa, 0x2803e842, + }, + { + 0x00000000, 0x38116fac, 0x7022df58, 0x4833b0f4, 0xe045beb0, 0xd854d11c, 0x906761e8, 0xa8760e44, + 0xc5670b91, 0xfd76643d, 0xb545d4c9, 0x8d54bb65, 0x2522b521, 0x1d33da8d, 0x55006a79, 0x6d1105d5, + 0x8f2261d3, 0xb7330e7f, 0xff00be8b, 0xc711d127, 0x6f67df63, 0x5776b0cf, 0x1f45003b, 0x27546f97, + 0x4a456a42, 0x725405ee, 0x3a67b51a, 0x0276dab6, 0xaa00d4f2, 0x9211bb5e, 0xda220baa, 0xe2336406, + 0x1ba8b557, 0x23b9dafb, 0x6b8a6a0f, 0x539b05a3, 0xfbed0be7, 0xc3fc644b, 0x8bcfd4bf, 0xb3debb13, + 0xdecfbec6, 0xe6ded16a, 0xaeed619e, 0x96fc0e32, 0x3e8a0076, 0x069b6fda, 0x4ea8df2e, 0x76b9b082, + 0x948ad484, 0xac9bbb28, 0xe4a80bdc, 0xdcb96470, 0x74cf6a34, 0x4cde0598, 0x04edb56c, 0x3cfcdac0, + 0x51eddf15, 0x69fcb0b9, 0x21cf004d, 0x19de6fe1, 0xb1a861a5, 0x89b90e09, 0xc18abefd, 0xf99bd151, + 0x37516aae, 0x0f400502, 0x4773b5f6, 0x7f62da5a, 0xd714d41e, 0xef05bbb2, 
0xa7360b46, 0x9f2764ea, + 0xf236613f, 0xca270e93, 0x8214be67, 0xba05d1cb, 0x1273df8f, 0x2a62b023, 0x625100d7, 0x5a406f7b, + 0xb8730b7d, 0x806264d1, 0xc851d425, 0xf040bb89, 0x5836b5cd, 0x6027da61, 0x28146a95, 0x10050539, + 0x7d1400ec, 0x45056f40, 0x0d36dfb4, 0x3527b018, 0x9d51be5c, 0xa540d1f0, 0xed736104, 0xd5620ea8, + 0x2cf9dff9, 0x14e8b055, 0x5cdb00a1, 0x64ca6f0d, 0xccbc6149, 0xf4ad0ee5, 0xbc9ebe11, 0x848fd1bd, + 0xe99ed468, 0xd18fbbc4, 0x99bc0b30, 0xa1ad649c, 0x09db6ad8, 0x31ca0574, 0x79f9b580, 0x41e8da2c, + 0xa3dbbe2a, 0x9bcad186, 0xd3f96172, 0xebe80ede, 0x439e009a, 0x7b8f6f36, 0x33bcdfc2, 0x0badb06e, + 0x66bcb5bb, 0x5eadda17, 0x169e6ae3, 0x2e8f054f, 0x86f90b0b, 0xbee864a7, 0xf6dbd453, 0xcecabbff, + 0x6ea2d55c, 0x56b3baf0, 0x1e800a04, 0x269165a8, 0x8ee76bec, 0xb6f60440, 0xfec5b4b4, 0xc6d4db18, + 0xabc5decd, 0x93d4b161, 0xdbe70195, 0xe3f66e39, 0x4b80607d, 0x73910fd1, 0x3ba2bf25, 0x03b3d089, + 0xe180b48f, 0xd991db23, 0x91a26bd7, 0xa9b3047b, 0x01c50a3f, 0x39d46593, 0x71e7d567, 0x49f6bacb, + 0x24e7bf1e, 0x1cf6d0b2, 0x54c56046, 0x6cd40fea, 0xc4a201ae, 0xfcb36e02, 0xb480def6, 0x8c91b15a, + 0x750a600b, 0x4d1b0fa7, 0x0528bf53, 0x3d39d0ff, 0x954fdebb, 0xad5eb117, 0xe56d01e3, 0xdd7c6e4f, + 0xb06d6b9a, 0x887c0436, 0xc04fb4c2, 0xf85edb6e, 0x5028d52a, 0x6839ba86, 0x200a0a72, 0x181b65de, + 0xfa2801d8, 0xc2396e74, 0x8a0ade80, 0xb21bb12c, 0x1a6dbf68, 0x227cd0c4, 0x6a4f6030, 0x525e0f9c, + 0x3f4f0a49, 0x075e65e5, 0x4f6dd511, 0x777cbabd, 0xdf0ab4f9, 0xe71bdb55, 0xaf286ba1, 0x9739040d, + 0x59f3bff2, 0x61e2d05e, 0x29d160aa, 0x11c00f06, 0xb9b60142, 0x81a76eee, 0xc994de1a, 0xf185b1b6, + 0x9c94b463, 0xa485dbcf, 0xecb66b3b, 0xd4a70497, 0x7cd10ad3, 0x44c0657f, 0x0cf3d58b, 0x34e2ba27, + 0xd6d1de21, 0xeec0b18d, 0xa6f30179, 0x9ee26ed5, 0x36946091, 0x0e850f3d, 0x46b6bfc9, 0x7ea7d065, + 0x13b6d5b0, 0x2ba7ba1c, 0x63940ae8, 0x5b856544, 0xf3f36b00, 0xcbe204ac, 0x83d1b458, 0xbbc0dbf4, + 0x425b0aa5, 0x7a4a6509, 0x3279d5fd, 0x0a68ba51, 0xa21eb415, 0x9a0fdbb9, 0xd23c6b4d, 0xea2d04e1, + 0x873c0134, 
0xbf2d6e98, 0xf71ede6c, 0xcf0fb1c0, 0x6779bf84, 0x5f68d028, 0x175b60dc, 0x2f4a0f70, + 0xcd796b76, 0xf56804da, 0xbd5bb42e, 0x854adb82, 0x2d3cd5c6, 0x152dba6a, 0x5d1e0a9e, 0x650f6532, + 0x081e60e7, 0x300f0f4b, 0x783cbfbf, 0x402dd013, 0xe85bde57, 0xd04ab1fb, 0x9879010f, 0xa0686ea3, + }, + { + 0x00000000, 0xef306b19, 0xdb8ca0c3, 0x34bccbda, 0xb2f53777, 0x5dc55c6e, 0x697997b4, 0x8649fcad, + 0x6006181f, 0x8f367306, 0xbb8ab8dc, 0x54bad3c5, 0xd2f32f68, 0x3dc34471, 0x097f8fab, 0xe64fe4b2, + 0xc00c303e, 0x2f3c5b27, 0x1b8090fd, 0xf4b0fbe4, 0x72f90749, 0x9dc96c50, 0xa975a78a, 0x4645cc93, + 0xa00a2821, 0x4f3a4338, 0x7b8688e2, 0x94b6e3fb, 0x12ff1f56, 0xfdcf744f, 0xc973bf95, 0x2643d48c, + 0x85f4168d, 0x6ac47d94, 0x5e78b64e, 0xb148dd57, 0x370121fa, 0xd8314ae3, 0xec8d8139, 0x03bdea20, + 0xe5f20e92, 0x0ac2658b, 0x3e7eae51, 0xd14ec548, 0x570739e5, 0xb83752fc, 0x8c8b9926, 0x63bbf23f, + 0x45f826b3, 0xaac84daa, 0x9e748670, 0x7144ed69, 0xf70d11c4, 0x183d7add, 0x2c81b107, 0xc3b1da1e, + 0x25fe3eac, 0xcace55b5, 0xfe729e6f, 0x1142f576, 0x970b09db, 0x783b62c2, 0x4c87a918, 0xa3b7c201, + 0x0e045beb, 0xe13430f2, 0xd588fb28, 0x3ab89031, 0xbcf16c9c, 0x53c10785, 0x677dcc5f, 0x884da746, + 0x6e0243f4, 0x813228ed, 0xb58ee337, 0x5abe882e, 0xdcf77483, 0x33c71f9a, 0x077bd440, 0xe84bbf59, + 0xce086bd5, 0x213800cc, 0x1584cb16, 0xfab4a00f, 0x7cfd5ca2, 0x93cd37bb, 0xa771fc61, 0x48419778, + 0xae0e73ca, 0x413e18d3, 0x7582d309, 0x9ab2b810, 0x1cfb44bd, 0xf3cb2fa4, 0xc777e47e, 0x28478f67, + 0x8bf04d66, 0x64c0267f, 0x507ceda5, 0xbf4c86bc, 0x39057a11, 0xd6351108, 0xe289dad2, 0x0db9b1cb, + 0xebf65579, 0x04c63e60, 0x307af5ba, 0xdf4a9ea3, 0x5903620e, 0xb6330917, 0x828fc2cd, 0x6dbfa9d4, + 0x4bfc7d58, 0xa4cc1641, 0x9070dd9b, 0x7f40b682, 0xf9094a2f, 0x16392136, 0x2285eaec, 0xcdb581f5, + 0x2bfa6547, 0xc4ca0e5e, 0xf076c584, 0x1f46ae9d, 0x990f5230, 0x763f3929, 0x4283f2f3, 0xadb399ea, + 0x1c08b7d6, 0xf338dccf, 0xc7841715, 0x28b47c0c, 0xaefd80a1, 0x41cdebb8, 0x75712062, 0x9a414b7b, + 0x7c0eafc9, 0x933ec4d0, 0xa7820f0a, 
0x48b26413, 0xcefb98be, 0x21cbf3a7, 0x1577387d, 0xfa475364, + 0xdc0487e8, 0x3334ecf1, 0x0788272b, 0xe8b84c32, 0x6ef1b09f, 0x81c1db86, 0xb57d105c, 0x5a4d7b45, + 0xbc029ff7, 0x5332f4ee, 0x678e3f34, 0x88be542d, 0x0ef7a880, 0xe1c7c399, 0xd57b0843, 0x3a4b635a, + 0x99fca15b, 0x76ccca42, 0x42700198, 0xad406a81, 0x2b09962c, 0xc439fd35, 0xf08536ef, 0x1fb55df6, + 0xf9fab944, 0x16cad25d, 0x22761987, 0xcd46729e, 0x4b0f8e33, 0xa43fe52a, 0x90832ef0, 0x7fb345e9, + 0x59f09165, 0xb6c0fa7c, 0x827c31a6, 0x6d4c5abf, 0xeb05a612, 0x0435cd0b, 0x308906d1, 0xdfb96dc8, + 0x39f6897a, 0xd6c6e263, 0xe27a29b9, 0x0d4a42a0, 0x8b03be0d, 0x6433d514, 0x508f1ece, 0xbfbf75d7, + 0x120cec3d, 0xfd3c8724, 0xc9804cfe, 0x26b027e7, 0xa0f9db4a, 0x4fc9b053, 0x7b757b89, 0x94451090, + 0x720af422, 0x9d3a9f3b, 0xa98654e1, 0x46b63ff8, 0xc0ffc355, 0x2fcfa84c, 0x1b736396, 0xf443088f, + 0xd200dc03, 0x3d30b71a, 0x098c7cc0, 0xe6bc17d9, 0x60f5eb74, 0x8fc5806d, 0xbb794bb7, 0x544920ae, + 0xb206c41c, 0x5d36af05, 0x698a64df, 0x86ba0fc6, 0x00f3f36b, 0xefc39872, 0xdb7f53a8, 0x344f38b1, + 0x97f8fab0, 0x78c891a9, 0x4c745a73, 0xa344316a, 0x250dcdc7, 0xca3da6de, 0xfe816d04, 0x11b1061d, + 0xf7fee2af, 0x18ce89b6, 0x2c72426c, 0xc3422975, 0x450bd5d8, 0xaa3bbec1, 0x9e87751b, 0x71b71e02, + 0x57f4ca8e, 0xb8c4a197, 0x8c786a4d, 0x63480154, 0xe501fdf9, 0x0a3196e0, 0x3e8d5d3a, 0xd1bd3623, + 0x37f2d291, 0xd8c2b988, 0xec7e7252, 0x034e194b, 0x8507e5e6, 0x6a378eff, 0x5e8b4525, 0xb1bb2e3c, + }, + { + 0x00000000, 0x68032cc8, 0xd0065990, 0xb8057558, 0xa5e0c5d1, 0xcde3e919, 0x75e69c41, 0x1de5b089, + 0x4e2dfd53, 0x262ed19b, 0x9e2ba4c3, 0xf628880b, 0xebcd3882, 0x83ce144a, 0x3bcb6112, 0x53c84dda, + 0x9c5bfaa6, 0xf458d66e, 0x4c5da336, 0x245e8ffe, 0x39bb3f77, 0x51b813bf, 0xe9bd66e7, 0x81be4a2f, + 0xd27607f5, 0xba752b3d, 0x02705e65, 0x6a7372ad, 0x7796c224, 0x1f95eeec, 0xa7909bb4, 0xcf93b77c, + 0x3d5b83bd, 0x5558af75, 0xed5dda2d, 0x855ef6e5, 0x98bb466c, 0xf0b86aa4, 0x48bd1ffc, 0x20be3334, + 0x73767eee, 0x1b755226, 0xa370277e, 0xcb730bb6, 0xd696bb3f, 
0xbe9597f7, 0x0690e2af, 0x6e93ce67, + 0xa100791b, 0xc90355d3, 0x7106208b, 0x19050c43, 0x04e0bcca, 0x6ce39002, 0xd4e6e55a, 0xbce5c992, + 0xef2d8448, 0x872ea880, 0x3f2bddd8, 0x5728f110, 0x4acd4199, 0x22ce6d51, 0x9acb1809, 0xf2c834c1, + 0x7ab7077a, 0x12b42bb2, 0xaab15eea, 0xc2b27222, 0xdf57c2ab, 0xb754ee63, 0x0f519b3b, 0x6752b7f3, + 0x349afa29, 0x5c99d6e1, 0xe49ca3b9, 0x8c9f8f71, 0x917a3ff8, 0xf9791330, 0x417c6668, 0x297f4aa0, + 0xe6ecfddc, 0x8eefd114, 0x36eaa44c, 0x5ee98884, 0x430c380d, 0x2b0f14c5, 0x930a619d, 0xfb094d55, + 0xa8c1008f, 0xc0c22c47, 0x78c7591f, 0x10c475d7, 0x0d21c55e, 0x6522e996, 0xdd279cce, 0xb524b006, + 0x47ec84c7, 0x2fefa80f, 0x97eadd57, 0xffe9f19f, 0xe20c4116, 0x8a0f6dde, 0x320a1886, 0x5a09344e, + 0x09c17994, 0x61c2555c, 0xd9c72004, 0xb1c40ccc, 0xac21bc45, 0xc422908d, 0x7c27e5d5, 0x1424c91d, + 0xdbb77e61, 0xb3b452a9, 0x0bb127f1, 0x63b20b39, 0x7e57bbb0, 0x16549778, 0xae51e220, 0xc652cee8, + 0x959a8332, 0xfd99affa, 0x459cdaa2, 0x2d9ff66a, 0x307a46e3, 0x58796a2b, 0xe07c1f73, 0x887f33bb, + 0xf56e0ef4, 0x9d6d223c, 0x25685764, 0x4d6b7bac, 0x508ecb25, 0x388de7ed, 0x808892b5, 0xe88bbe7d, + 0xbb43f3a7, 0xd340df6f, 0x6b45aa37, 0x034686ff, 0x1ea33676, 0x76a01abe, 0xcea56fe6, 0xa6a6432e, + 0x6935f452, 0x0136d89a, 0xb933adc2, 0xd130810a, 0xccd53183, 0xa4d61d4b, 0x1cd36813, 0x74d044db, + 0x27180901, 0x4f1b25c9, 0xf71e5091, 0x9f1d7c59, 0x82f8ccd0, 0xeafbe018, 0x52fe9540, 0x3afdb988, + 0xc8358d49, 0xa036a181, 0x1833d4d9, 0x7030f811, 0x6dd54898, 0x05d66450, 0xbdd31108, 0xd5d03dc0, + 0x8618701a, 0xee1b5cd2, 0x561e298a, 0x3e1d0542, 0x23f8b5cb, 0x4bfb9903, 0xf3feec5b, 0x9bfdc093, + 0x546e77ef, 0x3c6d5b27, 0x84682e7f, 0xec6b02b7, 0xf18eb23e, 0x998d9ef6, 0x2188ebae, 0x498bc766, + 0x1a438abc, 0x7240a674, 0xca45d32c, 0xa246ffe4, 0xbfa34f6d, 0xd7a063a5, 0x6fa516fd, 0x07a63a35, + 0x8fd9098e, 0xe7da2546, 0x5fdf501e, 0x37dc7cd6, 0x2a39cc5f, 0x423ae097, 0xfa3f95cf, 0x923cb907, + 0xc1f4f4dd, 0xa9f7d815, 0x11f2ad4d, 0x79f18185, 0x6414310c, 0x0c171dc4, 0xb412689c, 0xdc114454, + 
0x1382f328, 0x7b81dfe0, 0xc384aab8, 0xab878670, 0xb66236f9, 0xde611a31, 0x66646f69, 0x0e6743a1, + 0x5daf0e7b, 0x35ac22b3, 0x8da957eb, 0xe5aa7b23, 0xf84fcbaa, 0x904ce762, 0x2849923a, 0x404abef2, + 0xb2828a33, 0xda81a6fb, 0x6284d3a3, 0x0a87ff6b, 0x17624fe2, 0x7f61632a, 0xc7641672, 0xaf673aba, + 0xfcaf7760, 0x94ac5ba8, 0x2ca92ef0, 0x44aa0238, 0x594fb2b1, 0x314c9e79, 0x8949eb21, 0xe14ac7e9, + 0x2ed97095, 0x46da5c5d, 0xfedf2905, 0x96dc05cd, 0x8b39b544, 0xe33a998c, 0x5b3fecd4, 0x333cc01c, + 0x60f48dc6, 0x08f7a10e, 0xb0f2d456, 0xd8f1f89e, 0xc5144817, 0xad1764df, 0x15121187, 0x7d113d4f, + }, + { + 0x00000000, 0x493c7d27, 0x9278fa4e, 0xdb448769, 0x211d826d, 0x6821ff4a, 0xb3657823, 0xfa590504, + 0x423b04da, 0x0b0779fd, 0xd043fe94, 0x997f83b3, 0x632686b7, 0x2a1afb90, 0xf15e7cf9, 0xb86201de, + 0x847609b4, 0xcd4a7493, 0x160ef3fa, 0x5f328edd, 0xa56b8bd9, 0xec57f6fe, 0x37137197, 0x7e2f0cb0, + 0xc64d0d6e, 0x8f717049, 0x5435f720, 0x1d098a07, 0xe7508f03, 0xae6cf224, 0x7528754d, 0x3c14086a, + 0x0d006599, 0x443c18be, 0x9f789fd7, 0xd644e2f0, 0x2c1de7f4, 0x65219ad3, 0xbe651dba, 0xf759609d, + 0x4f3b6143, 0x06071c64, 0xdd439b0d, 0x947fe62a, 0x6e26e32e, 0x271a9e09, 0xfc5e1960, 0xb5626447, + 0x89766c2d, 0xc04a110a, 0x1b0e9663, 0x5232eb44, 0xa86bee40, 0xe1579367, 0x3a13140e, 0x732f6929, + 0xcb4d68f7, 0x827115d0, 0x593592b9, 0x1009ef9e, 0xea50ea9a, 0xa36c97bd, 0x782810d4, 0x31146df3, + 0x1a00cb32, 0x533cb615, 0x8878317c, 0xc1444c5b, 0x3b1d495f, 0x72213478, 0xa965b311, 0xe059ce36, + 0x583bcfe8, 0x1107b2cf, 0xca4335a6, 0x837f4881, 0x79264d85, 0x301a30a2, 0xeb5eb7cb, 0xa262caec, + 0x9e76c286, 0xd74abfa1, 0x0c0e38c8, 0x453245ef, 0xbf6b40eb, 0xf6573dcc, 0x2d13baa5, 0x642fc782, + 0xdc4dc65c, 0x9571bb7b, 0x4e353c12, 0x07094135, 0xfd504431, 0xb46c3916, 0x6f28be7f, 0x2614c358, + 0x1700aeab, 0x5e3cd38c, 0x857854e5, 0xcc4429c2, 0x361d2cc6, 0x7f2151e1, 0xa465d688, 0xed59abaf, + 0x553baa71, 0x1c07d756, 0xc743503f, 0x8e7f2d18, 0x7426281c, 0x3d1a553b, 0xe65ed252, 0xaf62af75, + 0x9376a71f, 0xda4ada38, 
0x010e5d51, 0x48322076, 0xb26b2572, 0xfb575855, 0x2013df3c, 0x692fa21b, + 0xd14da3c5, 0x9871dee2, 0x4335598b, 0x0a0924ac, 0xf05021a8, 0xb96c5c8f, 0x6228dbe6, 0x2b14a6c1, + 0x34019664, 0x7d3deb43, 0xa6796c2a, 0xef45110d, 0x151c1409, 0x5c20692e, 0x8764ee47, 0xce589360, + 0x763a92be, 0x3f06ef99, 0xe44268f0, 0xad7e15d7, 0x572710d3, 0x1e1b6df4, 0xc55fea9d, 0x8c6397ba, + 0xb0779fd0, 0xf94be2f7, 0x220f659e, 0x6b3318b9, 0x916a1dbd, 0xd856609a, 0x0312e7f3, 0x4a2e9ad4, + 0xf24c9b0a, 0xbb70e62d, 0x60346144, 0x29081c63, 0xd3511967, 0x9a6d6440, 0x4129e329, 0x08159e0e, + 0x3901f3fd, 0x703d8eda, 0xab7909b3, 0xe2457494, 0x181c7190, 0x51200cb7, 0x8a648bde, 0xc358f6f9, + 0x7b3af727, 0x32068a00, 0xe9420d69, 0xa07e704e, 0x5a27754a, 0x131b086d, 0xc85f8f04, 0x8163f223, + 0xbd77fa49, 0xf44b876e, 0x2f0f0007, 0x66337d20, 0x9c6a7824, 0xd5560503, 0x0e12826a, 0x472eff4d, + 0xff4cfe93, 0xb67083b4, 0x6d3404dd, 0x240879fa, 0xde517cfe, 0x976d01d9, 0x4c2986b0, 0x0515fb97, + 0x2e015d56, 0x673d2071, 0xbc79a718, 0xf545da3f, 0x0f1cdf3b, 0x4620a21c, 0x9d642575, 0xd4585852, + 0x6c3a598c, 0x250624ab, 0xfe42a3c2, 0xb77edee5, 0x4d27dbe1, 0x041ba6c6, 0xdf5f21af, 0x96635c88, + 0xaa7754e2, 0xe34b29c5, 0x380faeac, 0x7133d38b, 0x8b6ad68f, 0xc256aba8, 0x19122cc1, 0x502e51e6, + 0xe84c5038, 0xa1702d1f, 0x7a34aa76, 0x3308d751, 0xc951d255, 0x806daf72, 0x5b29281b, 0x1215553c, + 0x230138cf, 0x6a3d45e8, 0xb179c281, 0xf845bfa6, 0x021cbaa2, 0x4b20c785, 0x906440ec, 0xd9583dcb, + 0x613a3c15, 0x28064132, 0xf342c65b, 0xba7ebb7c, 0x4027be78, 0x091bc35f, 0xd25f4436, 0x9b633911, + 0xa777317b, 0xee4b4c5c, 0x350fcb35, 0x7c33b612, 0x866ab316, 0xcf56ce31, 0x14124958, 0x5d2e347f, + 0xe54c35a1, 0xac704886, 0x7734cfef, 0x3e08b2c8, 0xc451b7cc, 0x8d6dcaeb, 0x56294d82, 0x1f1530a5, + }, + { + 0x00000000, 0xf43ed648, 0xed91da61, 0x19af0c29, 0xdecfc233, 0x2af1147b, 0x335e1852, 0xc760ce1a, + 0xb873f297, 0x4c4d24df, 0x55e228f6, 0xa1dcfebe, 0x66bc30a4, 0x9282e6ec, 0x8b2deac5, 0x7f133c8d, + 0x750b93df, 0x81354597, 0x989a49be, 0x6ca49ff6, 
0xabc451ec, 0x5ffa87a4, 0x46558b8d, 0xb26b5dc5, + 0xcd786148, 0x3946b700, 0x20e9bb29, 0xd4d76d61, 0x13b7a37b, 0xe7897533, 0xfe26791a, 0x0a18af52, + 0xea1727be, 0x1e29f1f6, 0x0786fddf, 0xf3b82b97, 0x34d8e58d, 0xc0e633c5, 0xd9493fec, 0x2d77e9a4, + 0x5264d529, 0xa65a0361, 0xbff50f48, 0x4bcbd900, 0x8cab171a, 0x7895c152, 0x613acd7b, 0x95041b33, + 0x9f1cb461, 0x6b226229, 0x728d6e00, 0x86b3b848, 0x41d37652, 0xb5eda01a, 0xac42ac33, 0x587c7a7b, + 0x276f46f6, 0xd35190be, 0xcafe9c97, 0x3ec04adf, 0xf9a084c5, 0x0d9e528d, 0x14315ea4, 0xe00f88ec, + 0xd1c2398d, 0x25fcefc5, 0x3c53e3ec, 0xc86d35a4, 0x0f0dfbbe, 0xfb332df6, 0xe29c21df, 0x16a2f797, + 0x69b1cb1a, 0x9d8f1d52, 0x8420117b, 0x701ec733, 0xb77e0929, 0x4340df61, 0x5aefd348, 0xaed10500, + 0xa4c9aa52, 0x50f77c1a, 0x49587033, 0xbd66a67b, 0x7a066861, 0x8e38be29, 0x9797b200, 0x63a96448, + 0x1cba58c5, 0xe8848e8d, 0xf12b82a4, 0x051554ec, 0xc2759af6, 0x364b4cbe, 0x2fe44097, 0xdbda96df, + 0x3bd51e33, 0xcfebc87b, 0xd644c452, 0x227a121a, 0xe51adc00, 0x11240a48, 0x088b0661, 0xfcb5d029, + 0x83a6eca4, 0x77983aec, 0x6e3736c5, 0x9a09e08d, 0x5d692e97, 0xa957f8df, 0xb0f8f4f6, 0x44c622be, + 0x4ede8dec, 0xbae05ba4, 0xa34f578d, 0x577181c5, 0x90114fdf, 0x642f9997, 0x7d8095be, 0x89be43f6, + 0xf6ad7f7b, 0x0293a933, 0x1b3ca51a, 0xef027352, 0x2862bd48, 0xdc5c6b00, 0xc5f36729, 0x31cdb161, + 0xa66805eb, 0x5256d3a3, 0x4bf9df8a, 0xbfc709c2, 0x78a7c7d8, 0x8c991190, 0x95361db9, 0x6108cbf1, + 0x1e1bf77c, 0xea252134, 0xf38a2d1d, 0x07b4fb55, 0xc0d4354f, 0x34eae307, 0x2d45ef2e, 0xd97b3966, + 0xd3639634, 0x275d407c, 0x3ef24c55, 0xcacc9a1d, 0x0dac5407, 0xf992824f, 0xe03d8e66, 0x1403582e, + 0x6b1064a3, 0x9f2eb2eb, 0x8681bec2, 0x72bf688a, 0xb5dfa690, 0x41e170d8, 0x584e7cf1, 0xac70aab9, + 0x4c7f2255, 0xb841f41d, 0xa1eef834, 0x55d02e7c, 0x92b0e066, 0x668e362e, 0x7f213a07, 0x8b1fec4f, + 0xf40cd0c2, 0x0032068a, 0x199d0aa3, 0xeda3dceb, 0x2ac312f1, 0xdefdc4b9, 0xc752c890, 0x336c1ed8, + 0x3974b18a, 0xcd4a67c2, 0xd4e56beb, 0x20dbbda3, 0xe7bb73b9, 0x1385a5f1, 0x0a2aa9d8, 
0xfe147f90, + 0x8107431d, 0x75399555, 0x6c96997c, 0x98a84f34, 0x5fc8812e, 0xabf65766, 0xb2595b4f, 0x46678d07, + 0x77aa3c66, 0x8394ea2e, 0x9a3be607, 0x6e05304f, 0xa965fe55, 0x5d5b281d, 0x44f42434, 0xb0caf27c, + 0xcfd9cef1, 0x3be718b9, 0x22481490, 0xd676c2d8, 0x11160cc2, 0xe528da8a, 0xfc87d6a3, 0x08b900eb, + 0x02a1afb9, 0xf69f79f1, 0xef3075d8, 0x1b0ea390, 0xdc6e6d8a, 0x2850bbc2, 0x31ffb7eb, 0xc5c161a3, + 0xbad25d2e, 0x4eec8b66, 0x5743874f, 0xa37d5107, 0x641d9f1d, 0x90234955, 0x898c457c, 0x7db29334, + 0x9dbd1bd8, 0x6983cd90, 0x702cc1b9, 0x841217f1, 0x4372d9eb, 0xb74c0fa3, 0xaee3038a, 0x5addd5c2, + 0x25cee94f, 0xd1f03f07, 0xc85f332e, 0x3c61e566, 0xfb012b7c, 0x0f3ffd34, 0x1690f11d, 0xe2ae2755, + 0xe8b68807, 0x1c885e4f, 0x05275266, 0xf119842e, 0x36794a34, 0xc2479c7c, 0xdbe89055, 0x2fd6461d, + 0x50c57a90, 0xa4fbacd8, 0xbd54a0f1, 0x496a76b9, 0x8e0ab8a3, 0x7a346eeb, 0x639b62c2, 0x97a5b48a, + }, + { + 0x00000000, 0xcb567ba5, 0x934081bb, 0x5816fa1e, 0x236d7587, 0xe83b0e22, 0xb02df43c, 0x7b7b8f99, + 0x46daeb0e, 0x8d8c90ab, 0xd59a6ab5, 0x1ecc1110, 0x65b79e89, 0xaee1e52c, 0xf6f71f32, 0x3da16497, + 0x8db5d61c, 0x46e3adb9, 0x1ef557a7, 0xd5a32c02, 0xaed8a39b, 0x658ed83e, 0x3d982220, 0xf6ce5985, + 0xcb6f3d12, 0x003946b7, 0x582fbca9, 0x9379c70c, 0xe8024895, 0x23543330, 0x7b42c92e, 0xb014b28b, + 0x1e87dac9, 0xd5d1a16c, 0x8dc75b72, 0x469120d7, 0x3deaaf4e, 0xf6bcd4eb, 0xaeaa2ef5, 0x65fc5550, + 0x585d31c7, 0x930b4a62, 0xcb1db07c, 0x004bcbd9, 0x7b304440, 0xb0663fe5, 0xe870c5fb, 0x2326be5e, + 0x93320cd5, 0x58647770, 0x00728d6e, 0xcb24f6cb, 0xb05f7952, 0x7b0902f7, 0x231ff8e9, 0xe849834c, + 0xd5e8e7db, 0x1ebe9c7e, 0x46a86660, 0x8dfe1dc5, 0xf685925c, 0x3dd3e9f9, 0x65c513e7, 0xae936842, + 0x3d0fb592, 0xf659ce37, 0xae4f3429, 0x65194f8c, 0x1e62c015, 0xd534bbb0, 0x8d2241ae, 0x46743a0b, + 0x7bd55e9c, 0xb0832539, 0xe895df27, 0x23c3a482, 0x58b82b1b, 0x93ee50be, 0xcbf8aaa0, 0x00aed105, + 0xb0ba638e, 0x7bec182b, 0x23fae235, 0xe8ac9990, 0x93d71609, 0x58816dac, 0x009797b2, 0xcbc1ec17, + 0xf6608880, 
0x3d36f325, 0x6520093b, 0xae76729e, 0xd50dfd07, 0x1e5b86a2, 0x464d7cbc, 0x8d1b0719, + 0x23886f5b, 0xe8de14fe, 0xb0c8eee0, 0x7b9e9545, 0x00e51adc, 0xcbb36179, 0x93a59b67, 0x58f3e0c2, + 0x65528455, 0xae04fff0, 0xf61205ee, 0x3d447e4b, 0x463ff1d2, 0x8d698a77, 0xd57f7069, 0x1e290bcc, + 0xae3db947, 0x656bc2e2, 0x3d7d38fc, 0xf62b4359, 0x8d50ccc0, 0x4606b765, 0x1e104d7b, 0xd54636de, + 0xe8e75249, 0x23b129ec, 0x7ba7d3f2, 0xb0f1a857, 0xcb8a27ce, 0x00dc5c6b, 0x58caa675, 0x939cddd0, + 0x7a1f6b24, 0xb1491081, 0xe95fea9f, 0x2209913a, 0x59721ea3, 0x92246506, 0xca329f18, 0x0164e4bd, + 0x3cc5802a, 0xf793fb8f, 0xaf850191, 0x64d37a34, 0x1fa8f5ad, 0xd4fe8e08, 0x8ce87416, 0x47be0fb3, + 0xf7aabd38, 0x3cfcc69d, 0x64ea3c83, 0xafbc4726, 0xd4c7c8bf, 0x1f91b31a, 0x47874904, 0x8cd132a1, + 0xb1705636, 0x7a262d93, 0x2230d78d, 0xe966ac28, 0x921d23b1, 0x594b5814, 0x015da20a, 0xca0bd9af, + 0x6498b1ed, 0xafceca48, 0xf7d83056, 0x3c8e4bf3, 0x47f5c46a, 0x8ca3bfcf, 0xd4b545d1, 0x1fe33e74, + 0x22425ae3, 0xe9142146, 0xb102db58, 0x7a54a0fd, 0x012f2f64, 0xca7954c1, 0x926faedf, 0x5939d57a, + 0xe92d67f1, 0x227b1c54, 0x7a6de64a, 0xb13b9def, 0xca401276, 0x011669d3, 0x590093cd, 0x9256e868, + 0xaff78cff, 0x64a1f75a, 0x3cb70d44, 0xf7e176e1, 0x8c9af978, 0x47cc82dd, 0x1fda78c3, 0xd48c0366, + 0x4710deb6, 0x8c46a513, 0xd4505f0d, 0x1f0624a8, 0x647dab31, 0xaf2bd094, 0xf73d2a8a, 0x3c6b512f, + 0x01ca35b8, 0xca9c4e1d, 0x928ab403, 0x59dccfa6, 0x22a7403f, 0xe9f13b9a, 0xb1e7c184, 0x7ab1ba21, + 0xcaa508aa, 0x01f3730f, 0x59e58911, 0x92b3f2b4, 0xe9c87d2d, 0x229e0688, 0x7a88fc96, 0xb1de8733, + 0x8c7fe3a4, 0x47299801, 0x1f3f621f, 0xd46919ba, 0xaf129623, 0x6444ed86, 0x3c521798, 0xf7046c3d, + 0x5997047f, 0x92c17fda, 0xcad785c4, 0x0181fe61, 0x7afa71f8, 0xb1ac0a5d, 0xe9baf043, 0x22ec8be6, + 0x1f4def71, 0xd41b94d4, 0x8c0d6eca, 0x475b156f, 0x3c209af6, 0xf776e153, 0xaf601b4d, 0x643660e8, + 0xd422d263, 0x1f74a9c6, 0x476253d8, 0x8c34287d, 0xf74fa7e4, 0x3c19dc41, 0x640f265f, 0xaf595dfa, + 0x92f8396d, 0x59ae42c8, 0x01b8b8d6, 0xcaeec373, 
0xb1954cea, 0x7ac3374f, 0x22d5cd51, 0xe983b6f4, + }, + { + 0x00000000, 0x9771f7c1, 0x2b0f9973, 0xbc7e6eb2, 0x561f32e6, 0xc16ec527, 0x7d10ab95, 0xea615c54, + 0xac3e65cc, 0x3b4f920d, 0x8731fcbf, 0x10400b7e, 0xfa21572a, 0x6d50a0eb, 0xd12ece59, 0x465f3998, + 0x5d90bd69, 0xcae14aa8, 0x769f241a, 0xe1eed3db, 0x0b8f8f8f, 0x9cfe784e, 0x208016fc, 0xb7f1e13d, + 0xf1aed8a5, 0x66df2f64, 0xdaa141d6, 0x4dd0b617, 0xa7b1ea43, 0x30c01d82, 0x8cbe7330, 0x1bcf84f1, + 0xbb217ad2, 0x2c508d13, 0x902ee3a1, 0x075f1460, 0xed3e4834, 0x7a4fbff5, 0xc631d147, 0x51402686, + 0x171f1f1e, 0x806ee8df, 0x3c10866d, 0xab6171ac, 0x41002df8, 0xd671da39, 0x6a0fb48b, 0xfd7e434a, + 0xe6b1c7bb, 0x71c0307a, 0xcdbe5ec8, 0x5acfa909, 0xb0aef55d, 0x27df029c, 0x9ba16c2e, 0x0cd09bef, + 0x4a8fa277, 0xddfe55b6, 0x61803b04, 0xf6f1ccc5, 0x1c909091, 0x8be16750, 0x379f09e2, 0xa0eefe23, + 0x73ae8355, 0xe4df7494, 0x58a11a26, 0xcfd0ede7, 0x25b1b1b3, 0xb2c04672, 0x0ebe28c0, 0x99cfdf01, + 0xdf90e699, 0x48e11158, 0xf49f7fea, 0x63ee882b, 0x898fd47f, 0x1efe23be, 0xa2804d0c, 0x35f1bacd, + 0x2e3e3e3c, 0xb94fc9fd, 0x0531a74f, 0x9240508e, 0x78210cda, 0xef50fb1b, 0x532e95a9, 0xc45f6268, + 0x82005bf0, 0x1571ac31, 0xa90fc283, 0x3e7e3542, 0xd41f6916, 0x436e9ed7, 0xff10f065, 0x686107a4, + 0xc88ff987, 0x5ffe0e46, 0xe38060f4, 0x74f19735, 0x9e90cb61, 0x09e13ca0, 0xb59f5212, 0x22eea5d3, + 0x64b19c4b, 0xf3c06b8a, 0x4fbe0538, 0xd8cff2f9, 0x32aeaead, 0xa5df596c, 0x19a137de, 0x8ed0c01f, + 0x951f44ee, 0x026eb32f, 0xbe10dd9d, 0x29612a5c, 0xc3007608, 0x547181c9, 0xe80fef7b, 0x7f7e18ba, + 0x39212122, 0xae50d6e3, 0x122eb851, 0x855f4f90, 0x6f3e13c4, 0xf84fe405, 0x44318ab7, 0xd3407d76, + 0xe75d06aa, 0x702cf16b, 0xcc529fd9, 0x5b236818, 0xb142344c, 0x2633c38d, 0x9a4dad3f, 0x0d3c5afe, + 0x4b636366, 0xdc1294a7, 0x606cfa15, 0xf71d0dd4, 0x1d7c5180, 0x8a0da641, 0x3673c8f3, 0xa1023f32, + 0xbacdbbc3, 0x2dbc4c02, 0x91c222b0, 0x06b3d571, 0xecd28925, 0x7ba37ee4, 0xc7dd1056, 0x50ace797, + 0x16f3de0f, 0x818229ce, 0x3dfc477c, 0xaa8db0bd, 0x40ecece9, 0xd79d1b28, 
0x6be3759a, 0xfc92825b, + 0x5c7c7c78, 0xcb0d8bb9, 0x7773e50b, 0xe00212ca, 0x0a634e9e, 0x9d12b95f, 0x216cd7ed, 0xb61d202c, + 0xf04219b4, 0x6733ee75, 0xdb4d80c7, 0x4c3c7706, 0xa65d2b52, 0x312cdc93, 0x8d52b221, 0x1a2345e0, + 0x01ecc111, 0x969d36d0, 0x2ae35862, 0xbd92afa3, 0x57f3f3f7, 0xc0820436, 0x7cfc6a84, 0xeb8d9d45, + 0xadd2a4dd, 0x3aa3531c, 0x86dd3dae, 0x11acca6f, 0xfbcd963b, 0x6cbc61fa, 0xd0c20f48, 0x47b3f889, + 0x94f385ff, 0x0382723e, 0xbffc1c8c, 0x288deb4d, 0xc2ecb719, 0x559d40d8, 0xe9e32e6a, 0x7e92d9ab, + 0x38cde033, 0xafbc17f2, 0x13c27940, 0x84b38e81, 0x6ed2d2d5, 0xf9a32514, 0x45dd4ba6, 0xd2acbc67, + 0xc9633896, 0x5e12cf57, 0xe26ca1e5, 0x751d5624, 0x9f7c0a70, 0x080dfdb1, 0xb4739303, 0x230264c2, + 0x655d5d5a, 0xf22caa9b, 0x4e52c429, 0xd92333e8, 0x33426fbc, 0xa433987d, 0x184df6cf, 0x8f3c010e, + 0x2fd2ff2d, 0xb8a308ec, 0x04dd665e, 0x93ac919f, 0x79cdcdcb, 0xeebc3a0a, 0x52c254b8, 0xc5b3a379, + 0x83ec9ae1, 0x149d6d20, 0xa8e30392, 0x3f92f453, 0xd5f3a807, 0x42825fc6, 0xfefc3174, 0x698dc6b5, + 0x72424244, 0xe533b585, 0x594ddb37, 0xce3c2cf6, 0x245d70a2, 0xb32c8763, 0x0f52e9d1, 0x98231e10, + 0xde7c2788, 0x490dd049, 0xf573befb, 0x6202493a, 0x8863156e, 0x1f12e2af, 0xa36c8c1d, 0x341d7bdc, + }, + { + 0x00000000, 0x3171d430, 0x62e3a860, 0x53927c50, 0xc5c750c0, 0xf4b684f0, 0xa724f8a0, 0x96552c90, + 0x8e62d771, 0xbf130341, 0xec817f11, 0xddf0ab21, 0x4ba587b1, 0x7ad45381, 0x29462fd1, 0x1837fbe1, + 0x1929d813, 0x28580c23, 0x7bca7073, 0x4abba443, 0xdcee88d3, 0xed9f5ce3, 0xbe0d20b3, 0x8f7cf483, + 0x974b0f62, 0xa63adb52, 0xf5a8a702, 0xc4d97332, 0x528c5fa2, 0x63fd8b92, 0x306ff7c2, 0x011e23f2, + 0x3253b026, 0x03226416, 0x50b01846, 0x61c1cc76, 0xf794e0e6, 0xc6e534d6, 0x95774886, 0xa4069cb6, + 0xbc316757, 0x8d40b367, 0xded2cf37, 0xefa31b07, 0x79f63797, 0x4887e3a7, 0x1b159ff7, 0x2a644bc7, + 0x2b7a6835, 0x1a0bbc05, 0x4999c055, 0x78e81465, 0xeebd38f5, 0xdfccecc5, 0x8c5e9095, 0xbd2f44a5, + 0xa518bf44, 0x94696b74, 0xc7fb1724, 0xf68ac314, 0x60dfef84, 0x51ae3bb4, 0x023c47e4, 0x334d93d4, + 
0x64a7604c, 0x55d6b47c, 0x0644c82c, 0x37351c1c, 0xa160308c, 0x9011e4bc, 0xc38398ec, 0xf2f24cdc, + 0xeac5b73d, 0xdbb4630d, 0x88261f5d, 0xb957cb6d, 0x2f02e7fd, 0x1e7333cd, 0x4de14f9d, 0x7c909bad, + 0x7d8eb85f, 0x4cff6c6f, 0x1f6d103f, 0x2e1cc40f, 0xb849e89f, 0x89383caf, 0xdaaa40ff, 0xebdb94cf, + 0xf3ec6f2e, 0xc29dbb1e, 0x910fc74e, 0xa07e137e, 0x362b3fee, 0x075aebde, 0x54c8978e, 0x65b943be, + 0x56f4d06a, 0x6785045a, 0x3417780a, 0x0566ac3a, 0x933380aa, 0xa242549a, 0xf1d028ca, 0xc0a1fcfa, + 0xd896071b, 0xe9e7d32b, 0xba75af7b, 0x8b047b4b, 0x1d5157db, 0x2c2083eb, 0x7fb2ffbb, 0x4ec32b8b, + 0x4fdd0879, 0x7eacdc49, 0x2d3ea019, 0x1c4f7429, 0x8a1a58b9, 0xbb6b8c89, 0xe8f9f0d9, 0xd98824e9, + 0xc1bfdf08, 0xf0ce0b38, 0xa35c7768, 0x922da358, 0x04788fc8, 0x35095bf8, 0x669b27a8, 0x57eaf398, + 0xc94ec098, 0xf83f14a8, 0xabad68f8, 0x9adcbcc8, 0x0c899058, 0x3df84468, 0x6e6a3838, 0x5f1bec08, + 0x472c17e9, 0x765dc3d9, 0x25cfbf89, 0x14be6bb9, 0x82eb4729, 0xb39a9319, 0xe008ef49, 0xd1793b79, + 0xd067188b, 0xe116ccbb, 0xb284b0eb, 0x83f564db, 0x15a0484b, 0x24d19c7b, 0x7743e02b, 0x4632341b, + 0x5e05cffa, 0x6f741bca, 0x3ce6679a, 0x0d97b3aa, 0x9bc29f3a, 0xaab34b0a, 0xf921375a, 0xc850e36a, + 0xfb1d70be, 0xca6ca48e, 0x99fed8de, 0xa88f0cee, 0x3eda207e, 0x0fabf44e, 0x5c39881e, 0x6d485c2e, + 0x757fa7cf, 0x440e73ff, 0x179c0faf, 0x26eddb9f, 0xb0b8f70f, 0x81c9233f, 0xd25b5f6f, 0xe32a8b5f, + 0xe234a8ad, 0xd3457c9d, 0x80d700cd, 0xb1a6d4fd, 0x27f3f86d, 0x16822c5d, 0x4510500d, 0x7461843d, + 0x6c567fdc, 0x5d27abec, 0x0eb5d7bc, 0x3fc4038c, 0xa9912f1c, 0x98e0fb2c, 0xcb72877c, 0xfa03534c, + 0xade9a0d4, 0x9c9874e4, 0xcf0a08b4, 0xfe7bdc84, 0x682ef014, 0x595f2424, 0x0acd5874, 0x3bbc8c44, + 0x238b77a5, 0x12faa395, 0x4168dfc5, 0x70190bf5, 0xe64c2765, 0xd73df355, 0x84af8f05, 0xb5de5b35, + 0xb4c078c7, 0x85b1acf7, 0xd623d0a7, 0xe7520497, 0x71072807, 0x4076fc37, 0x13e48067, 0x22955457, + 0x3aa2afb6, 0x0bd37b86, 0x584107d6, 0x6930d3e6, 0xff65ff76, 0xce142b46, 0x9d865716, 0xacf78326, + 0x9fba10f2, 0xaecbc4c2, 0xfd59b892, 
0xcc286ca2, 0x5a7d4032, 0x6b0c9402, 0x389ee852, 0x09ef3c62, + 0x11d8c783, 0x20a913b3, 0x733b6fe3, 0x424abbd3, 0xd41f9743, 0xe56e4373, 0xb6fc3f23, 0x878deb13, + 0x8693c8e1, 0xb7e21cd1, 0xe4706081, 0xd501b4b1, 0x43549821, 0x72254c11, 0x21b73041, 0x10c6e471, + 0x08f11f90, 0x3980cba0, 0x6a12b7f0, 0x5b6363c0, 0xcd364f50, 0xfc479b60, 0xafd5e730, 0x9ea43300, + }, + { + 0x00000000, 0x30d23865, 0x61a470ca, 0x517648af, 0xc348e194, 0xf39ad9f1, 0xa2ec915e, 0x923ea93b, + 0x837db5d9, 0xb3af8dbc, 0xe2d9c513, 0xd20bfd76, 0x4035544d, 0x70e76c28, 0x21912487, 0x11431ce2, + 0x03171d43, 0x33c52526, 0x62b36d89, 0x526155ec, 0xc05ffcd7, 0xf08dc4b2, 0xa1fb8c1d, 0x9129b478, + 0x806aa89a, 0xb0b890ff, 0xe1ced850, 0xd11ce035, 0x4322490e, 0x73f0716b, 0x228639c4, 0x125401a1, + 0x062e3a86, 0x36fc02e3, 0x678a4a4c, 0x57587229, 0xc566db12, 0xf5b4e377, 0xa4c2abd8, 0x941093bd, + 0x85538f5f, 0xb581b73a, 0xe4f7ff95, 0xd425c7f0, 0x461b6ecb, 0x76c956ae, 0x27bf1e01, 0x176d2664, + 0x053927c5, 0x35eb1fa0, 0x649d570f, 0x544f6f6a, 0xc671c651, 0xf6a3fe34, 0xa7d5b69b, 0x97078efe, + 0x8644921c, 0xb696aa79, 0xe7e0e2d6, 0xd732dab3, 0x450c7388, 0x75de4bed, 0x24a80342, 0x147a3b27, + 0x0c5c750c, 0x3c8e4d69, 0x6df805c6, 0x5d2a3da3, 0xcf149498, 0xffc6acfd, 0xaeb0e452, 0x9e62dc37, + 0x8f21c0d5, 0xbff3f8b0, 0xee85b01f, 0xde57887a, 0x4c692141, 0x7cbb1924, 0x2dcd518b, 0x1d1f69ee, + 0x0f4b684f, 0x3f99502a, 0x6eef1885, 0x5e3d20e0, 0xcc0389db, 0xfcd1b1be, 0xada7f911, 0x9d75c174, + 0x8c36dd96, 0xbce4e5f3, 0xed92ad5c, 0xdd409539, 0x4f7e3c02, 0x7fac0467, 0x2eda4cc8, 0x1e0874ad, + 0x0a724f8a, 0x3aa077ef, 0x6bd63f40, 0x5b040725, 0xc93aae1e, 0xf9e8967b, 0xa89eded4, 0x984ce6b1, + 0x890ffa53, 0xb9ddc236, 0xe8ab8a99, 0xd879b2fc, 0x4a471bc7, 0x7a9523a2, 0x2be36b0d, 0x1b315368, + 0x096552c9, 0x39b76aac, 0x68c12203, 0x58131a66, 0xca2db35d, 0xfaff8b38, 0xab89c397, 0x9b5bfbf2, + 0x8a18e710, 0xbacadf75, 0xebbc97da, 0xdb6eafbf, 0x49500684, 0x79823ee1, 0x28f4764e, 0x18264e2b, + 0x18b8ea18, 0x286ad27d, 0x791c9ad2, 0x49cea2b7, 0xdbf00b8c, 
0xeb2233e9, 0xba547b46, 0x8a864323, + 0x9bc55fc1, 0xab1767a4, 0xfa612f0b, 0xcab3176e, 0x588dbe55, 0x685f8630, 0x3929ce9f, 0x09fbf6fa, + 0x1baff75b, 0x2b7dcf3e, 0x7a0b8791, 0x4ad9bff4, 0xd8e716cf, 0xe8352eaa, 0xb9436605, 0x89915e60, + 0x98d24282, 0xa8007ae7, 0xf9763248, 0xc9a40a2d, 0x5b9aa316, 0x6b489b73, 0x3a3ed3dc, 0x0aecebb9, + 0x1e96d09e, 0x2e44e8fb, 0x7f32a054, 0x4fe09831, 0xddde310a, 0xed0c096f, 0xbc7a41c0, 0x8ca879a5, + 0x9deb6547, 0xad395d22, 0xfc4f158d, 0xcc9d2de8, 0x5ea384d3, 0x6e71bcb6, 0x3f07f419, 0x0fd5cc7c, + 0x1d81cddd, 0x2d53f5b8, 0x7c25bd17, 0x4cf78572, 0xdec92c49, 0xee1b142c, 0xbf6d5c83, 0x8fbf64e6, + 0x9efc7804, 0xae2e4061, 0xff5808ce, 0xcf8a30ab, 0x5db49990, 0x6d66a1f5, 0x3c10e95a, 0x0cc2d13f, + 0x14e49f14, 0x2436a771, 0x7540efde, 0x4592d7bb, 0xd7ac7e80, 0xe77e46e5, 0xb6080e4a, 0x86da362f, + 0x97992acd, 0xa74b12a8, 0xf63d5a07, 0xc6ef6262, 0x54d1cb59, 0x6403f33c, 0x3575bb93, 0x05a783f6, + 0x17f38257, 0x2721ba32, 0x7657f29d, 0x4685caf8, 0xd4bb63c3, 0xe4695ba6, 0xb51f1309, 0x85cd2b6c, + 0x948e378e, 0xa45c0feb, 0xf52a4744, 0xc5f87f21, 0x57c6d61a, 0x6714ee7f, 0x3662a6d0, 0x06b09eb5, + 0x12caa592, 0x22189df7, 0x736ed558, 0x43bced3d, 0xd1824406, 0xe1507c63, 0xb02634cc, 0x80f40ca9, + 0x91b7104b, 0xa165282e, 0xf0136081, 0xc0c158e4, 0x52fff1df, 0x622dc9ba, 0x335b8115, 0x0389b970, + 0x11ddb8d1, 0x210f80b4, 0x7079c81b, 0x40abf07e, 0xd2955945, 0xe2476120, 0xb331298f, 0x83e311ea, + 0x92a00d08, 0xa272356d, 0xf3047dc2, 0xc3d645a7, 0x51e8ec9c, 0x613ad4f9, 0x304c9c56, 0x009ea433, + }, + { + 0x00000000, 0x54075546, 0xa80eaa8c, 0xfc09ffca, 0x55f123e9, 0x01f676af, 0xfdff8965, 0xa9f8dc23, + 0xabe247d2, 0xffe51294, 0x03eced5e, 0x57ebb818, 0xfe13643b, 0xaa14317d, 0x561dceb7, 0x021a9bf1, + 0x5228f955, 0x062fac13, 0xfa2653d9, 0xae21069f, 0x07d9dabc, 0x53de8ffa, 0xafd77030, 0xfbd02576, + 0xf9cabe87, 0xadcdebc1, 0x51c4140b, 0x05c3414d, 0xac3b9d6e, 0xf83cc828, 0x043537e2, 0x503262a4, + 0xa451f2aa, 0xf056a7ec, 0x0c5f5826, 0x58580d60, 0xf1a0d143, 0xa5a78405, 0x59ae7bcf, 
0x0da92e89, + 0x0fb3b578, 0x5bb4e03e, 0xa7bd1ff4, 0xf3ba4ab2, 0x5a429691, 0x0e45c3d7, 0xf24c3c1d, 0xa64b695b, + 0xf6790bff, 0xa27e5eb9, 0x5e77a173, 0x0a70f435, 0xa3882816, 0xf78f7d50, 0x0b86829a, 0x5f81d7dc, + 0x5d9b4c2d, 0x099c196b, 0xf595e6a1, 0xa192b3e7, 0x086a6fc4, 0x5c6d3a82, 0xa064c548, 0xf463900e, + 0x4d4f93a5, 0x1948c6e3, 0xe5413929, 0xb1466c6f, 0x18beb04c, 0x4cb9e50a, 0xb0b01ac0, 0xe4b74f86, + 0xe6add477, 0xb2aa8131, 0x4ea37efb, 0x1aa42bbd, 0xb35cf79e, 0xe75ba2d8, 0x1b525d12, 0x4f550854, + 0x1f676af0, 0x4b603fb6, 0xb769c07c, 0xe36e953a, 0x4a964919, 0x1e911c5f, 0xe298e395, 0xb69fb6d3, + 0xb4852d22, 0xe0827864, 0x1c8b87ae, 0x488cd2e8, 0xe1740ecb, 0xb5735b8d, 0x497aa447, 0x1d7df101, + 0xe91e610f, 0xbd193449, 0x4110cb83, 0x15179ec5, 0xbcef42e6, 0xe8e817a0, 0x14e1e86a, 0x40e6bd2c, + 0x42fc26dd, 0x16fb739b, 0xeaf28c51, 0xbef5d917, 0x170d0534, 0x430a5072, 0xbf03afb8, 0xeb04fafe, + 0xbb36985a, 0xef31cd1c, 0x133832d6, 0x473f6790, 0xeec7bbb3, 0xbac0eef5, 0x46c9113f, 0x12ce4479, + 0x10d4df88, 0x44d38ace, 0xb8da7504, 0xecdd2042, 0x4525fc61, 0x1122a927, 0xed2b56ed, 0xb92c03ab, + 0x9a9f274a, 0xce98720c, 0x32918dc6, 0x6696d880, 0xcf6e04a3, 0x9b6951e5, 0x6760ae2f, 0x3367fb69, + 0x317d6098, 0x657a35de, 0x9973ca14, 0xcd749f52, 0x648c4371, 0x308b1637, 0xcc82e9fd, 0x9885bcbb, + 0xc8b7de1f, 0x9cb08b59, 0x60b97493, 0x34be21d5, 0x9d46fdf6, 0xc941a8b0, 0x3548577a, 0x614f023c, + 0x635599cd, 0x3752cc8b, 0xcb5b3341, 0x9f5c6607, 0x36a4ba24, 0x62a3ef62, 0x9eaa10a8, 0xcaad45ee, + 0x3eced5e0, 0x6ac980a6, 0x96c07f6c, 0xc2c72a2a, 0x6b3ff609, 0x3f38a34f, 0xc3315c85, 0x973609c3, + 0x952c9232, 0xc12bc774, 0x3d2238be, 0x69256df8, 0xc0ddb1db, 0x94dae49d, 0x68d31b57, 0x3cd44e11, + 0x6ce62cb5, 0x38e179f3, 0xc4e88639, 0x90efd37f, 0x39170f5c, 0x6d105a1a, 0x9119a5d0, 0xc51ef096, + 0xc7046b67, 0x93033e21, 0x6f0ac1eb, 0x3b0d94ad, 0x92f5488e, 0xc6f21dc8, 0x3afbe202, 0x6efcb744, + 0xd7d0b4ef, 0x83d7e1a9, 0x7fde1e63, 0x2bd94b25, 0x82219706, 0xd626c240, 0x2a2f3d8a, 0x7e2868cc, + 0x7c32f33d, 0x2835a67b, 
0xd43c59b1, 0x803b0cf7, 0x29c3d0d4, 0x7dc48592, 0x81cd7a58, 0xd5ca2f1e, + 0x85f84dba, 0xd1ff18fc, 0x2df6e736, 0x79f1b270, 0xd0096e53, 0x840e3b15, 0x7807c4df, 0x2c009199, + 0x2e1a0a68, 0x7a1d5f2e, 0x8614a0e4, 0xd213f5a2, 0x7beb2981, 0x2fec7cc7, 0xd3e5830d, 0x87e2d64b, + 0x73814645, 0x27861303, 0xdb8fecc9, 0x8f88b98f, 0x267065ac, 0x727730ea, 0x8e7ecf20, 0xda799a66, + 0xd8630197, 0x8c6454d1, 0x706dab1b, 0x246afe5d, 0x8d92227e, 0xd9957738, 0x259c88f2, 0x719bddb4, + 0x21a9bf10, 0x75aeea56, 0x89a7159c, 0xdda040da, 0x74589cf9, 0x205fc9bf, 0xdc563675, 0x88516333, + 0x8a4bf8c2, 0xde4cad84, 0x2245524e, 0x76420708, 0xdfbadb2b, 0x8bbd8e6d, 0x77b471a7, 0x23b324e1, + }, + { + 0x00000000, 0x678efd01, 0xcf1dfa02, 0xa8930703, 0x9bd782f5, 0xfc597ff4, 0x54ca78f7, 0x334485f6, + 0x3243731b, 0x55cd8e1a, 0xfd5e8919, 0x9ad07418, 0xa994f1ee, 0xce1a0cef, 0x66890bec, 0x0107f6ed, + 0x6486e636, 0x03081b37, 0xab9b1c34, 0xcc15e135, 0xff5164c3, 0x98df99c2, 0x304c9ec1, 0x57c263c0, + 0x56c5952d, 0x314b682c, 0x99d86f2f, 0xfe56922e, 0xcd1217d8, 0xaa9cead9, 0x020fedda, 0x658110db, + 0xc90dcc6c, 0xae83316d, 0x0610366e, 0x619ecb6f, 0x52da4e99, 0x3554b398, 0x9dc7b49b, 0xfa49499a, + 0xfb4ebf77, 0x9cc04276, 0x34534575, 0x53ddb874, 0x60993d82, 0x0717c083, 0xaf84c780, 0xc80a3a81, + 0xad8b2a5a, 0xca05d75b, 0x6296d058, 0x05182d59, 0x365ca8af, 0x51d255ae, 0xf94152ad, 0x9ecfafac, + 0x9fc85941, 0xf846a440, 0x50d5a343, 0x375b5e42, 0x041fdbb4, 0x639126b5, 0xcb0221b6, 0xac8cdcb7, + 0x97f7ee29, 0xf0791328, 0x58ea142b, 0x3f64e92a, 0x0c206cdc, 0x6bae91dd, 0xc33d96de, 0xa4b36bdf, + 0xa5b49d32, 0xc23a6033, 0x6aa96730, 0x0d279a31, 0x3e631fc7, 0x59ede2c6, 0xf17ee5c5, 0x96f018c4, + 0xf371081f, 0x94fff51e, 0x3c6cf21d, 0x5be20f1c, 0x68a68aea, 0x0f2877eb, 0xa7bb70e8, 0xc0358de9, + 0xc1327b04, 0xa6bc8605, 0x0e2f8106, 0x69a17c07, 0x5ae5f9f1, 0x3d6b04f0, 0x95f803f3, 0xf276fef2, + 0x5efa2245, 0x3974df44, 0x91e7d847, 0xf6692546, 0xc52da0b0, 0xa2a35db1, 0x0a305ab2, 0x6dbea7b3, + 0x6cb9515e, 0x0b37ac5f, 0xa3a4ab5c, 0xc42a565d, 
0xf76ed3ab, 0x90e02eaa, 0x387329a9, 0x5ffdd4a8, + 0x3a7cc473, 0x5df23972, 0xf5613e71, 0x92efc370, 0xa1ab4686, 0xc625bb87, 0x6eb6bc84, 0x09384185, + 0x083fb768, 0x6fb14a69, 0xc7224d6a, 0xa0acb06b, 0x93e8359d, 0xf466c89c, 0x5cf5cf9f, 0x3b7b329e, + 0x2a03aaa3, 0x4d8d57a2, 0xe51e50a1, 0x8290ada0, 0xb1d42856, 0xd65ad557, 0x7ec9d254, 0x19472f55, + 0x1840d9b8, 0x7fce24b9, 0xd75d23ba, 0xb0d3debb, 0x83975b4d, 0xe419a64c, 0x4c8aa14f, 0x2b045c4e, + 0x4e854c95, 0x290bb194, 0x8198b697, 0xe6164b96, 0xd552ce60, 0xb2dc3361, 0x1a4f3462, 0x7dc1c963, + 0x7cc63f8e, 0x1b48c28f, 0xb3dbc58c, 0xd455388d, 0xe711bd7b, 0x809f407a, 0x280c4779, 0x4f82ba78, + 0xe30e66cf, 0x84809bce, 0x2c139ccd, 0x4b9d61cc, 0x78d9e43a, 0x1f57193b, 0xb7c41e38, 0xd04ae339, + 0xd14d15d4, 0xb6c3e8d5, 0x1e50efd6, 0x79de12d7, 0x4a9a9721, 0x2d146a20, 0x85876d23, 0xe2099022, + 0x878880f9, 0xe0067df8, 0x48957afb, 0x2f1b87fa, 0x1c5f020c, 0x7bd1ff0d, 0xd342f80e, 0xb4cc050f, + 0xb5cbf3e2, 0xd2450ee3, 0x7ad609e0, 0x1d58f4e1, 0x2e1c7117, 0x49928c16, 0xe1018b15, 0x868f7614, + 0xbdf4448a, 0xda7ab98b, 0x72e9be88, 0x15674389, 0x2623c67f, 0x41ad3b7e, 0xe93e3c7d, 0x8eb0c17c, + 0x8fb73791, 0xe839ca90, 0x40aacd93, 0x27243092, 0x1460b564, 0x73ee4865, 0xdb7d4f66, 0xbcf3b267, + 0xd972a2bc, 0xbefc5fbd, 0x166f58be, 0x71e1a5bf, 0x42a52049, 0x252bdd48, 0x8db8da4b, 0xea36274a, + 0xeb31d1a7, 0x8cbf2ca6, 0x242c2ba5, 0x43a2d6a4, 0x70e65352, 0x1768ae53, 0xbffba950, 0xd8755451, + 0x74f988e6, 0x137775e7, 0xbbe472e4, 0xdc6a8fe5, 0xef2e0a13, 0x88a0f712, 0x2033f011, 0x47bd0d10, + 0x46bafbfd, 0x213406fc, 0x89a701ff, 0xee29fcfe, 0xdd6d7908, 0xbae38409, 0x1270830a, 0x75fe7e0b, + 0x107f6ed0, 0x77f193d1, 0xdf6294d2, 0xb8ec69d3, 0x8ba8ec25, 0xec261124, 0x44b51627, 0x233beb26, + 0x223c1dcb, 0x45b2e0ca, 0xed21e7c9, 0x8aaf1ac8, 0xb9eb9f3e, 0xde65623f, 0x76f6653c, 0x1178983d, + }, + { + 0x00000000, 0xf20c0dfe, 0xe1f46d0d, 0x13f860f3, 0xc604aceb, 0x3408a115, 0x27f0c1e6, 0xd5fccc18, + 0x89e52f27, 0x7be922d9, 0x6811422a, 0x9a1d4fd4, 0x4fe183cc, 0xbded8e32, 
0xae15eec1, 0x5c19e33f, + 0x162628bf, 0xe42a2541, 0xf7d245b2, 0x05de484c, 0xd0228454, 0x222e89aa, 0x31d6e959, 0xc3dae4a7, + 0x9fc30798, 0x6dcf0a66, 0x7e376a95, 0x8c3b676b, 0x59c7ab73, 0xabcba68d, 0xb833c67e, 0x4a3fcb80, + 0x2c4c517e, 0xde405c80, 0xcdb83c73, 0x3fb4318d, 0xea48fd95, 0x1844f06b, 0x0bbc9098, 0xf9b09d66, + 0xa5a97e59, 0x57a573a7, 0x445d1354, 0xb6511eaa, 0x63add2b2, 0x91a1df4c, 0x8259bfbf, 0x7055b241, + 0x3a6a79c1, 0xc866743f, 0xdb9e14cc, 0x29921932, 0xfc6ed52a, 0x0e62d8d4, 0x1d9ab827, 0xef96b5d9, + 0xb38f56e6, 0x41835b18, 0x527b3beb, 0xa0773615, 0x758bfa0d, 0x8787f7f3, 0x947f9700, 0x66739afe, + 0x5898a2fc, 0xaa94af02, 0xb96ccff1, 0x4b60c20f, 0x9e9c0e17, 0x6c9003e9, 0x7f68631a, 0x8d646ee4, + 0xd17d8ddb, 0x23718025, 0x3089e0d6, 0xc285ed28, 0x17792130, 0xe5752cce, 0xf68d4c3d, 0x048141c3, + 0x4ebe8a43, 0xbcb287bd, 0xaf4ae74e, 0x5d46eab0, 0x88ba26a8, 0x7ab62b56, 0x694e4ba5, 0x9b42465b, + 0xc75ba564, 0x3557a89a, 0x26afc869, 0xd4a3c597, 0x015f098f, 0xf3530471, 0xe0ab6482, 0x12a7697c, + 0x74d4f382, 0x86d8fe7c, 0x95209e8f, 0x672c9371, 0xb2d05f69, 0x40dc5297, 0x53243264, 0xa1283f9a, + 0xfd31dca5, 0x0f3dd15b, 0x1cc5b1a8, 0xeec9bc56, 0x3b35704e, 0xc9397db0, 0xdac11d43, 0x28cd10bd, + 0x62f2db3d, 0x90fed6c3, 0x8306b630, 0x710abbce, 0xa4f677d6, 0x56fa7a28, 0x45021adb, 0xb70e1725, + 0xeb17f41a, 0x191bf9e4, 0x0ae39917, 0xf8ef94e9, 0x2d1358f1, 0xdf1f550f, 0xcce735fc, 0x3eeb3802, + 0xb13145f8, 0x433d4806, 0x50c528f5, 0xa2c9250b, 0x7735e913, 0x8539e4ed, 0x96c1841e, 0x64cd89e0, + 0x38d46adf, 0xcad86721, 0xd92007d2, 0x2b2c0a2c, 0xfed0c634, 0x0cdccbca, 0x1f24ab39, 0xed28a6c7, + 0xa7176d47, 0x551b60b9, 0x46e3004a, 0xb4ef0db4, 0x6113c1ac, 0x931fcc52, 0x80e7aca1, 0x72eba15f, + 0x2ef24260, 0xdcfe4f9e, 0xcf062f6d, 0x3d0a2293, 0xe8f6ee8b, 0x1afae375, 0x09028386, 0xfb0e8e78, + 0x9d7d1486, 0x6f711978, 0x7c89798b, 0x8e857475, 0x5b79b86d, 0xa975b593, 0xba8dd560, 0x4881d89e, + 0x14983ba1, 0xe694365f, 0xf56c56ac, 0x07605b52, 0xd29c974a, 0x20909ab4, 0x3368fa47, 0xc164f7b9, + 0x8b5b3c39, 
0x795731c7, 0x6aaf5134, 0x98a35cca, 0x4d5f90d2, 0xbf539d2c, 0xacabfddf, 0x5ea7f021, + 0x02be131e, 0xf0b21ee0, 0xe34a7e13, 0x114673ed, 0xc4babff5, 0x36b6b20b, 0x254ed2f8, 0xd742df06, + 0xe9a9e704, 0x1ba5eafa, 0x085d8a09, 0xfa5187f7, 0x2fad4bef, 0xdda14611, 0xce5926e2, 0x3c552b1c, + 0x604cc823, 0x9240c5dd, 0x81b8a52e, 0x73b4a8d0, 0xa64864c8, 0x54446936, 0x47bc09c5, 0xb5b0043b, + 0xff8fcfbb, 0x0d83c245, 0x1e7ba2b6, 0xec77af48, 0x398b6350, 0xcb876eae, 0xd87f0e5d, 0x2a7303a3, + 0x766ae09c, 0x8466ed62, 0x979e8d91, 0x6592806f, 0xb06e4c77, 0x42624189, 0x519a217a, 0xa3962c84, + 0xc5e5b67a, 0x37e9bb84, 0x2411db77, 0xd61dd689, 0x03e11a91, 0xf1ed176f, 0xe215779c, 0x10197a62, + 0x4c00995d, 0xbe0c94a3, 0xadf4f450, 0x5ff8f9ae, 0x8a0435b6, 0x78083848, 0x6bf058bb, 0x99fc5545, + 0xd3c39ec5, 0x21cf933b, 0x3237f3c8, 0xc03bfe36, 0x15c7322e, 0xe7cb3fd0, 0xf4335f23, 0x063f52dd, + 0x5a26b1e2, 0xa82abc1c, 0xbbd2dcef, 0x49ded111, 0x9c221d09, 0x6e2e10f7, 0x7dd67004, 0x8fda7dfa, + }, }; diff --git a/util/VCode.h b/util/VCode.h index 907c3367..b09eddbb 100644 --- a/util/VCode.h +++ b/util/VCode.h @@ -59,65 +59,71 @@ // fulfill this role quite well. CRC32c in particular has explicit // hardware support in many popular architectures, making it one of // the lowest-overhead options, both in terms of time and op count. -// ----------------------------------------------------------------------------- -void VCODE_INIT(void); -uint32_t VCODE_FINALIZE(void); +//----------------------------------------------------------------------------- +void VCODE_INIT( void ); +uint32_t VCODE_FINALIZE( void ); // VCodes have 64-bit state to lessen the probability of internal // state collisions. Since CRC HW support is commonly for 32-bits at // most, two separate CRCs are stored. 
typedef struct { - uint32_t data_hash; - uint32_t lens_hash; + uint32_t data_hash; + uint32_t lens_hash; } vcode_state_t; #define VCODE_COUNT 3 extern vcode_state_t vcode_states[VCODE_COUNT]; -extern uint32_t g_doVCode; -extern uint32_t g_inputVCode; -extern uint32_t g_outputVCode; -extern uint32_t g_resultVCode; +extern uint32_t g_doVCode; +extern uint32_t g_inputVCode; +extern uint32_t g_outputVCode; +extern uint32_t g_resultVCode; //----------------------------------------------------------------------------- // HW CRC32c wrappers/accessors #if defined(HAVE_ARM_ACLE) -# include "Intrinsics.h" -# define HWCRC_U64 __crc32cd -# define HWCRC_U8 __crc32cb + #include "Intrinsics.h" + #define HWCRC_U64 __crc32cd + #define HWCRC_U8 __crc32cb #elif defined(HAVE_ARM64_ASM) -static inline uint32_t _hwcrc_asm64(uint32_t crc, uint64_t data) { - __asm__ __volatile__("crc32cx %w[c], %w[c], %x[v]\n" - : [c] "+r"(crc) - : [v] "r"(data)); + +static inline uint32_t _hwcrc_asm64( uint32_t crc, uint64_t data ) { + __asm__ __volatile__ ("crc32cx %w[c], %w[c], %x[v]\n" + : [c] "+r"(crc) + : [v] "r"(data)); return crc; } -static inline uint32_t _hwcrc_asm8(uint32_t crc, uint8_t data) { - __asm__ __volatile__("crc32cb %w[c], %w[c], %w[v]\n" - : [c] "+r"(crc) - : [v] "r"(data)); + +static inline uint32_t _hwcrc_asm8( uint32_t crc, uint8_t data ) { + __asm__ __volatile__ ("crc32cb %w[c], %w[c], %w[v]\n" + : [c] "+r"(crc) + : [v] "r"(data)); return crc; } -# define HWCRC_U64 _hwcrc_asm64 -# define HWCRC_U8 _hwcrc_asm8 + + #define HWCRC_U64 _hwcrc_asm64 + #define HWCRC_U8 _hwcrc_asm8 #elif defined(HAVE_X86_64_CRC32C) -# include "Intrinsics.h" -# define HWCRC_U64 _mm_crc32_u64 -# define HWCRC_U8 _mm_crc32_u8 + #include "Intrinsics.h" + #define HWCRC_U64 _mm_crc32_u64 + #define HWCRC_U8 _mm_crc32_u8 #elif defined(HAVE_X86_64_ASM) -static inline uint32_t _hwcrc_asm64(uint64_t crc, uint64_t data) { - __asm__ __volatile__("crc32q %1, %0\n" - : "+r"(crc) - : "rm"(data)); + +static inline uint32_t 
_hwcrc_asm64( uint64_t crc, uint64_t data ) { + __asm__ __volatile__ ("crc32q %1, %0\n" + : "+r"(crc) + : "rm"(data)); return (uint32_t)crc; } -static inline uint32_t _hwcrc_asm8(uint32_t crc, uint8_t data) { - __asm__ __volatile__("crc32b %1, %0\n" - : "+r"(crc) - : "r"(data)); + +static inline uint32_t _hwcrc_asm8( uint32_t crc, uint8_t data ) { + __asm__ __volatile__ ("crc32b %1, %0\n" + : "+r"(crc) + : "r"(data)); return crc; } -# define HWCRC_U64 _hwcrc_asm64 -# define HWCRC_U8 _hwcrc_asm8 + + #define HWCRC_U64 _hwcrc_asm64 + #define HWCRC_U8 _hwcrc_asm8 #endif //----------------------------------------------------------------------------- @@ -125,69 +131,72 @@ static inline uint32_t _hwcrc_asm8(uint32_t crc, uint8_t data) { extern const uint32_t crc32c_sw_table[16][256]; // This is based on Mark Adler's implementation. -static inline uint32_t crc32c_update_sw_u64(uint32_t crc, uint64_t data) { +static inline uint32_t crc32c_update_sw_u64( uint32_t crc, uint64_t data ) { uint64_t crc64 = crc ^ data; + crc64 = - crc32c_sw_table[7][ crc64 & 0xff] ^ - crc32c_sw_table[6][(crc64 >> 8) & 0xff] ^ - crc32c_sw_table[5][(crc64 >> 16) & 0xff] ^ - crc32c_sw_table[4][(crc64 >> 24) & 0xff] ^ - crc32c_sw_table[3][(crc64 >> 32) & 0xff] ^ - crc32c_sw_table[2][(crc64 >> 40) & 0xff] ^ - crc32c_sw_table[1][(crc64 >> 48) & 0xff] ^ - crc32c_sw_table[0][ crc64 >> 56] ; + crc32c_sw_table[7][crc64 & 0xff] ^ + crc32c_sw_table[6][(crc64 >> 8) & 0xff] ^ + crc32c_sw_table[5][(crc64 >> 16) & 0xff] ^ + crc32c_sw_table[4][(crc64 >> 24) & 0xff] ^ + crc32c_sw_table[3][(crc64 >> 32) & 0xff] ^ + crc32c_sw_table[2][(crc64 >> 40) & 0xff] ^ + crc32c_sw_table[1][(crc64 >> 48) & 0xff] ^ + crc32c_sw_table[0][crc64 >> 56]; return (uint32_t)crc64; } -static inline void crc32c_update_u64(uint32_t * crcptr, uint64_t data) { +static inline void crc32c_update_u64( uint32_t * crcptr, uint64_t data ) { uint32_t crc = *crcptr; + #if defined(HWCRC_U64) - crc = HWCRC_U64(crc, data); + crc = HWCRC_U64(crc, data); 
#else - crc = crc32c_update_sw_u64(crc, data); + crc = crc32c_update_sw_u64(crc, data); #endif *crcptr = crc; } //----------------------------------------------------------------------------- // Special-case inline-able handling of 8-or-fewer byte integer VCode inputs -static inline void VCODE_HASH_SMALL(const uint64_t data, unsigned idx) { - if (idx >= VCODE_COUNT) +static inline void VCODE_HASH_SMALL( const uint64_t data, unsigned idx ) { + if (idx >= VCODE_COUNT) { return; + } crc32c_update_u64(&vcode_states[idx].data_hash, data); - crc32c_update_u64(&vcode_states[idx].lens_hash, 8); + crc32c_update_u64(&vcode_states[idx].lens_hash, 8); } -template < typename T > -static inline void addVCodeInput(const T data) { +template +static inline void addVCodeInput( const T data ) { static_assert(std::is_integral::value, "Non-integer data requires addVCode(const void *, size_t)"); - if (g_doVCode) VCODE_HASH_SMALL((uint64_t)data, 0); + if (g_doVCode) { VCODE_HASH_SMALL((uint64_t)data, 0); } } -template < typename T > -static inline void addVCodeOutput(const T data) { +template +static inline void addVCodeOutput( const T data ) { static_assert(std::is_integral::value, "Non-integer data requires addVCode(const void *, size_t)"); - if (g_doVCode) VCODE_HASH_SMALL((uint64_t)data, 1); + if (g_doVCode) { VCODE_HASH_SMALL((uint64_t)data, 1); } } -template < typename T > -static inline void addVCodeResult(const T data) { +template +static inline void addVCodeResult( const T data ) { static_assert(std::is_integral::value, "Non-integer data requires addVCode(const void *, size_t)"); - if (g_doVCode) VCODE_HASH_SMALL((uint64_t)data, 2); + if (g_doVCode) { VCODE_HASH_SMALL((uint64_t)data, 2); } } //----------------------------------------------------------------------------- // General-purpose VCode input handling -void VCODE_HASH(const void * input, size_t len, unsigned idx); +void VCODE_HASH( const void * input, size_t len, unsigned idx ); -static inline void addVCodeInput(const 
void * in, size_t len) { - if (g_doVCode) VCODE_HASH(in, len, 0); +static inline void addVCodeInput( const void * in, size_t len ) { + if (g_doVCode) { VCODE_HASH(in, len, 0); } } -static inline void addVCodeOutput(const void * in, size_t len) { - if (g_doVCode) VCODE_HASH(in, len, 1); +static inline void addVCodeOutput( const void * in, size_t len ) { + if (g_doVCode) { VCODE_HASH(in, len, 1); } } -static inline void addVCodeResult(const void * in, size_t len) { - if (g_doVCode) VCODE_HASH(in, len, 2); +static inline void addVCodeResult( const void * in, size_t len ) { + if (g_doVCode) { VCODE_HASH(in, len, 2); } }