Use size_t instead of uint64_t for len in all adler32 functions

ccawley2011 · Jan 21, 2023 · 1ab4438 · 1ab4438
1 parent 23e4305
commit 1ab4438
Show file tree

Hide file tree

Showing 16 changed files with 54 additions and 65 deletions.
diff --git a/adler32.c b/adler32.c
@@ -8,7 +8,7 @@
 #include "adler32_p.h"
 
 /* ========================================================================= */
-Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, uint64_t len) {
+Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) {
     uint32_t sum2;
     unsigned n;
 

diff --git a/adler32_fold.c b/adler32_fold.c
@@ -9,19 +9,8 @@
 
 #include <limits.h>
 
-Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
+Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
     adler = functable.adler32(adler, src, len);
-/* Test that we don't try to copy more than actually fits in available address space */
-#if INTPTR_MAX > SSIZE_MAX
-    while (len > SSIZE_MAX) {
-        memcpy(dst, src, SSIZE_MAX);
-        dst += SSIZE_MAX;
-        src += SSIZE_MAX;
-        len -= SSIZE_MAX;
-    }
-#endif
-    if (len) {
-        memcpy(dst, src, (size_t)len);
-    }
+    memcpy(dst, src, len);
     return adler;
 }
diff --git a/adler32_fold.h b/adler32_fold.h
@@ -6,6 +6,6 @@
 #ifndef ADLER32_FOLD_H_
 #define ADLER32_FOLD_H_
 
-Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
+Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
 
 #endif
diff --git a/adler32_p.h b/adler32_p.h
@@ -26,7 +26,7 @@ static inline uint32_t adler32_len_1(uint32_t adler, const uint8_t *buf, uint32_
     return adler | (sum2 << 16);
 }
 
-static inline uint32_t adler32_len_16(uint32_t adler, const uint8_t *buf, uint64_t len, uint32_t sum2) {
+static inline uint32_t adler32_len_16(uint32_t adler, const uint8_t *buf, size_t len, uint32_t sum2) {
     while (len) {
         --len;
         adler += *buf++;
@@ -38,7 +38,7 @@ static inline uint32_t adler32_len_16(uint32_t adler, const uint8_t *buf, uint64
     return adler | (sum2 << 16);
 }
 
-static inline uint32_t adler32_copy_len_16(uint32_t adler, const uint8_t *buf, uint8_t *dst, uint64_t len, uint32_t sum2) {
+static inline uint32_t adler32_copy_len_16(uint32_t adler, const uint8_t *buf, uint8_t *dst, size_t len, uint32_t sum2) {
     while (len--) {
         *dst = *buf++;
         adler += *dst++;
@@ -50,7 +50,7 @@ static inline uint32_t adler32_copy_len_16(uint32_t adler, const uint8_t *buf, u
     return adler | (sum2 << 16);
 }
 
-static inline uint32_t adler32_len_64(uint32_t adler, const uint8_t *buf, uint64_t len, uint32_t sum2) {
+static inline uint32_t adler32_len_64(uint32_t adler, const uint8_t *buf, size_t len, uint32_t sum2) {
 #ifdef UNROLL_MORE
     while (len >= 16) {
         len -= 16;

diff --git a/arch/arm/adler32_neon.c b/arch/arm/adler32_neon.c
@@ -10,7 +10,7 @@
 #include "../../zbuild.h"
 #include "../../adler32_p.h"
 
-static void NEON_accum32(uint32_t *s, const uint8_t *buf, uint64_t len) {
+static void NEON_accum32(uint32_t *s, const uint8_t *buf, size_t len) {
     static const uint16_t ALIGNED_(16) taps[64] = {
         64, 63, 62, 61, 60, 59, 58, 57,
         56, 55, 54, 53, 52, 51, 50, 49,
@@ -39,10 +39,10 @@ static void NEON_accum32(uint32_t *s, const uint8_t *buf, uint64_t len) {
     uint16x8_t s2_4, s2_5, s2_6, s2_7;
     s2_4 = s2_5 = s2_6 = s2_7 = vdupq_n_u16(0);
 
-    uint64_t num_iter = len >> 2;
+    size_t num_iter = len >> 2;
     int rem = len & 3;
 
-    for (uint64_t i = 0; i < num_iter; ++i) {
+    for (size_t i = 0; i < num_iter; ++i) {
         uint8x16x4_t d0_d3 = vld1q_u8_x4(buf);
 
         /* Unfortunately it doesn't look like there's a direct sum 8 bit to 32
@@ -133,15 +133,15 @@ static void NEON_accum32(uint32_t *s, const uint8_t *buf, uint64_t len) {
     s[1] = vget_lane_u32(as, 1);
 }
 
-static void NEON_handle_tail(uint32_t *pair, const uint8_t *buf, uint64_t len) {
+static void NEON_handle_tail(uint32_t *pair, const uint8_t *buf, size_t len) {
     unsigned int i;
     for (i = 0; i < len; ++i) {
         pair[0] += buf[i];
         pair[1] += pair[0];
     }
 }
 
-uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, uint64_t len) {
+uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len) {
     /* split Adler-32 into component sums */
     uint32_t sum2 = (adler >> 16) & 0xffff;
     adler &= 0xffff;

diff --git a/arch/power/adler32_power8.c b/arch/power/adler32_power8.c
@@ -52,7 +52,7 @@ static inline vector unsigned int vec_sumsu(vector unsigned int __a, vector unsi
     return __a;
 }
 
-uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, uint64_t len) {
+uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len) {
     uint32_t s1 = adler & 0xffff;
     uint32_t s2 = (adler >> 16) & 0xffff;
 

diff --git a/arch/power/adler32_vmx.c b/arch/power/adler32_vmx.c
@@ -12,15 +12,15 @@
 
 #define vmx_zero()  (vec_splat_u32(0))
 
-static inline void vmx_handle_head_or_tail(uint32_t *pair, const uint8_t *buf, uint64_t len) {
+static inline void vmx_handle_head_or_tail(uint32_t *pair, const uint8_t *buf, size_t len) {
     unsigned int i;
     for (i = 0; i < len; ++i) {
         pair[0] += buf[i];
         pair[1] += pair[0];
     }
 }
 
-static void vmx_accum32(uint32_t *s, const uint8_t *buf, uint64_t len) {
+static void vmx_accum32(uint32_t *s, const uint8_t *buf, size_t len) {
     /* Different taps for the separable components of sums */
     const vector unsigned char t0 = {64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49};
     const vector unsigned char t1 = {48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33};
@@ -113,7 +113,7 @@ static void vmx_accum32(uint32_t *s, const uint8_t *buf, uint64_t len) {
     vec_ste(s2acc, 0, s+1);
 }
 
-uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, uint64_t len) {
+uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len) {
     uint32_t sum2;
     uint32_t pair[16] ALIGNED_(16);
     memset(&pair[2], 0, 14);

diff --git a/arch/x86/adler32_avx2_tpl.h b/arch/x86/adler32_avx2_tpl.h
@@ -11,8 +11,8 @@
 #include "adler32_avx2_p.h"
 
 #ifdef X86_SSE42_ADLER32
-extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
-extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, uint64_t len);
+extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
+extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, size_t len);
 
 #define copy_sub32(a, b, c, d) adler32_fold_copy_sse42(a, b, c, d)
 #define sub32(a, b, c) adler32_ssse3(a, b, c)
@@ -22,9 +22,9 @@ extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, uint64_t len);
 #endif
 
 #ifdef COPY
-Z_INTERNAL uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
+Z_INTERNAL uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
 #else
-Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const uint8_t *src, uint64_t len) {
+Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const uint8_t *src, size_t len) {
 #endif
     if (src == NULL) return 1L;
     if (len == 0) return adler;
@@ -61,7 +61,7 @@ Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const uint8_t *src, uint64_t le
        __m256i vs1_0 = vs1;
        __m256i vs3 = _mm256_setzero_si256();
 
-       uint64_t k = MIN(len, NMAX);
+       size_t k = MIN(len, NMAX);
        k -= k % 32;
        len -= k;
 

diff --git a/arch/x86/adler32_avx512_tpl.h b/arch/x86/adler32_avx512_tpl.h
@@ -14,9 +14,9 @@
 #ifdef X86_AVX512_ADLER32
 
 #ifdef COPY
-Z_INTERNAL uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
+Z_INTERNAL uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
 #else
-Z_INTERNAL uint32_t adler32_avx512(uint32_t adler, const uint8_t *src, uint64_t len) {
+Z_INTERNAL uint32_t adler32_avx512(uint32_t adler, const uint8_t *src, size_t len) {
 #endif
 
     if (src == NULL) return 1L;
@@ -52,7 +52,7 @@ Z_INTERNAL uint32_t adler32_avx512(uint32_t adler, const uint8_t *src, uint64_t
                                           56, 57, 58, 59, 60, 61, 62, 63, 64);
     const __m512i dot3v = _mm512_set1_epi16(1);
     const __m512i zero = _mm512_setzero_si512();
-    uint64_t k;
+    size_t k;
 
     while (len >= 64) {
         __m512i vs1 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler0));

diff --git a/arch/x86/adler32_avx512_vnni.c b/arch/x86/adler32_avx512_vnni.c
@@ -18,7 +18,7 @@
 #include "adler32_avx512_p.h"
 #include "adler32_avx2_p.h"
 
-Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, uint64_t len) {
+Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, size_t len) {
     if (src == NULL) return 1L;
     if (len == 0) return adler;
 
@@ -54,7 +54,7 @@ Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, uint
     while (len >= 64) {
         vs1 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler0));
         vs2 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler1));
-        uint64_t k = MIN(len, NMAX);
+        size_t k = MIN(len, NMAX);
         k -= k % 64;
         len -= k;
         __m512i vs1_0 = vs1;
@@ -120,7 +120,7 @@ Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, uint
     return adler;
 }
 
-Z_INTERNAL uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
+Z_INTERNAL uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
     if (src == NULL) return 1L;
     if (len == 0) return adler;
 
@@ -151,7 +151,7 @@ Z_INTERNAL uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst,
     while (len >= 32) {
         vs1 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler0));
         vs2 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler1));
-        uint64_t k = MIN(len, NMAX);
+        size_t k = MIN(len, NMAX);
         k -= k % 32;
         len -= k;
         __m256i vs1_0 = vs1;

diff --git a/arch/x86/adler32_sse42.c b/arch/x86/adler32_sse42.c
@@ -14,7 +14,7 @@
 
 #ifdef X86_SSE42_ADLER32
 
-Z_INTERNAL uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
+Z_INTERNAL uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
     uint32_t adler0, adler1;
     adler1 = (adler >> 16) & 0xffff;
     adler0 = adler & 0xffff;
@@ -31,7 +31,7 @@ Z_INTERNAL uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const
     const __m128i dot2v = _mm_setr_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17);
     const __m128i dot2v_0 = _mm_setr_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
     const __m128i dot3v = _mm_set1_epi16(1);
-    uint64_t k;
+    size_t k;
 
     while (len >= 16) {
 

diff --git a/arch/x86/adler32_ssse3.c b/arch/x86/adler32_ssse3.c
@@ -14,7 +14,7 @@
 
 #include <immintrin.h>
 
-Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, uint64_t len) {
+Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len) {
     uint32_t sum2;
 
      /* split Adler-32 into component sums */
@@ -46,10 +46,10 @@ Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, uint64_t l
      * additions worthwhile or if it's worth it to just eat the cost of an unaligned
      * load. This is a pretty simple test, just test if 16 - the remainder + len is
      * < 16 */
-    uint64_t max_iters = NMAX;
-    uint64_t rem = (uintptr_t)buf & 15;
-    uint64_t align_offset = 16 - rem;
-    uint64_t k = 0;
+    size_t max_iters = NMAX;
+    size_t rem = (uintptr_t)buf & 15;
+    size_t align_offset = 16 - rem;
+    size_t k = 0;
     if (rem) {
         if (len < 16 + align_offset) {
             /* Let's eat the cost of this one unaligned load so that

diff --git a/cpu_features.h b/cpu_features.h
@@ -23,43 +23,43 @@
 extern void cpu_check_features(void);
 
 /* adler32 */
-typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, uint64_t len);
+typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);
 
-extern uint32_t adler32_c(uint32_t adler, const uint8_t *buf, uint64_t len);
+extern uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
 #ifdef ARM_NEON_ADLER32
-extern uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, uint64_t len);
+extern uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
 #endif
 #ifdef PPC_VMX_ADLER32
-extern uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, uint64_t len);
+extern uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len);
 #endif
 #ifdef X86_SSSE3_ADLER32
-extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, uint64_t len);
+extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
 #endif
 #ifdef X86_AVX2_ADLER32
-extern uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, uint64_t len);
+extern uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
 #endif
 #ifdef X86_AVX512_ADLER32
-extern uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, uint64_t len);
+extern uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
 #endif
 #ifdef X86_AVX512VNNI_ADLER32
-extern uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, uint64_t len);
+extern uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
 #endif
 #ifdef POWER8_VSX_ADLER32
-extern uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, uint64_t len);
+extern uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
 #endif
 
 /* adler32 folding */
 #ifdef X86_SSE42_ADLER32
-extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
+extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
 #ifdef X86_AVX2_ADLER32
-extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
+extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
 #ifdef X86_AVX512_ADLER32
-extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
+extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
 #ifdef X86_AVX512VNNI_ADLER32
-extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
+extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
 #endif
 
 /* CRC32 folding */

diff --git a/functable.c b/functable.c
@@ -162,7 +162,7 @@ Z_INTERNAL uint32_t longest_match_slow_stub(deflate_state *const s, Pos cur_matc
     return functable.longest_match_slow(s, cur_match);
 }
 
-Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const uint8_t *buf, uint64_t len) {
+Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const uint8_t *buf, size_t len) {
     // Initialize default
     functable.adler32 = &adler32_c;
     cpu_check_features();
@@ -202,7 +202,7 @@ Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const uint8_t *buf, uint64_t le
     return functable.adler32(adler, buf, len);
 }
 
-Z_INTERNAL uint32_t adler32_fold_copy_stub(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
+Z_INTERNAL uint32_t adler32_fold_copy_stub(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
     functable.adler32_fold_copy = &adler32_fold_copy_c;
 #if (defined X86_SSE42_ADLER32)
     if (x86_cpu_has_sse42)

diff --git a/functable.h b/functable.h
@@ -11,8 +11,8 @@
 #include "adler32_fold.h"
 
 struct functable_s {
-    uint32_t (* adler32)            (uint32_t adler, const uint8_t *buf, uint64_t len);
-    uint32_t (* adler32_fold_copy)  (uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
+    uint32_t (* adler32)            (uint32_t adler, const uint8_t *buf, size_t len);
+    uint32_t (* adler32_fold_copy)  (uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
     uint32_t (* crc32)              (uint32_t crc, const uint8_t *buf, size_t len);
     uint32_t (* crc32_fold_reset)   (struct crc32_fold_s *crc);
     void     (* crc32_fold_copy)    (struct crc32_fold_s *crc, uint8_t *dst, const uint8_t *src, size_t len);

diff --git a/test/benchmarks/benchmark_adler32_copy.cc b/test/benchmarks/benchmark_adler32_copy.cc
@@ -18,7 +18,7 @@ extern "C" {
 #define MAX_RANDOM_INTS (1024 * 1024)
 #define MAX_RANDOM_INTS_SIZE (MAX_RANDOM_INTS * sizeof(uint32_t))
 
-typedef uint32_t (*adler32_cpy_func)(uint32_t adler, unsigned char *dst, const uint8_t *buf, uint64_t len);
+typedef uint32_t (*adler32_cpy_func)(uint32_t adler, unsigned char *dst, const uint8_t *buf, size_t len);
 
 class adler32_copy: public benchmark::Fixture {
 private:
@@ -76,7 +76,7 @@ class adler32_copy: public benchmark::Fixture {
             state.SkipWithError("CPU does not support " #name); \
         } \
         Bench(state, [](uint32_t init_sum, unsigned char *dst, \
-                        const uint8_t *buf, uint64_t len) -> uint32_t { \
+                        const uint8_t *buf, size_t len) -> uint32_t { \
             memcpy(dst, buf, (size_t)len); \
             return fptr(init_sum, buf, len); \
         }); \