Skip to content

Commit

Permalink
Use size_t instead of uint64_t for len in all adler32 functions
Browse files Browse the repository at this point in the history
  • Loading branch information
ccawley2011 authored and Dead2 committed Jan 21, 2023
1 parent 23e4305 commit 1ab4438
Show file tree
Hide file tree
Showing 16 changed files with 54 additions and 65 deletions.
2 changes: 1 addition & 1 deletion adler32.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include "adler32_p.h"

/* ========================================================================= */
Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, uint64_t len) {
Z_INTERNAL uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len) {
uint32_t sum2;
unsigned n;

Expand Down
15 changes: 2 additions & 13 deletions adler32_fold.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,8 @@

#include <limits.h>

Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
adler = functable.adler32(adler, src, len);
/* Test that we don't try to copy more than actually fits in available address space */
#if INTPTR_MAX > SSIZE_MAX
while (len > SSIZE_MAX) {
memcpy(dst, src, SSIZE_MAX);
dst += SSIZE_MAX;
src += SSIZE_MAX;
len -= SSIZE_MAX;
}
#endif
if (len) {
memcpy(dst, src, (size_t)len);
}
memcpy(dst, src, len);
return adler;
}
2 changes: 1 addition & 1 deletion adler32_fold.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
#ifndef ADLER32_FOLD_H_
#define ADLER32_FOLD_H_

Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);

#endif
6 changes: 3 additions & 3 deletions adler32_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ static inline uint32_t adler32_len_1(uint32_t adler, const uint8_t *buf, uint32_
return adler | (sum2 << 16);
}

static inline uint32_t adler32_len_16(uint32_t adler, const uint8_t *buf, uint64_t len, uint32_t sum2) {
static inline uint32_t adler32_len_16(uint32_t adler, const uint8_t *buf, size_t len, uint32_t sum2) {
while (len) {
--len;
adler += *buf++;
Expand All @@ -38,7 +38,7 @@ static inline uint32_t adler32_len_16(uint32_t adler, const uint8_t *buf, uint64
return adler | (sum2 << 16);
}

static inline uint32_t adler32_copy_len_16(uint32_t adler, const uint8_t *buf, uint8_t *dst, uint64_t len, uint32_t sum2) {
static inline uint32_t adler32_copy_len_16(uint32_t adler, const uint8_t *buf, uint8_t *dst, size_t len, uint32_t sum2) {
while (len--) {
*dst = *buf++;
adler += *dst++;
Expand All @@ -50,7 +50,7 @@ static inline uint32_t adler32_copy_len_16(uint32_t adler, const uint8_t *buf, u
return adler | (sum2 << 16);
}

static inline uint32_t adler32_len_64(uint32_t adler, const uint8_t *buf, uint64_t len, uint32_t sum2) {
static inline uint32_t adler32_len_64(uint32_t adler, const uint8_t *buf, size_t len, uint32_t sum2) {
#ifdef UNROLL_MORE
while (len >= 16) {
len -= 16;
Expand Down
10 changes: 5 additions & 5 deletions arch/arm/adler32_neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include "../../zbuild.h"
#include "../../adler32_p.h"

static void NEON_accum32(uint32_t *s, const uint8_t *buf, uint64_t len) {
static void NEON_accum32(uint32_t *s, const uint8_t *buf, size_t len) {
static const uint16_t ALIGNED_(16) taps[64] = {
64, 63, 62, 61, 60, 59, 58, 57,
56, 55, 54, 53, 52, 51, 50, 49,
Expand Down Expand Up @@ -39,10 +39,10 @@ static void NEON_accum32(uint32_t *s, const uint8_t *buf, uint64_t len) {
uint16x8_t s2_4, s2_5, s2_6, s2_7;
s2_4 = s2_5 = s2_6 = s2_7 = vdupq_n_u16(0);

uint64_t num_iter = len >> 2;
size_t num_iter = len >> 2;
int rem = len & 3;

for (uint64_t i = 0; i < num_iter; ++i) {
for (size_t i = 0; i < num_iter; ++i) {
uint8x16x4_t d0_d3 = vld1q_u8_x4(buf);

/* Unfortunately it doesn't look like there's a direct sum 8 bit to 32
Expand Down Expand Up @@ -133,15 +133,15 @@ static void NEON_accum32(uint32_t *s, const uint8_t *buf, uint64_t len) {
s[1] = vget_lane_u32(as, 1);
}

static void NEON_handle_tail(uint32_t *pair, const uint8_t *buf, uint64_t len) {
static void NEON_handle_tail(uint32_t *pair, const uint8_t *buf, size_t len) {
unsigned int i;
for (i = 0; i < len; ++i) {
pair[0] += buf[i];
pair[1] += pair[0];
}
}

uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, uint64_t len) {
uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len) {
/* split Adler-32 into component sums */
uint32_t sum2 = (adler >> 16) & 0xffff;
adler &= 0xffff;
Expand Down
2 changes: 1 addition & 1 deletion arch/power/adler32_power8.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ static inline vector unsigned int vec_sumsu(vector unsigned int __a, vector unsi
return __a;
}

uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, uint64_t len) {
uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len) {
uint32_t s1 = adler & 0xffff;
uint32_t s2 = (adler >> 16) & 0xffff;

Expand Down
6 changes: 3 additions & 3 deletions arch/power/adler32_vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@

#define vmx_zero() (vec_splat_u32(0))

static inline void vmx_handle_head_or_tail(uint32_t *pair, const uint8_t *buf, uint64_t len) {
static inline void vmx_handle_head_or_tail(uint32_t *pair, const uint8_t *buf, size_t len) {
unsigned int i;
for (i = 0; i < len; ++i) {
pair[0] += buf[i];
pair[1] += pair[0];
}
}

static void vmx_accum32(uint32_t *s, const uint8_t *buf, uint64_t len) {
static void vmx_accum32(uint32_t *s, const uint8_t *buf, size_t len) {
/* Different taps for the separable components of sums */
const vector unsigned char t0 = {64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49};
const vector unsigned char t1 = {48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33};
Expand Down Expand Up @@ -113,7 +113,7 @@ static void vmx_accum32(uint32_t *s, const uint8_t *buf, uint64_t len) {
vec_ste(s2acc, 0, s+1);
}

uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, uint64_t len) {
uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len) {
uint32_t sum2;
uint32_t pair[16] ALIGNED_(16);
memset(&pair[2], 0, 14);
Expand Down
10 changes: 5 additions & 5 deletions arch/x86/adler32_avx2_tpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
#include "adler32_avx2_p.h"

#ifdef X86_SSE42_ADLER32
extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, uint64_t len);
extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, size_t len);

#define copy_sub32(a, b, c, d) adler32_fold_copy_sse42(a, b, c, d)
#define sub32(a, b, c) adler32_ssse3(a, b, c)
Expand All @@ -22,9 +22,9 @@ extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *src, uint64_t len);
#endif

#ifdef COPY
Z_INTERNAL uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
Z_INTERNAL uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
#else
Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const uint8_t *src, uint64_t len) {
Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const uint8_t *src, size_t len) {
#endif
if (src == NULL) return 1L;
if (len == 0) return adler;
Expand Down Expand Up @@ -61,7 +61,7 @@ Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const uint8_t *src, uint64_t le
__m256i vs1_0 = vs1;
__m256i vs3 = _mm256_setzero_si256();

uint64_t k = MIN(len, NMAX);
size_t k = MIN(len, NMAX);
k -= k % 32;
len -= k;

Expand Down
6 changes: 3 additions & 3 deletions arch/x86/adler32_avx512_tpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
#ifdef X86_AVX512_ADLER32

#ifdef COPY
Z_INTERNAL uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
Z_INTERNAL uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
#else
Z_INTERNAL uint32_t adler32_avx512(uint32_t adler, const uint8_t *src, uint64_t len) {
Z_INTERNAL uint32_t adler32_avx512(uint32_t adler, const uint8_t *src, size_t len) {
#endif

if (src == NULL) return 1L;
Expand Down Expand Up @@ -52,7 +52,7 @@ Z_INTERNAL uint32_t adler32_avx512(uint32_t adler, const uint8_t *src, uint64_t
56, 57, 58, 59, 60, 61, 62, 63, 64);
const __m512i dot3v = _mm512_set1_epi16(1);
const __m512i zero = _mm512_setzero_si512();
uint64_t k;
size_t k;

while (len >= 64) {
__m512i vs1 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler0));
Expand Down
8 changes: 4 additions & 4 deletions arch/x86/adler32_avx512_vnni.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include "adler32_avx512_p.h"
#include "adler32_avx2_p.h"

Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, uint64_t len) {
Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, size_t len) {
if (src == NULL) return 1L;
if (len == 0) return adler;

Expand Down Expand Up @@ -54,7 +54,7 @@ Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, uint
while (len >= 64) {
vs1 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler0));
vs2 = _mm512_zextsi128_si512(_mm_cvtsi32_si128(adler1));
uint64_t k = MIN(len, NMAX);
size_t k = MIN(len, NMAX);
k -= k % 64;
len -= k;
__m512i vs1_0 = vs1;
Expand Down Expand Up @@ -120,7 +120,7 @@ Z_INTERNAL uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *src, uint
return adler;
}

Z_INTERNAL uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
Z_INTERNAL uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
if (src == NULL) return 1L;
if (len == 0) return adler;

Expand Down Expand Up @@ -151,7 +151,7 @@ Z_INTERNAL uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst,
while (len >= 32) {
vs1 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler0));
vs2 = _mm256_zextsi128_si256(_mm_cvtsi32_si128(adler1));
uint64_t k = MIN(len, NMAX);
size_t k = MIN(len, NMAX);
k -= k % 32;
len -= k;
__m256i vs1_0 = vs1;
Expand Down
4 changes: 2 additions & 2 deletions arch/x86/adler32_sse42.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#ifdef X86_SSE42_ADLER32

Z_INTERNAL uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
Z_INTERNAL uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
uint32_t adler0, adler1;
adler1 = (adler >> 16) & 0xffff;
adler0 = adler & 0xffff;
Expand All @@ -31,7 +31,7 @@ Z_INTERNAL uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const
const __m128i dot2v = _mm_setr_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17);
const __m128i dot2v_0 = _mm_setr_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
const __m128i dot3v = _mm_set1_epi16(1);
uint64_t k;
size_t k;

while (len >= 16) {

Expand Down
10 changes: 5 additions & 5 deletions arch/x86/adler32_ssse3.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

#include <immintrin.h>

Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, uint64_t len) {
Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len) {
uint32_t sum2;

/* split Adler-32 into component sums */
Expand Down Expand Up @@ -46,10 +46,10 @@ Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, uint64_t l
* additions worthwhile or if it's worth it to just eat the cost of an unaligned
* load. This is a pretty simple test, just test if 16 - the remainder + len is
* < 16 */
uint64_t max_iters = NMAX;
uint64_t rem = (uintptr_t)buf & 15;
uint64_t align_offset = 16 - rem;
uint64_t k = 0;
size_t max_iters = NMAX;
size_t rem = (uintptr_t)buf & 15;
size_t align_offset = 16 - rem;
size_t k = 0;
if (rem) {
if (len < 16 + align_offset) {
/* Let's eat the cost of this one unaligned load so that
Expand Down
26 changes: 13 additions & 13 deletions cpu_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,43 +23,43 @@
extern void cpu_check_features(void);

/* adler32 */
typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, uint64_t len);
typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);

extern uint32_t adler32_c(uint32_t adler, const uint8_t *buf, uint64_t len);
extern uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);
#ifdef ARM_NEON_ADLER32
extern uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, uint64_t len);
extern uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
#endif
#ifdef PPC_VMX_ADLER32
extern uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, uint64_t len);
extern uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len);
#endif
#ifdef X86_SSSE3_ADLER32
extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, uint64_t len);
extern uint32_t adler32_ssse3(uint32_t adler, const uint8_t *buf, size_t len);
#endif
#ifdef X86_AVX2_ADLER32
extern uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, uint64_t len);
extern uint32_t adler32_avx2(uint32_t adler, const uint8_t *buf, size_t len);
#endif
#ifdef X86_AVX512_ADLER32
extern uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, uint64_t len);
extern uint32_t adler32_avx512(uint32_t adler, const uint8_t *buf, size_t len);
#endif
#ifdef X86_AVX512VNNI_ADLER32
extern uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, uint64_t len);
extern uint32_t adler32_avx512_vnni(uint32_t adler, const uint8_t *buf, size_t len);
#endif
#ifdef POWER8_VSX_ADLER32
extern uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, uint64_t len);
extern uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
#endif

/* adler32 folding */
#ifdef X86_SSE42_ADLER32
extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
extern uint32_t adler32_fold_copy_sse42(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif
#ifdef X86_AVX2_ADLER32
extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
extern uint32_t adler32_fold_copy_avx2(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif
#ifdef X86_AVX512_ADLER32
extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
extern uint32_t adler32_fold_copy_avx512(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif
#ifdef X86_AVX512VNNI_ADLER32
extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
extern uint32_t adler32_fold_copy_avx512_vnni(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
#endif

/* CRC32 folding */
Expand Down
4 changes: 2 additions & 2 deletions functable.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ Z_INTERNAL uint32_t longest_match_slow_stub(deflate_state *const s, Pos cur_matc
return functable.longest_match_slow(s, cur_match);
}

Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const uint8_t *buf, uint64_t len) {
Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const uint8_t *buf, size_t len) {
// Initialize default
functable.adler32 = &adler32_c;
cpu_check_features();
Expand Down Expand Up @@ -202,7 +202,7 @@ Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const uint8_t *buf, uint64_t le
return functable.adler32(adler, buf, len);
}

Z_INTERNAL uint32_t adler32_fold_copy_stub(uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len) {
Z_INTERNAL uint32_t adler32_fold_copy_stub(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len) {
functable.adler32_fold_copy = &adler32_fold_copy_c;
#if (defined X86_SSE42_ADLER32)
if (x86_cpu_has_sse42)
Expand Down
4 changes: 2 additions & 2 deletions functable.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
#include "adler32_fold.h"

struct functable_s {
uint32_t (* adler32) (uint32_t adler, const uint8_t *buf, uint64_t len);
uint32_t (* adler32_fold_copy) (uint32_t adler, uint8_t *dst, const uint8_t *src, uint64_t len);
uint32_t (* adler32) (uint32_t adler, const uint8_t *buf, size_t len);
uint32_t (* adler32_fold_copy) (uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);
uint32_t (* crc32) (uint32_t crc, const uint8_t *buf, size_t len);
uint32_t (* crc32_fold_reset) (struct crc32_fold_s *crc);
void (* crc32_fold_copy) (struct crc32_fold_s *crc, uint8_t *dst, const uint8_t *src, size_t len);
Expand Down
4 changes: 2 additions & 2 deletions test/benchmarks/benchmark_adler32_copy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ extern "C" {
#define MAX_RANDOM_INTS (1024 * 1024)
#define MAX_RANDOM_INTS_SIZE (MAX_RANDOM_INTS * sizeof(uint32_t))

typedef uint32_t (*adler32_cpy_func)(uint32_t adler, unsigned char *dst, const uint8_t *buf, uint64_t len);
typedef uint32_t (*adler32_cpy_func)(uint32_t adler, unsigned char *dst, const uint8_t *buf, size_t len);

class adler32_copy: public benchmark::Fixture {
private:
Expand Down Expand Up @@ -76,7 +76,7 @@ class adler32_copy: public benchmark::Fixture {
state.SkipWithError("CPU does not support " #name); \
} \
Bench(state, [](uint32_t init_sum, unsigned char *dst, \
const uint8_t *buf, uint64_t len) -> uint32_t { \
const uint8_t *buf, size_t len) -> uint32_t { \
memcpy(dst, buf, (size_t)len); \
return fptr(init_sum, buf, len); \
}); \
Expand Down

0 comments on commit 1ab4438

Please sign in to comment.