Skip to content

Commit

Permalink
Continued cleanup of old UNALIGNED_OK checks
Browse files Browse the repository at this point in the history
- Remove obsolete checks
- Fix checks that are inconsistent
- Stop compiling compare256/longest_match variants that never get called
- Improve how the generic compare256 functions are handled.
- Allow overriding OPTIMAL_CMP

This simplifies the code and avoids having a lot of code in the compiled library that can never get executed.
  • Loading branch information
Dead2 committed Dec 26, 2024
1 parent 1aeb291 commit bf05e88
Show file tree
Hide file tree
Showing 15 changed files with 218 additions and 345 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1074,6 +1074,9 @@ set(ZLIB_PUBLIC_HDRS
${CMAKE_CURRENT_BINARY_DIR}/zlib${SUFFIX}.h
)
set(ZLIB_PRIVATE_HDRS
arch/generic/chunk_permute_table.h
arch/generic/compare256_p.h
arch/generic/generic_functions.h
adler32_p.h
chunkset_tpl.h
compare256_rle.h
Expand Down
4 changes: 2 additions & 2 deletions arch/generic/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ chunkset_c.o: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.
chunkset_c.lo: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_c.c

compare256_c.o: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
compare256_c.o: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCDIR)/compare256_p.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c

compare256_c.lo: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
compare256_c.lo: $(SRCDIR)/compare256_c.c $(SRCTOP)/zbuild.h $(SRCDIR)/compare256_p.h $(SRCTOP)/zmemory.h $(SRCTOP)/deflate.h $(SRCTOP)/fallback_builtins.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/compare256_c.c

crc32_braid_c.o: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h
Expand Down
189 changes: 15 additions & 174 deletions arch/generic/compare256_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,187 +4,28 @@
*/

#include "zbuild.h"
#include "zmemory.h"
#include "deflate.h"
#include "fallback_builtins.h"

/* Byte-at-a-time comparison of two buffers, capped at 256 bytes.
 *
 * src0, src1: the two byte sequences to compare.
 * Returns the number of leading bytes that are equal (0..256). Bytes are only
 * read up to and including the first mismatching position.
 */
static inline uint32_t compare256_c_static(const uint8_t *src0, const uint8_t *src1) {
    uint32_t matched;

    for (matched = 0; matched < 256; matched++) {
        if (src0[matched] != src1[matched])
            break;
    }

    /* Either the index of the first differing byte, or 256 if none differed. */
    return matched;
}
#include "compare256_p.h"

// Set optimal COMPARE256 function variant
// Selection order: OPTIMAL_CMP == 8 picks the byte-wise compare256_8;
// otherwise the widest variant whose builtin macro is defined is used
// (compare256_64 with HAVE_BUILTIN_CTZLL, then compare256_32 with
// HAVE_BUILTIN_CTZ), and compare256_16 is the fallback when neither is defined.
#if OPTIMAL_CMP == 8
# define COMPARE256 compare256_8
#elif defined(HAVE_BUILTIN_CTZLL)
# define COMPARE256 compare256_64
#elif defined(HAVE_BUILTIN_CTZ)
# define COMPARE256 compare256_32
#else
# define COMPARE256 compare256_16
#endif

/* Generic C entry point for the 256-byte comparison.
 *
 * Forwards both pointers to the COMPARE256 variant chosen by the preprocessor
 * selection earlier in this file and returns the match length it reports.
 * The previous body returned compare256_c_static() first, which left the
 * COMPARE256 dispatch as unreachable code.
 */
Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {
    return COMPARE256(src0, src1);
}

// Generate longest_match_c
// match_tpl.h is included once per generated function; presumably it reads
// LONGEST_MATCH as the function name and COMPARE256 as the comparison routine.
// NOTE(review): COMPARE256 is defined again for the second include, which is
// only valid if match_tpl.h #undefs these macros at its end - confirm.
#define LONGEST_MATCH longest_match_c
#define COMPARE256 compare256_c_static

#include "match_tpl.h"

// Generate longest_match_slow_c
// Same template; LONGEST_MATCH_SLOW presumably selects its slow-path variant.
#define LONGEST_MATCH_SLOW
#define LONGEST_MATCH longest_match_slow_c
#define COMPARE256 compare256_c_static

#include "match_tpl.h"

#if OPTIMAL_CMP >= 32

/* Two-bytes-at-a-time comparison of two buffers, capped at 256 bytes.
 *
 * zng_memcmp_2 is used as "non-zero when the 2-byte pair differs"
 * (presumably a 2-byte memcmp from zmemory.h).
 * Returns the number of leading bytes that are equal (0..256).
 */
static inline uint32_t compare256_16_static(const uint8_t *src0, const uint8_t *src1) {
    uint32_t pos;

    for (pos = 0; pos < 256; pos += 2) {
        if (zng_memcmp_2(src0 + pos, src1 + pos) == 0)
            continue;

        /* The pair differs: credit one more byte when its first byte still matches. */
        return pos + (src0[pos] == src1[pos]);
    }

    return 256;
}

/* Named entry point for the 16-bit comparison: forwards both pointers to
 * compare256_16_static and returns its match length unchanged. */
Z_INTERNAL uint32_t compare256_16(const uint8_t *src0, const uint8_t *src1) {
return compare256_16_static(src0, src1);
}

// Generate longest_match_16
#define LONGEST_MATCH longest_match_16
#define COMPARE256 compare256_16_static

#include "match_tpl.h"

// Generate longest_match_slow_16
#define LONGEST_MATCH_SLOW
#define LONGEST_MATCH longest_match_slow_16
#define COMPARE256 compare256_16_static

#include "match_tpl.h"

#ifdef HAVE_BUILTIN_CTZ
/* Four-bytes-at-a-time comparison of two buffers, capped at 256 bytes.
 *
 * Each step loads one 32-bit word from each buffer with zng_memread_4
 * (assumed to load in native byte order - confirm in zmemory.h) and, on the
 * first pair of words that differ, converts the position of the first
 * differing bit into a byte offset.
 * Returns the number of leading bytes that are equal (0..256).
 */
static inline uint32_t compare256_32_static(const uint8_t *src0, const uint8_t *src1) {
    uint32_t offset;

    for (offset = 0; offset < 256; offset += 4) {
        const uint32_t lhs = zng_memread_4(src0 + offset);
        const uint32_t rhs = zng_memread_4(src1 + offset);

        if (lhs != rhs) {
            const uint32_t mismatch = lhs ^ rhs;
            /* Little-endian: the lowest-addressed byte is least significant, so
             * count trailing zero bits; otherwise count leading zero bits. */
#if BYTE_ORDER == LITTLE_ENDIAN
            return offset + (uint32_t)__builtin_ctz(mismatch) / 8;
#else
            return offset + (uint32_t)__builtin_clz(mismatch) / 8;
#endif
        }
    }

    return 256;
}

/* Named entry point for the 32-bit comparison: forwards both pointers to
 * compare256_32_static and returns its match length unchanged. */
Z_INTERNAL uint32_t compare256_32(const uint8_t *src0, const uint8_t *src1) {
return compare256_32_static(src0, src1);
}

// Generate longest_match_32
#define LONGEST_MATCH longest_match_32
#define COMPARE256 compare256_32_static

#include "match_tpl.h"

// Generate longest_match_slow_32
#define LONGEST_MATCH_SLOW
#define LONGEST_MATCH longest_match_slow_32
#define COMPARE256 compare256_32_static

#include "match_tpl.h"

#endif

#if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
/* Eight-bytes-at-a-time comparison of two buffers, capped at 256 bytes.
 *
 * Each step loads one 64-bit word from each buffer with zng_memread_8
 * (assumed to load in native byte order - confirm in zmemory.h) and, on the
 * first pair of words that differ, converts the position of the first
 * differing bit into a byte offset.
 * Returns the number of leading bytes that are equal (0..256).
 */
static inline uint32_t compare256_64_static(const uint8_t *src0, const uint8_t *src1) {
    uint32_t offset;

    for (offset = 0; offset < 256; offset += 8) {
        const uint64_t lhs = zng_memread_8(src0 + offset);
        const uint64_t rhs = zng_memread_8(src1 + offset);

        if (lhs != rhs) {
            const uint64_t mismatch = lhs ^ rhs;
            /* Little-endian: the lowest-addressed byte is least significant, so
             * count trailing zero bits; otherwise count leading zero bits. */
#if BYTE_ORDER == LITTLE_ENDIAN
            return offset + (uint32_t)(__builtin_ctzll(mismatch) / 8);
#else
            return offset + (uint32_t)(__builtin_clzll(mismatch) / 8);
#endif
        }
    }

    return 256;
}

/* Named entry point for the 64-bit comparison: forwards both pointers to
 * compare256_64_static and returns its match length unchanged. */
Z_INTERNAL uint32_t compare256_64(const uint8_t *src0, const uint8_t *src1) {
return compare256_64_static(src0, src1);
}

// Generate longest_match_64
#define LONGEST_MATCH longest_match_64
#define COMPARE256 compare256_64_static

#include "match_tpl.h"

// Generate longest_match_slow_64
#define LONGEST_MATCH_SLOW
#define LONGEST_MATCH longest_match_slow_64
#define COMPARE256 compare256_64_static

#include "match_tpl.h"

#endif

#endif
123 changes: 123 additions & 0 deletions arch/generic/compare256_p.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/* compare256_p.h -- 256 byte memory comparison with match length return
* Copyright (C) 2020 Nathan Moinvaziri
* For conditions of distribution and use, see copyright notice in zlib.h
*/

#include "zmemory.h"
#include "deflate.h"
#include "fallback_builtins.h"

/* 8-bit comparison: walk both buffers one byte at a time, up to 256 bytes.
 *
 * src0, src1: the two byte sequences to compare.
 * Returns the number of leading bytes that are equal (0..256). Bytes are only
 * read up to and including the first mismatching position.
 */
static inline uint32_t compare256_8(const uint8_t *src0, const uint8_t *src1) {
    uint32_t matched = 0;

    while (matched < 256) {
        /* Eight byte compares per bounds check, written as a fixed-count loop. */
        for (int k = 0; k < 8; k++, matched++) {
            if (src0[matched] != src1[matched])
                return matched;
        }
    }

    return 256;
}

/* 16-bit comparison: walk both buffers two bytes at a time, up to 256 bytes.
 *
 * zng_memcmp_2 is used as "non-zero when the 2-byte pair differs"
 * (presumably a 2-byte memcmp from zmemory.h).
 * Returns the number of leading bytes that are equal (0..256).
 */
static inline uint32_t compare256_16(const uint8_t *src0, const uint8_t *src1) {
    uint32_t matched = 0;

    while (matched < 256) {
        /* Four pairs per bounds check. */
        for (int pair = 0; pair < 4; pair++, matched += 2) {
            const uint8_t *a = src0 + matched;
            const uint8_t *b = src1 + matched;

            if (zng_memcmp_2(a, b) != 0) {
                /* Pair differs: credit one more byte when its first byte still matches. */
                return matched + (a[0] == b[0]);
            }
        }
    }

    return 256;
}

#ifdef HAVE_BUILTIN_CTZ
/* 32-bit comparison: walk both buffers four bytes at a time, up to 256 bytes.
 *
 * Words are loaded with zng_memread_4 (assumed to load in native byte order -
 * confirm in zmemory.h). XOR-ing the two words leaves set bits only where they
 * differ; the position of the first such bit, divided by 8, is the number of
 * matching bytes inside that word.
 * Returns the number of leading bytes that are equal (0..256).
 */
static inline uint32_t compare256_32(const uint8_t *src0, const uint8_t *src1) {
    for (uint32_t pos = 0; pos < 256; pos += 4) {
        const uint32_t a = zng_memread_4(src0 + pos);
        const uint32_t b = zng_memread_4(src1 + pos);
        const uint32_t delta = a ^ b;

        if (delta == 0)
            continue;

        /* Little-endian counts from the least significant bit, otherwise from the most. */
#  if BYTE_ORDER == LITTLE_ENDIAN
        return pos + (uint32_t)__builtin_ctz(delta) / 8;
#  else
        return pos + (uint32_t)__builtin_clz(delta) / 8;
#  endif
    }

    return 256;
}
#endif

#ifdef HAVE_BUILTIN_CTZLL
/* 64-bit comparison: walk both buffers eight bytes at a time, up to 256 bytes.
 *
 * Words are loaded with zng_memread_8 (assumed to load in native byte order -
 * confirm in zmemory.h). XOR-ing the two words leaves set bits only where they
 * differ; the position of the first such bit, divided by 8, is the number of
 * matching bytes inside that word.
 * Returns the number of leading bytes that are equal (0..256).
 */
static inline uint32_t compare256_64(const uint8_t *src0, const uint8_t *src1) {
    for (uint32_t pos = 0; pos < 256; pos += 8) {
        const uint64_t a = zng_memread_8(src0 + pos);
        const uint64_t b = zng_memread_8(src1 + pos);
        const uint64_t delta = a ^ b;

        if (delta == 0)
            continue;

        /* Little-endian counts from the least significant bit, otherwise from the most. */
#  if BYTE_ORDER == LITTLE_ENDIAN
        return pos + (uint32_t)(__builtin_ctzll(delta) / 8);
#  else
        return pos + (uint32_t)(__builtin_clzll(delta) / 8);
#  endif
    }

    return 256;
}
#endif
Loading

0 comments on commit bf05e88

Please sign in to comment.