Commit 509f6b5
Since we long ago made unaligned reads safe (by using memcpy or intrinsics),
it is time to replace the UNALIGNED_OK checks, which have since really only
been used to select the optimal comparison sizes for the arch.
Dead2 committed Dec 20, 2024
1 parent 4fa76be commit 509f6b5
Showing 17 changed files with 89 additions and 129 deletions.
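
For context, here is a minimal sketch (not code from this commit) of the pattern the commit message describes: the load itself always goes through memcpy, which has defined behavior for any alignment and compiles to a plain load where the hardware allows it, while a width macro such as OPTIMAL_CMP merely selects how many bytes are compared at a time. The fallback value and helper names below are illustrative assumptions, not zlib-ng's actual definitions.

```c
#include <stdint.h>
#include <string.h>
#include <stdio.h>

#ifndef OPTIMAL_CMP
#  define OPTIMAL_CMP 64  /* assumed: a 64-bit arch that prefers wide compares */
#endif

/* Safe unaligned 32-bit read: memcpy is well-defined for any alignment. */
static inline uint32_t read32(const uint8_t *p) {
    uint32_t v;
    memcpy(&v, p, sizeof(v));
    return v;
}

/* Count matching leading bytes, comparing 4 at a time when the arch prefers it. */
static size_t match_len(const uint8_t *a, const uint8_t *b, size_t n) {
    size_t len = 0;
#if OPTIMAL_CMP >= 32
    while (n - len >= 4 && read32(a + len) == read32(b + len))
        len += 4;
#endif
    while (len < n && a[len] == b[len])
        len++;
    return len;
}

int main(void) {
    const uint8_t x[] = "abcdefgh";
    const uint8_t y[] = "abcdefXX";
    printf("match: %zu\n", match_len(x, y, 8));  /* prints "match: 6" */
    return 0;
}
```
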
4 changes: 2 additions & 2 deletions .github/workflows/cmake.yml
@@ -81,11 +81,11 @@ jobs:
build-src-dir: ../zlib-ng/test/add-subdirectory-project
readonly-project-dir: true

- - name: Ubuntu GCC -O1 No Unaligned UBSAN
+ - name: Ubuntu GCC -O1 UBSAN
os: ubuntu-latest
compiler: gcc
cxx-compiler: g++
- cmake-args: -DWITH_UNALIGNED=OFF -DWITH_SANITIZER=Undefined
+ cmake-args: -DWITH_SANITIZER=Undefined
codecov: ubuntu_gcc_o1
cflags: -O1

8 changes: 0 additions & 8 deletions CMakeLists.txt
@@ -93,7 +93,6 @@ option(WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings" OFF)
option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF)
option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF)
option(WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances" OFF)
- option(WITH_UNALIGNED "Support unaligned reads on platforms that support it" ON)

set(ZLIB_SYMBOL_PREFIX "" CACHE STRING "Give this prefix to all publicly exported symbols.
Useful when embedding into a larger library.
@@ -147,7 +146,6 @@ mark_as_advanced(FORCE
WITH_RVV
WITH_INFLATE_STRICT
WITH_INFLATE_ALLOW_INVALID_DIST
- WITH_UNALIGNED
INSTALL_UTILS
)

@@ -336,12 +334,6 @@ if(NOT WITH_NATIVE_INSTRUCTIONS)
endforeach()
endif()

- # Set architecture alignment requirements
- if(NOT WITH_UNALIGNED)
-     add_definitions(-DNO_UNALIGNED)
-     message(STATUS "Unaligned reads manually disabled")
- endif()

# Apply warning compiler flags
if(WITH_MAINTAINER_WARNINGS)
add_compile_options(${WARNFLAGS} ${WARNFLAGS_MAINTAINER} ${WARNFLAGS_DISABLE})
3 changes: 1 addition & 2 deletions README.md
@@ -25,7 +25,7 @@ Features
* Compare256 implementations using SSE2, AVX2, Neon, POWER9 & RVV
* Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX
* Support for hardware-accelerated deflate using IBM Z DFLTCC
- * Unaligned memory read/writes and large bit buffer improvements
+ * Safe unaligned memory read/writes and large bit buffer improvements
* Includes improvements from Cloudflare and Intel forks
* Configure, CMake, and NMake build system support
* Comprehensive set of CMake unit tests
@@ -213,7 +213,6 @@ Advanced Build Options
| WITH_CRC32_VX | --without-crc32-vx | Build with vectorized CRC32 on IBM Z | ON |
| WITH_DFLTCC_DEFLATE | --with-dfltcc-deflate | Build with DFLTCC intrinsics for compression on IBM Z | OFF |
| WITH_DFLTCC_INFLATE | --with-dfltcc-inflate | Build with DFLTCC intrinsics for decompression on IBM Z | OFF |
- | WITH_UNALIGNED | --without-unaligned | Allow optimizations that use unaligned reads if safe on current arch | ON |
| WITH_INFLATE_STRICT | | Build with strict inflate distance checking | OFF |
| WITH_INFLATE_ALLOW_INVALID_DIST | | Build with zero fill for inflate invalid distances | OFF |
| INSTALL_UTILS | | Copy minigzip and minideflate during install | OFF |
6 changes: 3 additions & 3 deletions arch/generic/compare256_c.c
@@ -57,7 +57,7 @@ Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {

#include "match_tpl.h"

- #if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+ #if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
/* 16-bit unaligned integer comparison */
static inline uint32_t compare256_unaligned_16_static(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0;
@@ -138,8 +138,8 @@ Z_INTERNAL uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1) {

#endif

- #if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
- /* UNALIGNED64_OK, 64-bit integer comparison */
+ #if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
+ /* 64-bit integer comparison */
static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0;

35 changes: 15 additions & 20 deletions arch/generic/generic_functions.h
@@ -28,13 +28,13 @@ void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);

uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
- #if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
- uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
+ #if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+ uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
# ifdef HAVE_BUILTIN_CTZ
- uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
+ uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
# endif
- # if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
- uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
+ # if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
+ uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
# endif
#endif

@@ -43,29 +43,24 @@ typedef void (*slide_hash_func)(deflate_state *s);
void slide_hash_c(deflate_state *s);

uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
- # if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
- uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
- #   ifdef HAVE_BUILTIN_CTZ
- uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
- #   endif
- #   if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
- uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
- #   endif
- # endif
-
- uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
- # if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
- uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
- uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
- #   ifdef UNALIGNED64_OK
- uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
- #   endif
- # endif
+ uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
+ #if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+ uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
+ uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
+ # ifdef HAVE_BUILTIN_CTZ
+ uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
+ uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
+ # endif
+ # if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
+ uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
+ uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
+ # endif
+ #endif


// Select generic implementation for longest_match, longest_match_slow, longest_match_slow functions.
- #if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
- # if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+ #if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
+ # if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
# define longest_match_generic longest_match_unaligned_64
# define longest_match_slow_generic longest_match_slow_unaligned_64
# define compare256_generic compare256_unaligned_64
8 changes: 3 additions & 5 deletions chunkset_tpl.h
@@ -227,17 +227,15 @@ static inline uint8_t* CHUNKMEMSET(uint8_t *out, uint8_t *from, unsigned len) {
}

Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, uint8_t *from, unsigned len, unsigned left) {
- #if !defined(UNALIGNED64_OK)
- #  if !defined(UNALIGNED_OK)
+ #if OPTIMAL_CMP < 32
static const uint32_t align_mask = 7;
- #  else
+ #elif OPTIMAL_CMP == 32
static const uint32_t align_mask = 3;
- #  endif
#endif

len = MIN(len, left);

- #if !defined(UNALIGNED64_OK)
+ #if OPTIMAL_CMP < 64
while (((uintptr_t)out & align_mask) && (len > 0)) {
*out++ = *from++;
--len;
11 changes: 1 addition & 10 deletions cmake/detect-sanitizer.cmake
@@ -111,6 +111,7 @@ endmacro()

macro(add_undefined_sanitizer)
set(known_checks
+ alignment
array-bounds
bool
bounds
@@ -137,10 +138,6 @@ macro(add_undefined_sanitizer)
vptr
)

- # Only check for alignment sanitizer flag if unaligned access is not supported
- if(NOT WITH_UNALIGNED)
-     list(APPEND known_checks alignment)
- endif()
# Object size sanitizer has no effect at -O0 and produces compiler warning if enabled
if(NOT CMAKE_C_FLAGS MATCHES "-O0")
list(APPEND known_checks object-size)
@@ -153,12 +150,6 @@ macro(add_undefined_sanitizer)
add_compile_options(-fsanitize=${supported_checks})
add_link_options(-fsanitize=${supported_checks})

- # Group sanitizer flag -fsanitize=undefined will automatically add alignment, even if
- # it is not in our sanitize flag list, so we need to explicitly disable alignment sanitizing.
- if(WITH_UNALIGNED)
-     add_compile_options(-fno-sanitize=alignment)
- endif()

add_common_sanitizer_flags()
else()
message(STATUS "Undefined behavior sanitizer is not supported")
4 changes: 2 additions & 2 deletions compare256_rle.h
@@ -42,7 +42,7 @@ static inline uint32_t compare256_rle_c(const uint8_t *src0, const uint8_t *src1) {
return 256;
}

- #ifdef UNALIGNED_OK
+ #if OPTIMAL_CMP >= 32
/* 16-bit unaligned integer comparison */
static inline uint32_t compare256_rle_unaligned_16(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0;
@@ -100,7 +100,7 @@ static inline uint32_t compare256_rle_unaligned_32(const uint8_t *src0, const uint8_t *src1) {

#endif

- #if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+ #if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
/* 64-bit unaligned integer comparison */
static inline uint32_t compare256_rle_unaligned_64(const uint8_t *src0, const uint8_t *src1) {
uint32_t src0_cmp32, len = 0;
10 changes: 0 additions & 10 deletions configure
@@ -87,7 +87,6 @@ mandir=${mandir-'${prefix}/share/man'}
shared_ext='.so'
shared=1
gzfileops=1
- unalignedok=1
compat=0
cover=0
build32=0
@@ -164,7 +163,6 @@ case "$1" in
echo ' [--warn] Enables extra compiler warnings' | tee -a configure.log
echo ' [--debug] Enables extra debug prints during operation' | tee -a configure.log
echo ' [--zlib-compat] Compiles for zlib-compatible API instead of zlib-ng API' | tee -a configure.log
- echo ' [--without-unaligned] Compiles without fast unaligned access' | tee -a configure.log
echo ' [--without-gzfileops] Compiles without the gzfile parts of the API enabled' | tee -a configure.log
echo ' [--without-optimizations] Compiles without support for optional instruction sets' | tee -a configure.log
echo ' [--without-new-strategies] Compiles without using new additional deflate strategies' | tee -a configure.log
@@ -195,7 +193,6 @@ case "$1" in
-s* | --shared | --enable-shared) shared=1; shift ;;
-t | --static) shared=0; shift ;;
--zlib-compat) compat=1; shift ;;
- --without-unaligned) unalignedok=0; shift ;;
--without-gzfileops) gzfileops=0; shift ;;
--cover) cover=1; shift ;;
-3* | --32) build32=1; shift ;;
@@ -876,13 +873,6 @@ else
PIC_TESTOBJG="\$(OBJG)"
fi

- # set architecture alignment requirements
- if test $unalignedok -eq 0; then
-     CFLAGS="${CFLAGS} -DNO_UNALIGNED"
-     SFLAGS="${SFLAGS} -DNO_UNALIGNED"
-     echo "Unaligned reads manually disabled." | tee -a configure.log
- fi

# enable reduced memory configuration
if test $reducedmem -eq 1; then
echo "Configuring for reduced memory environment." | tee -a configure.log
4 changes: 2 additions & 2 deletions deflate_rle.c
@@ -10,8 +10,8 @@
#include "deflate_p.h"
#include "functable.h"

- #ifdef UNALIGNED_OK
- #  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+ #if OPTIMAL_CMP >= 32
+ #  if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
# define compare256_rle compare256_rle_unaligned_64
# elif defined(HAVE_BUILTIN_CTZ)
# define compare256_rle compare256_rle_unaligned_32
18 changes: 5 additions & 13 deletions insert_string_tpl.h
@@ -29,21 +29,13 @@
# define HASH_CALC_MASK HASH_MASK
#endif
#ifndef HASH_CALC_READ
- #  ifdef UNALIGNED_OK
- #    if BYTE_ORDER == LITTLE_ENDIAN
- #      define HASH_CALC_READ \
-           memcpy(&val, strstart, sizeof(val));
- #    else
- #      define HASH_CALC_READ \
-           memcpy(&val, strstart, sizeof(val)); \
-           val = ZSWAP32(val);
- #    endif
+ #  if BYTE_ORDER == LITTLE_ENDIAN
+ #    define HASH_CALC_READ \
+         memcpy(&val, strstart, sizeof(val));
#  else
#    define HASH_CALC_READ \
-         val = ((uint32_t)(strstart[0])); \
-         val |= ((uint32_t)(strstart[1]) << 8); \
-         val |= ((uint32_t)(strstart[2]) << 16); \
-         val |= ((uint32_t)(strstart[3]) << 24);
+         memcpy(&val, strstart, sizeof(val)); \
+         val = ZSWAP32(val);
#  endif
#endif

22 changes: 11 additions & 11 deletions match_tpl.h
@@ -40,7 +40,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
uint32_t chain_length, nice_match, best_len, offset;
uint32_t lookahead = s->lookahead;
Pos match_offset = 0;
- #ifdef UNALIGNED_OK
+ #if OPTIMAL_CMP >= 32
uint8_t scan_start[8];
#endif
uint8_t scan_end[8];
@@ -59,20 +59,20 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
* to find the next best match length.
*/
offset = best_len-1;
- #ifdef UNALIGNED_OK
+ #if OPTIMAL_CMP >= 32
if (best_len >= sizeof(uint32_t)) {
offset -= 2;
- #ifdef UNALIGNED64_OK
+ #if OPTIMAL_CMP >= 64
if (best_len >= sizeof(uint64_t))
offset -= 4;
#endif
}
#endif

- #ifdef UNALIGNED64_OK
+ #if OPTIMAL_CMP >= 64
memcpy(scan_start, scan, sizeof(uint64_t));
memcpy(scan_end, scan+offset, sizeof(uint64_t));
- #elif defined(UNALIGNED_OK)
+ #elif OPTIMAL_CMP >= 32
memcpy(scan_start, scan, sizeof(uint32_t));
memcpy(scan_end, scan+offset, sizeof(uint32_t));
#else
@@ -138,15 +138,15 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
* that depend on those values. However the length of the match is limited to the
* lookahead, so the output of deflate is not affected by the uninitialized values.
*/
- #ifdef UNALIGNED_OK
+ #if OPTIMAL_CMP >= 32
if (best_len < sizeof(uint32_t)) {
for (;;) {
if (zng_memcmp_2(mbase_end+cur_match, scan_end) == 0 &&
zng_memcmp_2(mbase_start+cur_match, scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
- # ifdef UNALIGNED64_OK
+ # if OPTIMAL_CMP >= 64
} else if (best_len >= sizeof(uint64_t)) {
for (;;) {
if (zng_memcmp_8(mbase_end+cur_match, scan_end) == 0 &&
@@ -186,19 +186,19 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
return best_len;

offset = best_len-1;
- #ifdef UNALIGNED_OK
+ #if OPTIMAL_CMP >= 32
if (best_len >= sizeof(uint32_t)) {
offset -= 2;
- #ifdef UNALIGNED64_OK
+ #if OPTIMAL_CMP >= 64
if (best_len >= sizeof(uint64_t))
offset -= 4;
#endif
}
#endif

- #ifdef UNALIGNED64_OK
+ #if OPTIMAL_CMP >= 64
memcpy(scan_end, scan+offset, sizeof(uint64_t));
- #elif defined(UNALIGNED_OK)
+ #elif OPTIMAL_CMP >= 32
memcpy(scan_end, scan+offset, sizeof(uint32_t));
#else
scan_end[0] = *(scan+offset);
10 changes: 5 additions & 5 deletions test/benchmarks/benchmark_compare256.cc
@@ -66,14 +66,14 @@ BENCHMARK_COMPARE256(c, compare256_c, 1);
BENCHMARK_COMPARE256(native, native_compare256, 1);
#else

- #if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
+ #if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
BENCHMARK_COMPARE256(unaligned_16, compare256_unaligned_16, 1);
- #ifdef HAVE_BUILTIN_CTZ
+ # if defined(HAVE_BUILTIN_CTZ)
BENCHMARK_COMPARE256(unaligned_32, compare256_unaligned_32, 1);
- #endif
- #if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+ # endif
+ # if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
BENCHMARK_COMPARE256(unaligned_64, compare256_unaligned_64, 1);
- #endif
+ # endif
#endif
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
BENCHMARK_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2);
10 changes: 5 additions & 5 deletions test/benchmarks/benchmark_compare256_rle.cc
@@ -61,12 +61,12 @@ class compare256_rle: public benchmark::Fixture {

BENCHMARK_COMPARE256_RLE(c, compare256_rle_c, 1);

- #ifdef UNALIGNED_OK
+ #if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
BENCHMARK_COMPARE256_RLE(unaligned_16, compare256_rle_unaligned_16, 1);
- #ifdef HAVE_BUILTIN_CTZ
+ # if defined(HAVE_BUILTIN_CTZ)
BENCHMARK_COMPARE256_RLE(unaligned_32, compare256_rle_unaligned_32, 1);
- #endif
- #if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+ # endif
+ # if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
BENCHMARK_COMPARE256_RLE(unaligned_64, compare256_rle_unaligned_64, 1);
- #endif
+ # endif
#endif