Revert "Since we long ago make unaligned reads safe (by using memcpy …
Browse files Browse the repository at this point in the history
…or intrinsics),"

This reverts commit 80fffd7.
It was mistakenly pushed to develop instead of going through a PR and the appropriate reviews.
Dead2 committed Dec 17, 2024
1 parent 80fffd7 commit 037ab0f
Showing 17 changed files with 108 additions and 92 deletions.
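For context, the premise of the reverted commit — that unaligned reads were already made safe by routing them through memcpy or intrinsics — refers to the standard portable idiom sketched below. The helper name is illustrative, not zlib-ng's actual code:

```c
#include <stdint.h>
#include <string.h>

/* Sketch of the "safe unaligned read" idiom: never dereference a
 * possibly misaligned pointer directly; go through memcpy, which
 * compilers lower to a single load on targets where unaligned
 * access is cheap. */
static inline uint32_t load_u32(const uint8_t *p) {
    uint32_t v;
    memcpy(&v, p, sizeof(v));   /* well-defined for any alignment of p */
    return v;
}
```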
4 changes: 2 additions & 2 deletions .github/workflows/cmake.yml
@@ -81,11 +81,11 @@ jobs:
build-src-dir: ../zlib-ng/test/add-subdirectory-project
readonly-project-dir: true

- name: Ubuntu GCC -O1 UBSAN
- name: Ubuntu GCC -O1 No Unaligned UBSAN
os: ubuntu-latest
compiler: gcc
cxx-compiler: g++
cmake-args: -DWITH_SANITIZER=Undefined
cmake-args: -DWITH_UNALIGNED=OFF -DWITH_SANITIZER=Undefined
codecov: ubuntu_gcc_o1
cflags: -O1

8 changes: 8 additions & 0 deletions CMakeLists.txt
@@ -93,6 +93,7 @@ option(WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings" OFF)
option(WITH_CODE_COVERAGE "Enable code coverage reporting" OFF)
option(WITH_INFLATE_STRICT "Build with strict inflate distance checking" OFF)
option(WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid distances" OFF)
option(WITH_UNALIGNED "Support unaligned reads on platforms that support it" ON)

set(ZLIB_SYMBOL_PREFIX "" CACHE STRING "Give this prefix to all publicly exported symbols.
Useful when embedding into a larger library.
@@ -146,6 +147,7 @@ mark_as_advanced(FORCE
WITH_RVV
WITH_INFLATE_STRICT
WITH_INFLATE_ALLOW_INVALID_DIST
WITH_UNALIGNED
INSTALL_UTILS
)

@@ -334,6 +336,12 @@ if(NOT WITH_NATIVE_INSTRUCTIONS)
endforeach()
endif()

# Set architecture alignment requirements
if(NOT WITH_UNALIGNED)
add_definitions(-DNO_UNALIGNED)
message(STATUS "Unaligned reads manually disabled")
endif()

# Apply warning compiler flags
if(WITH_MAINTAINER_WARNINGS)
add_compile_options(${WARNFLAGS} ${WARNFLAGS_MAINTAINER} ${WARNFLAGS_DISABLE})
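A hedged sketch of how a build header might consume the restored NO_UNALIGNED define: the UNALIGNED_OK/UNALIGNED64_OK names mirror the diff, but where zlib-ng actually derives them (build system vs. headers) and which architectures it lists may differ, so treat this as schematic:

```c
/* Hypothetical sketch: derive UNALIGNED_OK from the target architecture
 * unless -DNO_UNALIGNED (added by WITH_UNALIGNED=OFF or
 * --without-unaligned) vetoes it unconditionally. */
#if !defined(NO_UNALIGNED)
#  if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)
#    define UNALIGNED_OK
#    define UNALIGNED64_OK
#  elif defined(__aarch64__) || defined(_M_ARM64)
#    define UNALIGNED_OK
#    define UNALIGNED64_OK
#  endif
#endif
```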
3 changes: 2 additions & 1 deletion README.md
@@ -25,7 +25,7 @@ Features
* Compare256 implementations using SSE2, AVX2, Neon, POWER9 & RVV
* Inflate chunk copying using SSE2, SSSE3, AVX, Neon & VSX
* Support for hardware-accelerated deflate using IBM Z DFLTCC
* Safe unaligned memory read/writes and large bit buffer improvements
* Unaligned memory read/writes and large bit buffer improvements
* Includes improvements from Cloudflare and Intel forks
* Configure, CMake, and NMake build system support
* Comprehensive set of CMake unit tests
@@ -213,6 +213,7 @@ Advanced Build Options
| WITH_CRC32_VX | --without-crc32-vx | Build with vectorized CRC32 on IBM Z | ON |
| WITH_DFLTCC_DEFLATE | --with-dfltcc-deflate | Build with DFLTCC intrinsics for compression on IBM Z | OFF |
| WITH_DFLTCC_INFLATE | --with-dfltcc-inflate | Build with DFLTCC intrinsics for decompression on IBM Z | OFF |
| WITH_UNALIGNED | --without-unaligned | Allow optimizations that use unaligned reads if safe on current arch | ON |
| WITH_INFLATE_STRICT | | Build with strict inflate distance checking | OFF |
| WITH_INFLATE_ALLOW_INVALID_DIST | | Build with zero fill for inflate invalid distances | OFF |
| INSTALL_UTILS | | Copy minigzip and minideflate during install | OFF |
6 changes: 3 additions & 3 deletions arch/generic/compare256_c.c
@@ -57,7 +57,7 @@ Z_INTERNAL uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1) {

#include "match_tpl.h"

#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
/* 16-bit unaligned integer comparison */
static inline uint32_t compare256_unaligned_16_static(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0;
@@ -138,8 +138,8 @@ Z_INTERNAL uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *

#endif

#if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
/* 64-bit integer comparison */
#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
/* UNALIGNED64_OK, 64-bit integer comparison */
static inline uint32_t compare256_unaligned_64_static(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0;

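The compare256 family changed in this file counts how many of the first 256 bytes of two buffers match. A minimal sketch of the technique behind the 32-bit little-endian variant — not the project's exact code:

```c
#include <stdint.h>
#include <string.h>

/* Sketch: compare 4 bytes per iteration; on a mismatch, XOR marks the
 * differing bits, and __builtin_ctz (GCC/Clang, cf. HAVE_BUILTIN_CTZ in
 * the diff) locates the first differing byte — on little-endian the
 * lowest byte occupies the lowest bits. */
static inline uint32_t compare256_u32_sketch(const uint8_t *src0, const uint8_t *src1) {
    uint32_t len = 0;
    do {
        uint32_t a, b, diff;
        memcpy(&a, src0, sizeof(a));
        memcpy(&b, src1, sizeof(b));
        diff = a ^ b;
        if (diff != 0)
            return len + ((uint32_t)__builtin_ctz(diff) >> 3);
        src0 += 4;
        src1 += 4;
        len += 4;
    } while (len < 256);
    return 256;
}
```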
35 changes: 20 additions & 15 deletions arch/generic/generic_functions.h
@@ -28,13 +28,13 @@ void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);

uint32_t compare256_c(const uint8_t *src0, const uint8_t *src1);
#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
# ifdef HAVE_BUILTIN_CTZ
uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
uint32_t compare256_unaligned_32(const uint8_t *src0, const uint8_t *src1);
# endif
# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
uint32_t compare256_unaligned_64(const uint8_t *src0, const uint8_t *src1);
# endif
#endif

@@ -43,24 +43,29 @@ typedef void (*slide_hash_func)(deflate_state *s);
void slide_hash_c(deflate_state *s);

uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
# ifdef HAVE_BUILTIN_CTZ
# ifdef HAVE_BUILTIN_CTZ
uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
# endif
# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
# endif
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
# endif
# endif

uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
# if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
uint32_t longest_match_slow_unaligned_16(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_unaligned_32(deflate_state *const s, Pos cur_match);
# ifdef UNALIGNED64_OK
uint32_t longest_match_slow_unaligned_64(deflate_state *const s, Pos cur_match);
# endif
# endif
#endif


// Select generic implementation for longest_match, longest_match_slow, compare256 functions.
#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
# define longest_match_generic longest_match_unaligned_64
# define longest_match_slow_generic longest_match_slow_unaligned_64
# define compare256_generic compare256_unaligned_64
8 changes: 5 additions & 3 deletions chunkset_tpl.h
@@ -227,15 +227,17 @@ static inline uint8_t* CHUNKMEMSET(uint8_t *out, uint8_t *from, unsigned len) {
}

Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, uint8_t *from, unsigned len, unsigned left) {
#if OPTIMAL_CMP < 32
#if !defined(UNALIGNED64_OK)
# if !defined(UNALIGNED_OK)
static const uint32_t align_mask = 7;
#elif OPTIMAL_CMP == 32
# else
static const uint32_t align_mask = 3;
# endif
#endif

len = MIN(len, left);

#if OPTIMAL_CMP < 64
#if !defined(UNALIGNED64_OK)
while (((uintptr_t)out & align_mask) && (len > 0)) {
*out++ = *from++;
--len;
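The align_mask logic this hunk restores exists so that, when wide unaligned stores are not OK, the output pointer is first advanced byte-by-byte to an aligned boundary. A sketch of that prologue, assuming 8-byte alignment is required:

```c
#include <stdint.h>
#include <stddef.h>

/* Sketch: byte-copy until `out` is 8-byte aligned, so the wide-store
 * main loop that follows never writes through a misaligned pointer.
 * Returns the length still left for the aligned loop. */
static size_t align_out_prologue(uint8_t **out, uint8_t **from, size_t len) {
    while ((((uintptr_t)*out & 7u) != 0) && len > 0) {
        *(*out)++ = *(*from)++;
        --len;
    }
    return len;
}
```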
11 changes: 10 additions & 1 deletion cmake/detect-sanitizer.cmake
@@ -111,7 +111,6 @@ endmacro()

macro(add_undefined_sanitizer)
set(known_checks
alignment
array-bounds
bool
bounds
@@ -138,6 +137,10 @@ macro(add_undefined_sanitizer)
vptr
)

# Only check for alignment sanitizer flag if unaligned access is not supported
if(NOT WITH_UNALIGNED)
list(APPEND known_checks alignment)
endif()
# Object size sanitizer has no effect at -O0 and produces compiler warning if enabled
if(NOT CMAKE_C_FLAGS MATCHES "-O0")
list(APPEND known_checks object-size)
@@ -150,6 +153,12 @@ macro(add_undefined_sanitizer)
add_compile_options(-fsanitize=${supported_checks})
add_link_options(-fsanitize=${supported_checks})

# Group sanitizer flag -fsanitize=undefined will automatically add alignment, even if
# it is not in our sanitize flag list, so we need to explicitly disable alignment sanitizing.
if(WITH_UNALIGNED)
add_compile_options(-fno-sanitize=alignment)
endif()

add_common_sanitizer_flags()
else()
message(STATUS "Undefined behavior sanitizer is not supported")
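For context on why `alignment` rejoins the check list only when WITH_UNALIGNED is off: -fsanitize=alignment traps exactly the kind of cast-based load shown in this illustrative snippet, while the memcpy idiom passes cleanly under the sanitizer:

```c
#include <stdint.h>
#include <string.h>

uint32_t ub_load(const uint8_t *buf) {
    /* flagged by -fsanitize=alignment whenever buf+1 is misaligned */
    return *(const uint32_t *)(buf + 1);
}

uint32_t clean_load(const uint8_t *buf) {
    uint32_t v;
    memcpy(&v, buf + 1, sizeof(v));   /* fine under the sanitizer */
    return v;
}
```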
4 changes: 2 additions & 2 deletions compare256_rle.h
@@ -42,7 +42,7 @@ static inline uint32_t compare256_rle_c(const uint8_t *src0, const uint8_t *src1
return 256;
}

#if OPTIMAL_CMP >= 16
#ifdef UNALIGNED_OK
/* 16-bit unaligned integer comparison */
static inline uint32_t compare256_rle_unaligned_16(const uint8_t *src0, const uint8_t *src1) {
uint32_t len = 0;
@@ -100,7 +100,7 @@ static inline uint32_t compare256_rle_unaligned_32(const uint8_t *src0, const ui

#endif

#if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
/* 64-bit unaligned integer comparison */
static inline uint32_t compare256_rle_unaligned_64(const uint8_t *src0, const uint8_t *src1) {
uint32_t src0_cmp32, len = 0;
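compare256_rle differs from plain compare256 in that src0 holds a fixed two-byte pattern the run repeats, rather than an advancing pointer. A minimal sketch of the 16-bit variant's shape — illustrative, not the project's exact code:

```c
#include <stdint.h>
#include <string.h>

/* Sketch: load the two-byte pattern once and compare src1 against it
 * 2 bytes at a time; on a word mismatch, add one more if the first
 * byte of the pair still matched. */
static inline uint32_t compare256_rle_16_sketch(const uint8_t *src0, const uint8_t *src1) {
    uint16_t pattern;
    uint32_t len = 0;
    memcpy(&pattern, src0, sizeof(pattern));
    do {
        uint16_t w;
        memcpy(&w, src1, sizeof(w));
        if (w != pattern)
            return len + (src1[0] == src0[0]);
        src1 += 2;
        len += 2;
    } while (len < 256);
    return 256;
}
```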
10 changes: 10 additions & 0 deletions configure
@@ -87,6 +87,7 @@ mandir=${mandir-'${prefix}/share/man'}
shared_ext='.so'
shared=1
gzfileops=1
unalignedok=1
compat=0
cover=0
build32=0
@@ -163,6 +164,7 @@ case "$1" in
echo ' [--warn] Enables extra compiler warnings' | tee -a configure.log
echo ' [--debug] Enables extra debug prints during operation' | tee -a configure.log
echo ' [--zlib-compat] Compiles for zlib-compatible API instead of zlib-ng API' | tee -a configure.log
echo ' [--without-unaligned] Compiles without fast unaligned access' | tee -a configure.log
echo ' [--without-gzfileops] Compiles without the gzfile parts of the API enabled' | tee -a configure.log
echo ' [--without-optimizations] Compiles without support for optional instruction sets' | tee -a configure.log
echo ' [--without-new-strategies] Compiles without using new additional deflate strategies' | tee -a configure.log
@@ -193,6 +195,7 @@ case "$1" in
-s* | --shared | --enable-shared) shared=1; shift ;;
-t | --static) shared=0; shift ;;
--zlib-compat) compat=1; shift ;;
--without-unaligned) unalignedok=0; shift ;;
--without-gzfileops) gzfileops=0; shift ;;
--cover) cover=1; shift ;;
-3* | --32) build32=1; shift ;;
@@ -873,6 +876,13 @@ else
PIC_TESTOBJG="\$(OBJG)"
fi

# set architecture alignment requirements
if test $unalignedok -eq 0; then
CFLAGS="${CFLAGS} -DNO_UNALIGNED"
SFLAGS="${SFLAGS} -DNO_UNALIGNED"
echo "Unaligned reads manually disabled." | tee -a configure.log
fi

# enable reduced memory configuration
if test $reducedmem -eq 1; then
echo "Configuring for reduced memory environment." | tee -a configure.log
4 changes: 2 additions & 2 deletions deflate_rle.c
@@ -10,8 +10,8 @@
#include "deflate_p.h"
#include "functable.h"

#if OPTIMAL_CMP >= 32
# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
#ifdef UNALIGNED_OK
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
# define compare256_rle compare256_rle_unaligned_64
# elif defined(HAVE_BUILTIN_CTZ)
# define compare256_rle compare256_rle_unaligned_32
18 changes: 13 additions & 5 deletions insert_string_tpl.h
@@ -29,13 +29,21 @@
# define HASH_CALC_MASK HASH_MASK
#endif
#ifndef HASH_CALC_READ
# if BYTE_ORDER == LITTLE_ENDIAN
# define HASH_CALC_READ \
memcpy(&val, strstart, sizeof(val));
# ifdef UNALIGNED_OK
# if BYTE_ORDER == LITTLE_ENDIAN
# define HASH_CALC_READ \
memcpy(&val, strstart, sizeof(val));
# else
# define HASH_CALC_READ \
memcpy(&val, strstart, sizeof(val)); \
val = ZSWAP32(val);
# endif
# else
# define HASH_CALC_READ \
memcpy(&val, strstart, sizeof(val)); \
val = ZSWAP32(val);
val = ((uint32_t)(strstart[0])); \
val |= ((uint32_t)(strstart[1]) << 8); \
val |= ((uint32_t)(strstart[2]) << 16); \
val |= ((uint32_t)(strstart[3]) << 24);
# endif
#endif

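The HASH_CALC_READ change restores a byte-assembled load as the fallback when unaligned access is disabled. That fallback is equivalent to this standalone helper:

```c
#include <stdint.h>

/* Sketch of the restored strict-alignment fallback: build the 32-bit
 * hash input one byte at a time. This yields the same value on any host
 * byte order and never performs a wide (possibly misaligned) load. */
static inline uint32_t read_le32_bytewise(const uint8_t *p) {
    return  (uint32_t)p[0]
         | ((uint32_t)p[1] << 8)
         | ((uint32_t)p[2] << 16)
         | ((uint32_t)p[3] << 24);
}
```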
22 changes: 11 additions & 11 deletions match_tpl.h
@@ -40,7 +40,7 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
uint32_t chain_length, nice_match, best_len, offset;
uint32_t lookahead = s->lookahead;
Pos match_offset = 0;
#if OPTIMAL_CMP >= 32
#ifdef UNALIGNED_OK
uint8_t scan_start[8];
#endif
uint8_t scan_end[8];
@@ -59,20 +59,20 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
* to find the next best match length.
*/
offset = best_len-1;
#if OPTIMAL_CMP >= 32
#ifdef UNALIGNED_OK
if (best_len >= sizeof(uint32_t)) {
offset -= 2;
#if OPTIMAL_CMP >= 64
#ifdef UNALIGNED64_OK
if (best_len >= sizeof(uint64_t))
offset -= 4;
#endif
}
#endif

#if OPTIMAL_CMP >= 64
#ifdef UNALIGNED64_OK
memcpy(scan_start, scan, sizeof(uint64_t));
memcpy(scan_end, scan+offset, sizeof(uint64_t));
#elif OPTIMAL_CMP >= 32
#elif defined(UNALIGNED_OK)
memcpy(scan_start, scan, sizeof(uint32_t));
memcpy(scan_end, scan+offset, sizeof(uint32_t));
#else
@@ -138,15 +138,15 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
* that depend on those values. However the length of the match is limited to the
* lookahead, so the output of deflate is not affected by the uninitialized values.
*/
#if OPTIMAL_CMP >= 32
#ifdef UNALIGNED_OK
if (best_len < sizeof(uint32_t)) {
for (;;) {
if (zng_memcmp_2(mbase_end+cur_match, scan_end) == 0 &&
zng_memcmp_2(mbase_start+cur_match, scan_start) == 0)
break;
GOTO_NEXT_CHAIN;
}
# if OPTIMAL_CMP >= 64
# ifdef UNALIGNED64_OK
} else if (best_len >= sizeof(uint64_t)) {
for (;;) {
if (zng_memcmp_8(mbase_end+cur_match, scan_end) == 0 &&
@@ -186,19 +186,19 @@ Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
return best_len;

offset = best_len-1;
#if OPTIMAL_CMP >= 32
#ifdef UNALIGNED_OK
if (best_len >= sizeof(uint32_t)) {
offset -= 2;
#if OPTIMAL_CMP >= 64
#ifdef UNALIGNED64_OK
if (best_len >= sizeof(uint64_t))
offset -= 4;
#endif
}
#endif

#if OPTIMAL_CMP >= 64
#ifdef UNALIGNED64_OK
memcpy(scan_end, scan+offset, sizeof(uint64_t));
#elif OPTIMAL_CMP >= 32
#elif defined(UNALIGNED_OK)
memcpy(scan_end, scan+offset, sizeof(uint32_t));
#else
scan_end[0] = *(scan+offset);
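The scan_start/scan_end buffers this file restores implement an early-rejection trick: a candidate match can only improve on best_len if it reproduces the bytes sitting at the far end of the current best match, so one small comparison filters out most chain entries before the full compare256 runs. A hedged sketch with illustrative names:

```c
#include <stdint.h>
#include <string.h>

/* Sketch: `scan_end` caches 8 bytes at `offset` (about best_len-7) of
 * the current scan window; a candidate at `match` is worth a full
 * compare only if it matches there too. */
static inline int worth_full_compare(const uint8_t *match, uint32_t offset,
                                     const uint8_t scan_end[8]) {
    uint64_t m, e;
    memcpy(&m, match + offset, sizeof(m));
    memcpy(&e, scan_end, sizeof(e));
    return m == e;
}
```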
10 changes: 5 additions & 5 deletions test/benchmarks/benchmark_compare256.cc
@@ -66,14 +66,14 @@ BENCHMARK_COMPARE256(c, compare256_c, 1);
BENCHMARK_COMPARE256(native, native_compare256, 1);
#else

#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
BENCHMARK_COMPARE256(unaligned_16, compare256_unaligned_16, 1);
# if defined(HAVE_BUILTIN_CTZ)
#ifdef HAVE_BUILTIN_CTZ
BENCHMARK_COMPARE256(unaligned_32, compare256_unaligned_32, 1);
# endif
# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
#endif
#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
BENCHMARK_COMPARE256(unaligned_64, compare256_unaligned_64, 1);
# endif
#endif
#endif
#if defined(X86_SSE2) && defined(HAVE_BUILTIN_CTZ)
BENCHMARK_COMPARE256(sse2, compare256_sse2, test_cpu_features.x86.has_sse2);
10 changes: 5 additions & 5 deletions test/benchmarks/benchmark_compare256_rle.cc
@@ -61,12 +61,12 @@ class compare256_rle: public benchmark::Fixture {

BENCHMARK_COMPARE256_RLE(c, compare256_rle_c, 1);

#if BYTE_ORDER == LITTLE_ENDIAN && OPTIMAL_CMP >= 32
#ifdef UNALIGNED_OK
BENCHMARK_COMPARE256_RLE(unaligned_16, compare256_rle_unaligned_16, 1);
# if defined(HAVE_BUILTIN_CTZ)
#ifdef HAVE_BUILTIN_CTZ
BENCHMARK_COMPARE256_RLE(unaligned_32, compare256_rle_unaligned_32, 1);
# endif
# if defined(HAVE_BUILTIN_CTZLL) && OPTIMAL_CMP >= 64
#endif
#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
BENCHMARK_COMPARE256_RLE(unaligned_64, compare256_rle_unaligned_64, 1);
# endif
#endif
#endif
(The diff for the remaining 3 changed files is not shown.)
