Skip to content

Commit

Permalink
Allow disabling runtime CPU features detection in tests and benchmarks
Browse files Browse the repository at this point in the history
Signed-off-by: Vladislav Shchapov <[email protected]>
  • Loading branch information
phprus authored and Dead2 committed Apr 4, 2024
1 parent 939352f commit 5401b24
Show file tree
Hide file tree
Showing 14 changed files with 105 additions and 18 deletions.
52 changes: 38 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -679,8 +679,12 @@ if(WITH_OPTIM)
endif()
endif()
endif()
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h ${ARCHDIR}/arm_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/arm_features.c)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_functions.h)
if(WITH_RUNTIME_CPU_DETECTION)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/arm_features.c)
endif()

if(WITH_ACLE)
check_acle_compiler_flag()
if(HAVE_ACLE_FLAG)
Expand Down Expand Up @@ -751,8 +755,11 @@ if(WITH_OPTIM)
add_definitions(-DPOWER_FEATURES)
endif()
if(HAVE_VMX OR HAVE_POWER8_INTRIN OR HAVE_POWER9_INTRIN)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h ${ARCHDIR}/power_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power_features.c)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_functions.h)
if(WITH_RUNTIME_CPU_DETECTION)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power_features.c)
endif()
endif()
# VMX specific options and files
if(WITH_ALTIVEC)
Expand Down Expand Up @@ -801,11 +808,17 @@ if(WITH_OPTIM)
if(HAVE_RVV_INTRIN)
add_definitions(-DRISCV_FEATURES)
add_definitions(-DRISCV_RVV)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_features.h ${ARCHDIR}/riscv_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_functions.h)
if(WITH_RUNTIME_CPU_DETECTION)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_features.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c)
endif()
# FIXME: we will not set compile flags for riscv_features.c when
# the kernels update hwcap or hwprobe for riscv
set(RVV_SRCS ${ARCHDIR}/riscv_features.c ${ARCHDIR}/adler32_rvv.c ${ARCHDIR}/chunkset_rvv.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c)
set(RVV_SRCS ${ARCHDIR}/adler32_rvv.c ${ARCHDIR}/chunkset_rvv.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c)
if(WITH_RUNTIME_CPU_DETECTION)
list(APPEND RVV_SRCS ${ARCHDIR}/riscv_features.c)
endif()
list(APPEND ZLIB_ARCH_SRCS ${RVV_SRCS})
set_property(SOURCE ${RVV_SRCS} PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}")
else()
Expand All @@ -816,8 +829,11 @@ if(WITH_OPTIM)
check_s390_intrinsics()
if(HAVE_S390_INTRIN)
add_definitions(-DS390_FEATURES)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_features.h ${ARCHDIR}/s390_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/s390_features.c)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_functions.h)
if(WITH_RUNTIME_CPU_DETECTION)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_features.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/s390_features.c)
endif()
endif()
if(WITH_DFLTCC_DEFLATE OR WITH_DFLTCC_INFLATE)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/dfltcc_common.c)
Expand All @@ -843,15 +859,20 @@ if(WITH_OPTIM)
endif()
elseif(BASEARCH_X86_FOUND)
add_definitions(-DX86_FEATURES)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h ${ARCHDIR}/x86_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_functions.h)
if(WITH_RUNTIME_CPU_DETECTION)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c)
endif()
if(MSVC)
list(APPEND ZLIB_ARCH_HDRS fallback_builtins.h)
endif()
check_xsave_intrinsics()
if(HAVE_XSAVE_INTRIN)
add_feature_info(XSAVE 1 "Support XSAVE intrinsics using \"${XSAVEFLAG}\"")
set_property(SOURCE ${ARCHDIR}/x86_features.c PROPERTY COMPILE_FLAGS "${XSAVEFLAG}")
if(WITH_RUNTIME_CPU_DETECTION)
set_property(SOURCE ${ARCHDIR}/x86_features.c PROPERTY COMPILE_FLAGS "${XSAVEFLAG}")
endif()
if(NOT (CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 8.2))
add_definitions(-DX86_HAVE_XSAVE_INTRIN)
endif()
Expand Down Expand Up @@ -1038,7 +1059,6 @@ set(ZLIB_PRIVATE_HDRS
chunkset_tpl.h
compare256_rle.h
arch_functions.h
cpu_features.h
crc32_braid_p.h
crc32_braid_comb_p.h
crc32_braid_tbl.h
Expand Down Expand Up @@ -1069,7 +1089,6 @@ set(ZLIB_SRCS
arch/generic/slide_hash_c.c
adler32.c
compress.c
cpu_features.c
crc32.c
crc32_braid_comb.c
deflate.c
Expand All @@ -1091,6 +1110,11 @@ set(ZLIB_SRCS
zutil.c
)

# Add the generic CPU feature detection header and source to the library
# only when runtime CPU detection is enabled; otherwise neither file is
# listed in ZLIB_PRIVATE_HDRS / ZLIB_SRCS.
if(WITH_RUNTIME_CPU_DETECTION)
list(APPEND ZLIB_PRIVATE_HDRS cpu_features.h)
list(APPEND ZLIB_SRCS cpu_features.c)
endif()

set(ZLIB_GZFILE_PRIVATE_HDRS
gzguts.h
)
Expand Down
4 changes: 2 additions & 2 deletions Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ OBJZ = \
arch/generic/slide_hash_c.o \
adler32.o \
compress.o \
cpu_features.o \
crc32.o \
crc32_braid_comb.o \
deflate.o \
Expand All @@ -103,6 +102,7 @@ OBJZ = \
trees.o \
uncompr.o \
zutil.o \
cpu_features.o \
$(ARCH_STATIC_OBJS)

OBJG = \
Expand All @@ -123,7 +123,6 @@ PIC_OBJZ = \
arch/generic/slide_hash_c.lo \
adler32.lo \
compress.lo \
cpu_features.lo \
crc32.lo \
crc32_braid_comb.lo \
deflate.lo \
Expand All @@ -143,6 +142,7 @@ PIC_OBJZ = \
trees.lo \
uncompr.lo \
zutil.lo \
cpu_features.lo \
$(ARCH_SHARED_OBJS)

PIC_OBJG = \
Expand Down
4 changes: 4 additions & 0 deletions cpu_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#ifndef CPU_FEATURES_H_
#define CPU_FEATURES_H_

#ifndef DISABLE_RUNTIME_CPU_DETECTION

#if defined(X86_FEATURES)
# include "arch/x86/x86_features.h"
#elif defined(ARM_FEATURES)
Expand Down Expand Up @@ -37,3 +39,5 @@ struct cpu_features {
void cpu_check_features(struct cpu_features *features);

#endif

#endif
6 changes: 6 additions & 0 deletions test/benchmarks/benchmark_adler32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ class adler32: public benchmark::Fixture {

BENCHMARK_ADLER32(c, adler32_c, 1);

#ifdef DISABLE_RUNTIME_CPU_DETECTION
BENCHMARK_ADLER32(native, native_adler32, 1);
#else

#ifdef ARM_NEON
BENCHMARK_ADLER32(neon, adler32_neon, test_cpu_features.arm.has_neon);
#endif
Expand Down Expand Up @@ -92,3 +96,5 @@ BENCHMARK_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512);
#ifdef X86_AVX512VNNI
BENCHMARK_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
#endif

#endif
6 changes: 6 additions & 0 deletions test/benchmarks/benchmark_adler32_copy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ class adler32_copy: public benchmark::Fixture {

BENCHMARK_ADLER32_BASELINE_COPY(c, adler32_c, 1);

#ifdef DISABLE_RUNTIME_CPU_DETECTION
BENCHMARK_ADLER32_BASELINE_COPY(native, native_adler32, 1);
#else

#ifdef ARM_NEON
/* If we inline this copy for neon, the function would go here */
//BENCHMARK_ADLER32_COPY(neon, adler32_neon, test_cpu_features.arm.has_neon);
Expand Down Expand Up @@ -122,3 +126,5 @@ BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, test_cpu_features.x86.h
BENCHMARK_ADLER32_BASELINE_COPY(avx512_vnni_baseline, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
BENCHMARK_ADLER32_COPY(avx512_vnni, adler32_fold_copy_avx512_vnni, test_cpu_features.x86.has_avx512vnni);
#endif

#endif
6 changes: 6 additions & 0 deletions test/benchmarks/benchmark_compare256.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ class compare256: public benchmark::Fixture {

BENCHMARK_COMPARE256(c, compare256_c, 1);

#ifdef DISABLE_RUNTIME_CPU_DETECTION
BENCHMARK_COMPARE256(native, native_compare256, 1);
#else

#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
BENCHMARK_COMPARE256(unaligned_16, compare256_unaligned_16, 1);
#ifdef HAVE_BUILTIN_CTZ
Expand All @@ -86,3 +90,5 @@ BENCHMARK_COMPARE256(power9, compare256_power9, test_cpu_features.power.has_arch
#ifdef RISCV_RVV
BENCHMARK_COMPARE256(rvv, compare256_rvv, test_cpu_features.riscv.has_rvv);
#endif

#endif
6 changes: 6 additions & 0 deletions test/benchmarks/benchmark_crc32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ class crc32: public benchmark::Fixture {

BENCHMARK_CRC32(braid, PREFIX(crc32_braid), 1);

#ifdef DISABLE_RUNTIME_CPU_DETECTION
BENCHMARK_CRC32(native, native_crc32, 1);
#else

#ifdef ARM_ACLE
BENCHMARK_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32);
#endif
Expand All @@ -75,3 +79,5 @@ BENCHMARK_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq)
/* CRC32 fold does a memory copy while hashing */
BENCHMARK_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512 && test_cpu_features.x86.has_vpclmulqdq));
#endif

#endif
4 changes: 4 additions & 0 deletions test/benchmarks/benchmark_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,17 @@ extern "C" {
# include "zbuild.h"
# include "../test_cpu_features.h"

# ifndef DISABLE_RUNTIME_CPU_DETECTION
struct cpu_features test_cpu_features;
# endif
}
#endif

int main(int argc, char** argv) {
#ifndef BUILD_ALT
# ifndef DISABLE_RUNTIME_CPU_DETECTION
cpu_check_features(&test_cpu_features);
# endif
#endif

::benchmark::Initialize(&argc, argv);
Expand Down
6 changes: 6 additions & 0 deletions test/benchmarks/benchmark_slidehash.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ class slide_hash: public benchmark::Fixture {

BENCHMARK_SLIDEHASH(c, slide_hash_c, 1);

#ifdef DISABLE_RUNTIME_CPU_DETECTION
BENCHMARK_SLIDEHASH(native, native_slide_hash, 1);
#else

#ifdef ARM_SIMD
BENCHMARK_SLIDEHASH(armv6, slide_hash_armv6, test_cpu_features.arm.has_simd);
#endif
Expand All @@ -90,3 +94,5 @@ BENCHMARK_SLIDEHASH(sse2, slide_hash_sse2, test_cpu_features.x86.has_sse2);
#ifdef X86_AVX2
BENCHMARK_SLIDEHASH(avx2, slide_hash_avx2, test_cpu_features.x86.has_avx2);
#endif

#endif
6 changes: 6 additions & 0 deletions test/test_adler32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,10 @@ INSTANTIATE_TEST_SUITE_P(adler32, adler32_variant, testing::ValuesIn(tests));

TEST_ADLER32(c, adler32_c, 1)

#ifdef DISABLE_RUNTIME_CPU_DETECTION
TEST_ADLER32(native, native_adler32, 1)
#else

#ifdef ARM_NEON
TEST_ADLER32(neon, adler32_neon, test_cpu_features.arm.has_neon)
#elif defined(POWER8_VSX)
Expand All @@ -387,3 +391,5 @@ TEST_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512)
#ifdef X86_AVX512VNNI
TEST_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni)
#endif

#endif
6 changes: 6 additions & 0 deletions test/test_compare256.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ static inline void compare256_match_check(compare256_func compare256) {

TEST_COMPARE256(c, compare256_c, 1)

#ifdef DISABLE_RUNTIME_CPU_DETECTION
TEST_COMPARE256(native, native_compare256, 1)
#else

#if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN
TEST_COMPARE256(unaligned_16, compare256_unaligned_16, 1)
#ifdef HAVE_BUILTIN_CTZ
Expand All @@ -85,3 +89,5 @@ TEST_COMPARE256(power9, compare256_power9, test_cpu_features.power.has_arch_3_00
#ifdef RISCV_RVV
TEST_COMPARE256(rvv, compare256_rvv, test_cpu_features.riscv.has_rvv)
#endif

#endif
6 changes: 5 additions & 1 deletion test/test_cpu_features.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
#ifndef TEST_CPU_FEATURES_H
#define TEST_CPU_FEATURES_H

#include "cpu_features.h"
#ifndef DISABLE_RUNTIME_CPU_DETECTION

# include "cpu_features.h"

extern struct cpu_features test_cpu_features;

#endif

#endif
6 changes: 6 additions & 0 deletions test/test_crc32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,10 @@ INSTANTIATE_TEST_SUITE_P(crc32, crc32_variant, testing::ValuesIn(tests));

TEST_CRC32(braid, PREFIX(crc32_braid), 1)

#ifdef DISABLE_RUNTIME_CPU_DETECTION
TEST_CRC32(native, native_crc32, 1)
#else

#ifdef ARM_ACLE
TEST_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32)
#endif
Expand All @@ -223,3 +227,5 @@ TEST_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq)
#ifdef X86_VPCLMULQDQ_CRC
TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512 && test_cpu_features.x86.has_vpclmulqdq))
#endif

#endif
5 changes: 4 additions & 1 deletion test/test_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,16 @@
extern "C" {
# include "zbuild.h"
# include "test_cpu_features.h"

# ifndef DISABLE_RUNTIME_CPU_DETECTION
struct cpu_features test_cpu_features;
# endif
}

// Entry point of the GoogleTest runner: prints which source file provides
// main(), optionally fills in the global test_cpu_features, then initializes
// GoogleTest with the command-line arguments and runs every registered test.
// Returns the result of RUN_ALL_TESTS().
GTEST_API_ int main(int argc, char **argv) {
printf("Running main() from %s\n", __FILE__);
#ifndef DISABLE_RUNTIME_CPU_DETECTION
// Only when runtime CPU detection is compiled in: the test_cpu_features
// global is declared under the same guard, so this call must stay guarded.
cpu_check_features(&test_cpu_features);
#endif
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

0 comments on commit 5401b24

Please sign in to comment.