diff --git a/CMakeLists.txt b/CMakeLists.txt index ef69f86458..56654bf00d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -679,8 +679,12 @@ if(WITH_OPTIM) endif() endif() endif() - list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h ${ARCHDIR}/arm_functions.h) - list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/arm_features.c) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_functions.h) + if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/arm_features.c) + endif() + if(WITH_ACLE) check_acle_compiler_flag() if(HAVE_ACLE_FLAG) @@ -751,8 +755,11 @@ if(WITH_OPTIM) add_definitions(-DPOWER_FEATURES) endif() if(HAVE_VMX OR HAVE_POWER8_INTRIN OR HAVE_POWER9_INTRIN) - list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h ${ARCHDIR}/power_functions.h) - list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power_features.c) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_functions.h) + if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power_features.c) + endif() endif() # VMX specific options and files if(WITH_ALTIVEC) @@ -801,11 +808,17 @@ if(WITH_OPTIM) if(HAVE_RVV_INTRIN) add_definitions(-DRISCV_FEATURES) add_definitions(-DRISCV_RVV) - list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_features.h ${ARCHDIR}/riscv_functions.h) - list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_functions.h) + if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_features.h) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c) + endif() # FIXME: we will not set compile flags for riscv_features.c when # the kernels update hwcap or hwprobe for riscv - set(RVV_SRCS ${ARCHDIR}/riscv_features.c ${ARCHDIR}/adler32_rvv.c ${ARCHDIR}/chunkset_rvv.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c) + set(RVV_SRCS ${ARCHDIR}/adler32_rvv.c ${ARCHDIR}/chunkset_rvv.c ${ARCHDIR}/compare256_rvv.c ${ARCHDIR}/slide_hash_rvv.c) + if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND RVV_SRCS ${ARCHDIR}/riscv_features.c) + endif() list(APPEND ZLIB_ARCH_SRCS ${RVV_SRCS}) set_property(SOURCE ${RVV_SRCS} PROPERTY COMPILE_FLAGS "${RISCVFLAG} ${NOLTOFLAG}") else() @@ -816,8 +829,11 @@ if(WITH_OPTIM) check_s390_intrinsics() if(HAVE_S390_INTRIN) add_definitions(-DS390_FEATURES) - list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_features.h ${ARCHDIR}/s390_functions.h) - list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/s390_features.c) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_functions.h) + if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_features.h) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/s390_features.c) + endif() endif() if(WITH_DFLTCC_DEFLATE OR WITH_DFLTCC_INFLATE) list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/dfltcc_common.c) @@ -843,15 +859,20 @@ if(WITH_OPTIM) endif() elseif(BASEARCH_X86_FOUND) add_definitions(-DX86_FEATURES) - list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h ${ARCHDIR}/x86_functions.h) - list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_functions.h) + if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h) + list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c) + endif() if(MSVC) list(APPEND ZLIB_ARCH_HDRS fallback_builtins.h) endif() check_xsave_intrinsics() if(HAVE_XSAVE_INTRIN) add_feature_info(XSAVE 1 "Support XSAVE intrinsics using \"${XSAVEFLAG}\"") - set_property(SOURCE ${ARCHDIR}/x86_features.c PROPERTY COMPILE_FLAGS "${XSAVEFLAG}") + if(WITH_RUNTIME_CPU_DETECTION) + set_property(SOURCE ${ARCHDIR}/x86_features.c PROPERTY COMPILE_FLAGS "${XSAVEFLAG}") + endif() if(NOT (CMAKE_C_COMPILER_ID MATCHES "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 8.2)) add_definitions(-DX86_HAVE_XSAVE_INTRIN) endif() @@ -1038,7 +1059,6 @@ set(ZLIB_PRIVATE_HDRS chunkset_tpl.h compare256_rle.h arch_functions.h - cpu_features.h crc32_braid_p.h crc32_braid_comb_p.h crc32_braid_tbl.h @@ -1069,7 +1089,6 @@ set(ZLIB_SRCS arch/generic/slide_hash_c.c adler32.c compress.c - cpu_features.c crc32.c crc32_braid_comb.c deflate.c @@ -1091,6 +1110,11 @@ set(ZLIB_SRCS zutil.c ) +if(WITH_RUNTIME_CPU_DETECTION) + list(APPEND ZLIB_PRIVATE_HDRS cpu_features.h) + list(APPEND ZLIB_SRCS cpu_features.c) +endif() + set(ZLIB_GZFILE_PRIVATE_HDRS gzguts.h ) diff --git a/Makefile.in b/Makefile.in index df8105f27c..2bf0dc13bf 100644 --- a/Makefile.in +++ b/Makefile.in @@ -83,7 +83,6 @@ OBJZ = \ arch/generic/slide_hash_c.o \ adler32.o \ compress.o \ - cpu_features.o \ crc32.o \ crc32_braid_comb.o \ deflate.o \ @@ -103,6 +102,7 @@ OBJZ = \ trees.o \ uncompr.o \ zutil.o \ + cpu_features.o \ $(ARCH_STATIC_OBJS) OBJG = \ @@ -123,7 +123,6 @@ PIC_OBJZ = \ arch/generic/slide_hash_c.lo \ adler32.lo \ compress.lo \ - cpu_features.lo \ crc32.lo \ crc32_braid_comb.lo \ deflate.lo \ @@ -143,6 +142,7 @@ PIC_OBJZ = \ trees.lo \ uncompr.lo \ zutil.lo \ + cpu_features.lo \ $(ARCH_SHARED_OBJS) PIC_OBJG = \ diff --git a/cpu_features.h b/cpu_features.h index d74b1d6e03..8708724bc0 100644 --- a/cpu_features.h +++ b/cpu_features.h @@ -6,6 +6,8 @@ #ifndef CPU_FEATURES_H_ #define CPU_FEATURES_H_ +#ifndef DISABLE_RUNTIME_CPU_DETECTION + #if defined(X86_FEATURES) # include "arch/x86/x86_features.h" #elif defined(ARM_FEATURES) @@ -37,3 +39,5 @@ struct cpu_features { void cpu_check_features(struct cpu_features *features); #endif + +#endif diff --git a/test/benchmarks/benchmark_adler32.cc b/test/benchmarks/benchmark_adler32.cc index fd86145782..84f0d617c2 100644 --- a/test/benchmarks/benchmark_adler32.cc +++ b/test/benchmarks/benchmark_adler32.cc @@ -65,6 +65,10 @@ class adler32: public benchmark::Fixture { BENCHMARK_ADLER32(c, adler32_c, 1); +#ifdef DISABLE_RUNTIME_CPU_DETECTION +BENCHMARK_ADLER32(native, native_adler32, 1); +#else + #ifdef ARM_NEON BENCHMARK_ADLER32(neon, adler32_neon, test_cpu_features.arm.has_neon); #endif @@ -92,3 +96,5 @@ BENCHMARK_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512); #ifdef X86_AVX512VNNI BENCHMARK_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni); #endif + +#endif diff --git a/test/benchmarks/benchmark_adler32_copy.cc b/test/benchmarks/benchmark_adler32_copy.cc index 7992bdd5a4..e052ee76d0 100644 --- a/test/benchmarks/benchmark_adler32_copy.cc +++ b/test/benchmarks/benchmark_adler32_copy.cc @@ -86,6 +86,10 @@ class adler32_copy: public benchmark::Fixture { BENCHMARK_ADLER32_BASELINE_COPY(c, adler32_c, 1); +#ifdef DISABLE_RUNTIME_CPU_DETECTION +BENCHMARK_ADLER32_BASELINE_COPY(native, native_adler32, 1); +#else + #ifdef ARM_NEON /* If we inline this copy for neon, the function would go here */ //BENCHMARK_ADLER32_COPY(neon, adler32_neon, test_cpu_features.arm.has_neon); @@ -122,3 +126,5 @@ BENCHMARK_ADLER32_COPY(avx512, adler32_fold_copy_avx512, test_cpu_features.x86.h BENCHMARK_ADLER32_BASELINE_COPY(avx512_vnni_baseline, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni); BENCHMARK_ADLER32_COPY(avx512_vnni, adler32_fold_copy_avx512_vnni, test_cpu_features.x86.has_avx512vnni); #endif + +#endif diff --git a/test/benchmarks/benchmark_compare256.cc b/test/benchmarks/benchmark_compare256.cc index 188539df51..efdbbacc9f 100644 --- a/test/benchmarks/benchmark_compare256.cc +++ b/test/benchmarks/benchmark_compare256.cc @@ -62,6 +62,10 @@ class compare256: public benchmark::Fixture { BENCHMARK_COMPARE256(c, compare256_c, 1); +#ifdef DISABLE_RUNTIME_CPU_DETECTION +BENCHMARK_COMPARE256(native, native_compare256, 1); +#else + #if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN BENCHMARK_COMPARE256(unaligned_16, compare256_unaligned_16, 1); #ifdef HAVE_BUILTIN_CTZ @@ -86,3 +90,5 @@ BENCHMARK_COMPARE256(power9, compare256_power9, test_cpu_features.power.has_arch #ifdef RISCV_RVV BENCHMARK_COMPARE256(rvv, compare256_rvv, test_cpu_features.riscv.has_rvv); #endif + +#endif diff --git a/test/benchmarks/benchmark_crc32.cc b/test/benchmarks/benchmark_crc32.cc index 062d229184..8611b28052 100644 --- a/test/benchmarks/benchmark_crc32.cc +++ b/test/benchmarks/benchmark_crc32.cc @@ -58,6 +58,10 @@ class crc32: public benchmark::Fixture { BENCHMARK_CRC32(braid, PREFIX(crc32_braid), 1); +#ifdef DISABLE_RUNTIME_CPU_DETECTION +BENCHMARK_CRC32(native, native_crc32, 1); +#else + #ifdef ARM_ACLE BENCHMARK_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32); #endif @@ -75,3 +79,5 @@ BENCHMARK_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq) /* CRC32 fold does a memory copy while hashing */ BENCHMARK_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512 && test_cpu_features.x86.has_vpclmulqdq)); #endif + +#endif diff --git a/test/benchmarks/benchmark_main.cc b/test/benchmarks/benchmark_main.cc index 3ef2c5e87d..f3c227bdf7 100644 --- a/test/benchmarks/benchmark_main.cc +++ b/test/benchmarks/benchmark_main.cc @@ -12,13 +12,17 @@ extern "C" { # include "zbuild.h" # include "../test_cpu_features.h" +# ifndef DISABLE_RUNTIME_CPU_DETECTION struct cpu_features test_cpu_features; +# endif } #endif int main(int argc, char** argv) { #ifndef BUILD_ALT +# ifndef DISABLE_RUNTIME_CPU_DETECTION cpu_check_features(&test_cpu_features); +# endif #endif ::benchmark::Initialize(&argc, argv); diff --git a/test/benchmarks/benchmark_slidehash.cc b/test/benchmarks/benchmark_slidehash.cc index e15caa4f55..e51aa685a9 100644 --- a/test/benchmarks/benchmark_slidehash.cc +++ b/test/benchmarks/benchmark_slidehash.cc @@ -69,6 +69,10 @@ class slide_hash: public benchmark::Fixture { BENCHMARK_SLIDEHASH(c, slide_hash_c, 1); +#ifdef DISABLE_RUNTIME_CPU_DETECTION +BENCHMARK_SLIDEHASH(native, native_slide_hash, 1); +#else + #ifdef ARM_SIMD BENCHMARK_SLIDEHASH(armv6, slide_hash_armv6, test_cpu_features.arm.has_simd); #endif @@ -90,3 +94,5 @@ BENCHMARK_SLIDEHASH(sse2, slide_hash_sse2, test_cpu_features.x86.has_sse2); #ifdef X86_AVX2 BENCHMARK_SLIDEHASH(avx2, slide_hash_avx2, test_cpu_features.x86.has_avx2); #endif + +#endif diff --git a/test/test_adler32.cc b/test/test_adler32.cc index 2d3f4a98cf..85c4c78bbc 100644 --- a/test/test_adler32.cc +++ b/test/test_adler32.cc @@ -365,6 +365,10 @@ INSTANTIATE_TEST_SUITE_P(adler32, adler32_variant, testing::ValuesIn(tests)); TEST_ADLER32(c, adler32_c, 1) +#ifdef DISABLE_RUNTIME_CPU_DETECTION +TEST_ADLER32(native, native_adler32, 1) +#else + #ifdef ARM_NEON TEST_ADLER32(neon, adler32_neon, test_cpu_features.arm.has_neon) #elif defined(POWER8_VSX) @@ -387,3 +391,5 @@ TEST_ADLER32(avx512, adler32_avx512, test_cpu_features.x86.has_avx512) #ifdef X86_AVX512VNNI TEST_ADLER32(avx512_vnni, adler32_avx512_vnni, test_cpu_features.x86.has_avx512vnni) #endif + +#endif diff --git a/test/test_compare256.cc b/test/test_compare256.cc index 9aa7c977bc..ec2136aeba 100644 --- a/test/test_compare256.cc +++ b/test/test_compare256.cc @@ -61,6 +61,10 @@ static inline void compare256_match_check(compare256_func compare256) { TEST_COMPARE256(c, compare256_c, 1) +#ifdef DISABLE_RUNTIME_CPU_DETECTION +TEST_COMPARE256(native, native_compare256, 1) +#else + #if defined(UNALIGNED_OK) && BYTE_ORDER == LITTLE_ENDIAN TEST_COMPARE256(unaligned_16, compare256_unaligned_16, 1) #ifdef HAVE_BUILTIN_CTZ @@ -85,3 +89,5 @@ TEST_COMPARE256(power9, compare256_power9, test_cpu_features.power.has_arch_3_00 #ifdef RISCV_RVV TEST_COMPARE256(rvv, compare256_rvv, test_cpu_features.riscv.has_rvv) #endif + +#endif diff --git a/test/test_cpu_features.h b/test/test_cpu_features.h index 1bb4b13a08..70b74283d2 100644 --- a/test/test_cpu_features.h +++ b/test/test_cpu_features.h @@ -1,8 +1,12 @@ #ifndef TEST_CPU_FEATURES_H #define TEST_CPU_FEATURES_H -#include "cpu_features.h" +#ifndef DISABLE_RUNTIME_CPU_DETECTION + +# include "cpu_features.h" extern struct cpu_features test_cpu_features; #endif + +#endif diff --git a/test/test_crc32.cc b/test/test_crc32.cc index 83bf311316..f8322085e6 100644 --- a/test/test_crc32.cc +++ b/test/test_crc32.cc @@ -208,6 +208,10 @@ INSTANTIATE_TEST_SUITE_P(crc32, crc32_variant, testing::ValuesIn(tests)); TEST_CRC32(braid, PREFIX(crc32_braid), 1) +#ifdef DISABLE_RUNTIME_CPU_DETECTION +TEST_CRC32(native, native_crc32, 1) +#else + #ifdef ARM_ACLE TEST_CRC32(acle, crc32_acle, test_cpu_features.arm.has_crc32) #endif @@ -223,3 +227,5 @@ TEST_CRC32(pclmulqdq, crc32_pclmulqdq, test_cpu_features.x86.has_pclmulqdq) #ifdef X86_VPCLMULQDQ_CRC TEST_CRC32(vpclmulqdq, crc32_vpclmulqdq, (test_cpu_features.x86.has_pclmulqdq && test_cpu_features.x86.has_avx512 && test_cpu_features.x86.has_vpclmulqdq)) #endif + +#endif diff --git a/test/test_main.cc b/test/test_main.cc index 82b39e4874..994a3ef389 100644 --- a/test/test_main.cc +++ b/test/test_main.cc @@ -7,13 +7,16 @@ extern "C" { # include "zbuild.h" # include "test_cpu_features.h" - +# ifndef DISABLE_RUNTIME_CPU_DETECTION struct cpu_features test_cpu_features; +# endif } GTEST_API_ int main(int argc, char **argv) { printf("Running main() from %s\n", __FILE__); +#ifndef DISABLE_RUNTIME_CPU_DETECTION cpu_check_features(&test_cpu_features); +#endif testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); }