Skip to content

Commit

Permalink
Split CPU features checks and CPU-specific function prototypes and reduce include-dependencies.
Browse files Browse the repository at this point in the history

Signed-off-by: Vladislav Shchapov <[email protected]>
  • Loading branch information
phprus authored and Dead2 committed Feb 22, 2024
1 parent a09b42d commit ac25a2e
Show file tree
Hide file tree
Showing 37 changed files with 265 additions and 261 deletions.
15 changes: 7 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ endif()
set(GENERIC_ARCHDIR "arch/generic")

set(ZLIB_ARCH_SRCS)
set(ZLIB_ARCH_HDRS ${GENERIC_ARCHDIR}/generic_features.h)
set(ZLIB_ARCH_HDRS ${GENERIC_ARCHDIR}/generic_functions.h)

if(BASEARCH_ARM_FOUND)
set(ARCHDIR "arch/arm")
Expand Down Expand Up @@ -647,7 +647,7 @@ if(WITH_OPTIM)
endif()
endif()
endif()
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/arm_features.h ${ARCHDIR}/arm_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/arm_features.c)
if(WITH_ACLE)
check_acle_compiler_flag()
Expand Down Expand Up @@ -719,7 +719,7 @@ if(WITH_OPTIM)
add_definitions(-DPOWER_FEATURES)
endif()
if(HAVE_VMX OR HAVE_POWER8_INTRIN OR HAVE_POWER9_INTRIN)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power_features.h ${ARCHDIR}/power_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power_features.c)
endif()
# VMX specific options and files
Expand Down Expand Up @@ -769,7 +769,7 @@ if(WITH_OPTIM)
if(HAVE_RVV_INTRIN)
add_definitions(-DRISCV_FEATURES)
add_definitions(-DRISCV_RVV)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_features.h)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/riscv_features.h ${ARCHDIR}/riscv_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/riscv_features.c)
# FIXME: we will not set compile flags for riscv_features.c when
# the kernels update hwcap or hwprobe for riscv
Expand All @@ -784,7 +784,7 @@ if(WITH_OPTIM)
check_s390_intrinsics()
if(HAVE_S390_INTRIN)
add_definitions(-DS390_FEATURES)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_features.h)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/s390_features.h ${ARCHDIR}/s390_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/s390_features.c)
endif()
if(WITH_DFLTCC_DEFLATE OR WITH_DFLTCC_INFLATE)
Expand All @@ -811,7 +811,7 @@ if(WITH_OPTIM)
endif()
elseif(BASEARCH_X86_FOUND)
add_definitions(-DX86_FEATURES)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h)
list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/x86_features.h ${ARCHDIR}/x86_functions.h)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/x86_features.c)
if(MSVC)
list(APPEND ZLIB_ARCH_HDRS fallback_builtins.h)
Expand Down Expand Up @@ -1002,11 +1002,10 @@ set(ZLIB_PUBLIC_HDRS
${CMAKE_CURRENT_BINARY_DIR}/zlib${SUFFIX}.h
)
set(ZLIB_PRIVATE_HDRS
arch/generic/adler32_fold_c.h
arch/generic/crc32_fold_c.h
adler32_p.h
chunkset_tpl.h
compare256_rle.h
cpu_functions.h
cpu_features.h
crc32_braid_p.h
crc32_braid_comb_p.h
Expand Down
46 changes: 3 additions & 43 deletions arch/arm/arm_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
* For conditions of distribution and use, see copyright notice in zlib.h
*/

#ifndef ARM_H_
#define ARM_H_
#ifndef ARM_FEATURES_H_
#define ARM_FEATURES_H_

struct arm_cpu_features {
int has_simd;
Expand All @@ -13,44 +13,4 @@ struct arm_cpu_features {

void Z_INTERNAL arm_check_features(struct arm_cpu_features *features);

#ifdef CPU_FEATURES_H_

#ifdef ARM_NEON
uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t chunksize_neon(void);
uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);

# ifdef HAVE_BUILTIN_CTZLL
uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1);
# ifdef DEFLATE_H_
uint32_t longest_match_neon(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match);
# endif
# endif
# ifdef DEFLATE_H_
void slide_hash_neon(deflate_state *s);
# endif
# ifdef INFLATE_H_
void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
# endif
#endif

#ifdef ARM_ACLE
uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);

# ifdef DEFLATE_H_
void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val);
# endif
#endif

#ifdef ARM_SIMD
# ifdef DEFLATE_H_
void slide_hash_armv6(deflate_state *s);
# endif
#endif

#endif

#endif /* ARM_H_ */
#endif /* ARM_FEATURES_H_ */
35 changes: 35 additions & 0 deletions arch/arm/arm_functions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/* arm_functions.h -- ARM implementations for arch-specific functions.
* For conditions of distribution and use, see copyright notice in zlib.h
*
* This header only declares prototypes and contains no #include of its own.
* NOTE(review): uint32_t/uint8_t/size_t, deflate_state, Pos and PREFIX3()
* therefore have to be declared by the including file before this header is
* pulled in -- confirm the include order at every use site.
*/

#ifndef ARM_FUNCTIONS_H_
#define ARM_FUNCTIONS_H_


/* Prototypes of the *_neon functions; declared only when ARM_NEON is defined. */
#ifdef ARM_NEON
uint32_t adler32_neon(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t chunksize_neon(void);
uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);

/* The compare256/longest_match prototypes additionally need HAVE_BUILTIN_CTZLL. */
# ifdef HAVE_BUILTIN_CTZLL
uint32_t compare256_neon(const uint8_t *src0, const uint8_t *src1);
uint32_t longest_match_neon(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_neon(deflate_state *const s, Pos cur_match);
# endif
/* Declared outside the HAVE_BUILTIN_CTZLL block: these depend on ARM_NEON only. */
void slide_hash_neon(deflate_state *s);
void inflate_fast_neon(PREFIX3(stream) *strm, uint32_t start);
#endif

/* Prototypes of the *_acle functions; declared only when ARM_ACLE is defined. */
#ifdef ARM_ACLE
uint32_t crc32_acle(uint32_t crc, const uint8_t *buf, size_t len);

void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
uint32_t update_hash_acle(deflate_state *const s, uint32_t h, uint32_t val);
#endif

/* Prototype of the armv6 slide_hash variant; declared only when ARM_SIMD is defined. */
#ifdef ARM_SIMD
void slide_hash_armv6(deflate_state *s);
#endif

#endif /* ARM_FUNCTIONS_H_ */
8 changes: 4 additions & 4 deletions arch/generic/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ adler32_c.o: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h
adler32_c.lo: $(SRCDIR)/adler32_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/adler32_p.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_c.c

adler32_fold_c.o: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/adler32_fold_c.h
adler32_fold_c.o: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c

adler32_fold_c.lo: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/adler32_fold_c.h
adler32_fold_c.lo: $(SRCDIR)/adler32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_fold_c.c

chunkset_c.o: $(SRCDIR)/chunkset_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/chunkset_tpl.h $(SRCTOP)/inffast_tpl.h
Expand All @@ -53,10 +53,10 @@ crc32_braid_c.o: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_b
crc32_braid_c.lo: $(SRCDIR)/crc32_braid_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/crc32_braid_p.h $(SRCTOP)/crc32_braid_tbl.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_braid_c.c

crc32_fold_c.o: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/crc32_fold_c.h
crc32_fold_c.o: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c

crc32_fold_c.lo: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h $(SRCDIR)/crc32_fold_c.h
crc32_fold_c.lo: $(SRCDIR)/crc32_fold_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/functable.h
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_fold_c.c

insert_string_c.o: $(SRCDIR)/insert_string_c.c $(SRCTOP)/zbuild.h $(SRCTOP)/deflate.h $(SRCTOP)/insert_string_tpl.h
Expand Down
1 change: 0 additions & 1 deletion arch/generic/adler32_fold_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

#include "zbuild.h"
#include "functable.h"
#include "adler32_fold_c.h"

#include <limits.h>

Expand Down
11 changes: 0 additions & 11 deletions arch/generic/adler32_fold_c.h

This file was deleted.

3 changes: 1 addition & 2 deletions arch/generic/crc32_fold_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "zutil.h"
#include "functable.h"
#include "crc32.h"

#include "crc32_fold_c.h"

Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc) {
crc->value = CRC32_INITIAL_VALUE;
return crc->value;
Expand Down
13 changes: 0 additions & 13 deletions arch/generic/crc32_fold_c.h

This file was deleted.

arch/generic/generic_features.h → arch/generic/generic_functions.h (file renamed)
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
/* generic_features.h -- generic C implementations for arch-specific features
/* generic_functions.h -- generic C implementations for arch-specific functions.
* For conditions of distribution and use, see copyright notice in zlib.h
*/

#ifndef GENERIC_FEATURES_H_
#define GENERIC_FEATURES_H_
#ifndef GENERIC_FUNCTIONS_H_
#define GENERIC_FUNCTIONS_H_

#include "zendian.h"

Z_INTERNAL uint32_t crc32_fold_reset_c(crc32_fold *crc);
Z_INTERNAL void crc32_fold_copy_c(crc32_fold *crc, uint8_t *dst, const uint8_t *src, size_t len);
Z_INTERNAL void crc32_fold_c(crc32_fold *crc, const uint8_t *src, size_t len, uint32_t init_crc);
Z_INTERNAL uint32_t crc32_fold_final_c(crc32_fold *crc);

Z_INTERNAL uint32_t adler32_fold_copy_c(uint32_t adler, uint8_t *dst, const uint8_t *src, size_t len);


typedef uint32_t (*adler32_func)(uint32_t adler, const uint8_t *buf, size_t len);
typedef uint32_t (*compare256_func)(const uint8_t *src0, const uint8_t *src1);
typedef uint32_t (*crc32_func)(uint32_t crc32, const uint8_t *buf, size_t len);

uint32_t adler32_c(uint32_t adler, const uint8_t *buf, size_t len);

uint32_t chunksize_c(void);
uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#ifdef INFLATE_H_
void inflate_fast_c(PREFIX3(stream) *strm, uint32_t start);
#endif

uint32_t PREFIX(crc32_braid)(uint32_t crc, const uint8_t *buf, size_t len);

Expand All @@ -29,8 +38,7 @@ uint32_t compare256_unaligned_16(const uint8_t *src0, const uint8_t *src1);
# endif
#endif

#ifdef DEFLATE_H_
typedef void (*slide_hash_func)(deflate_state *s);
typedef void (*slide_hash_func)(deflate_state *s);

void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
Expand Down Expand Up @@ -58,5 +66,3 @@ uint32_t longest_match_slow_c(deflate_state *const s, Pos cur_match);
# endif

#endif

#endif
38 changes: 3 additions & 35 deletions arch/power/power_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
* For conditions of distribution and use, see copyright notice in zlib.h
*/

#ifndef POWER_H_
#define POWER_H_
#ifndef POWER_FEATURES_H_
#define POWER_FEATURES_H_

struct power_cpu_features {
int has_altivec;
Expand All @@ -15,36 +15,4 @@ struct power_cpu_features {

void Z_INTERNAL power_check_features(struct power_cpu_features *features);

#ifdef CPU_FEATURES_H_

#ifdef PPC_VMX
uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len);
# ifdef DEFLATE_H_
void slide_hash_vmx(deflate_state *s);
# endif
#endif

#ifdef POWER8_VSX
uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t chunksize_power8(void);
uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left);
uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
# ifdef DEFLATE_H_
void slide_hash_power8(deflate_state *s);
# endif
# ifdef INFLATE_H_
void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start);
# endif
#endif

#ifdef POWER9
uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1);
# ifdef DEFLATE_H_
uint32_t longest_match_power9(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
# endif
#endif

#endif

#endif /* POWER_H_ */
#endif /* POWER_FEATURES_H_ */
30 changes: 30 additions & 0 deletions arch/power/power_functions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/* power_functions.h -- POWER implementations for arch-specific functions.
* Copyright (C) 2020 Matheus Castanho <[email protected]>, IBM
* Copyright (C) 2021 Mika T. Lindqvist <[email protected]>
* For conditions of distribution and use, see copyright notice in zlib.h
*
* This header only declares prototypes and contains no #include of its own.
* NOTE(review): uint32_t/uint8_t/size_t, deflate_state, Pos and PREFIX3()
* therefore have to be declared by the including file before this header is
* pulled in -- confirm the include order at every use site.
*/

#ifndef POWER_FUNCTIONS_H_
#define POWER_FUNCTIONS_H_

/* Prototypes of the *_vmx functions; declared only when PPC_VMX is defined. */
#ifdef PPC_VMX
uint32_t adler32_vmx(uint32_t adler, const uint8_t *buf, size_t len);
void slide_hash_vmx(deflate_state *s);
#endif

/* Prototypes of the *_power8 functions; declared only when POWER8_VSX is defined. */
#ifdef POWER8_VSX
uint32_t adler32_power8(uint32_t adler, const uint8_t *buf, size_t len);
uint32_t chunksize_power8(void);
uint8_t* chunkmemset_safe_power8(uint8_t *out, unsigned dist, unsigned len, unsigned left);
uint32_t crc32_power8(uint32_t crc, const uint8_t *buf, size_t len);
void slide_hash_power8(deflate_state *s);
void inflate_fast_power8(PREFIX3(stream) *strm, uint32_t start);
#endif

/* Prototypes of the *_power9 functions; declared only when POWER9 is defined. */
#ifdef POWER9
uint32_t compare256_power9(const uint8_t *src0, const uint8_t *src1);
uint32_t longest_match_power9(deflate_state *const s, Pos cur_match);
uint32_t longest_match_slow_power9(deflate_state *const s, Pos cur_match);
#endif

#endif /* POWER_FUNCTIONS_H_ */
Loading

0 comments on commit ac25a2e

Please sign in to comment.