Skip to content

Commit

Permalink
cat: updates in vector-FLOPs benchmarks
Browse files Browse the repository at this point in the history
Include kernels that perform scalar floating-point operations.

These changes have been tested on the Intel Sapphire Rapids and IBM
POWER10 architectures.
  • Loading branch information
dbarry9 committed Feb 20, 2025
1 parent a1f0c41 commit 33d73fd
Show file tree
Hide file tree
Showing 11 changed files with 692 additions and 325 deletions.
58 changes: 29 additions & 29 deletions src/counter_analysis_toolkit/Makefile
Original file line number Diff line number Diff line change
@@ -49,9 +49,9 @@ endif
ifeq ($(ARCH),POWER)
FLOP+=-maltivec -DPOWER
VECSRC=vec_fma_hp.o vec_fma_sp.o vec_fma_dp.o vec_nonfma_hp.o vec_nonfma_sp.o vec_nonfma_dp.o
VEC=-maltivec -O0 -DPOWER
VEC_FMA=-maltivec -O0 -DPOWER
VEC_ALL=$(VEC) -O0 -DPOWER
VEC=-maltivec -DPOWER
VEC_FMA=-maltivec -DPOWER
VEC_ALL=$(VEC) -DPOWER
endif
ifeq ($(ARCH),ARM)
FLOP+=-march=armv8.2-a+fp16 -DARM
@@ -109,58 +109,58 @@ weak_symbols.o: weak_symbols.c vec.h
-$(CC) -c $(CFLAGS) weak_symbols.c

vec.o: vec.c vec.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) -D$(ARCH) $(VEC_META) vec.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) -D$(ARCH) $(VEC_META) vec.c

vec_scalar_verify.o: vec_scalar_verify.c vec_scalar_verify.h cat_arch.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC_ALL) vec_scalar_verify.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC_ALL) vec_scalar_verify.c

vec_fma_hp.o: vec_fma_hp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC_FMA) vec_fma_hp.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC_FMA) vec_fma_hp.c

vec_fma_hp: vec_fma_hp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128_FMA) vec_fma_hp.c -o vec_fma_hp-128B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256_FMA) vec_fma_hp.c -o vec_fma_hp-256B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512_FMA) vec_fma_hp.c -o vec_fma_hp-512B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128_FMA) vec_fma_hp.c -o vec_fma_hp-128B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256_FMA) vec_fma_hp.c -o vec_fma_hp-256B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512_FMA) vec_fma_hp.c -o vec_fma_hp-512B.o

vec_fma_sp.o: vec_fma_sp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC_FMA) vec_fma_sp.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC_FMA) vec_fma_sp.c

vec_fma_sp: vec_fma_sp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128_FMA) vec_fma_sp.c -o vec_fma_sp-128B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256_FMA) vec_fma_sp.c -o vec_fma_sp-256B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512_FMA) vec_fma_sp.c -o vec_fma_sp-512B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128_FMA) vec_fma_sp.c -o vec_fma_sp-128B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256_FMA) vec_fma_sp.c -o vec_fma_sp-256B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512_FMA) vec_fma_sp.c -o vec_fma_sp-512B.o

vec_fma_dp.o: vec_fma_dp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC_FMA) vec_fma_dp.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC_FMA) vec_fma_dp.c

vec_fma_dp: vec_fma_dp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128_FMA) vec_fma_dp.c -o vec_fma_dp-128B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256_FMA) vec_fma_dp.c -o vec_fma_dp-256B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512_FMA) vec_fma_dp.c -o vec_fma_dp-512B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128_FMA) vec_fma_dp.c -o vec_fma_dp-128B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256_FMA) vec_fma_dp.c -o vec_fma_dp-256B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512_FMA) vec_fma_dp.c -o vec_fma_dp-512B.o

vec_nonfma_hp.o: vec_nonfma_hp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC) vec_nonfma_hp.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC) vec_nonfma_hp.c

vec_nonfma_hp: vec_nonfma_hp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128) vec_nonfma_hp.c -o vec_nonfma_hp-128B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256) vec_nonfma_hp.c -o vec_nonfma_hp-256B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512) vec_nonfma_hp.c -o vec_nonfma_hp-512B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128) vec_nonfma_hp.c -o vec_nonfma_hp-128B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256) vec_nonfma_hp.c -o vec_nonfma_hp-256B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512) vec_nonfma_hp.c -o vec_nonfma_hp-512B.o

vec_nonfma_sp.o: vec_nonfma_sp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC) vec_nonfma_sp.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC) vec_nonfma_sp.c

vec_nonfma_sp: vec_nonfma_sp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128) vec_nonfma_sp.c -o vec_nonfma_sp-128B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256) vec_nonfma_sp.c -o vec_nonfma_sp-256B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512) vec_nonfma_sp.c -o vec_nonfma_sp-512B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128) vec_nonfma_sp.c -o vec_nonfma_sp-128B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256) vec_nonfma_sp.c -o vec_nonfma_sp-256B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512) vec_nonfma_sp.c -o vec_nonfma_sp-512B.o

vec_nonfma_dp.o: vec_nonfma_dp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC) vec_nonfma_dp.c
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC) vec_nonfma_dp.c

vec_nonfma_dp: vec_nonfma_dp.c vec_scalar_verify.h
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128) vec_nonfma_dp.c -o vec_nonfma_dp-128B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256) vec_nonfma_dp.c -o vec_nonfma_dp-256B.o
-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512) vec_nonfma_dp.c -o vec_nonfma_dp-512B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128) vec_nonfma_dp.c -o vec_nonfma_dp-128B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256) vec_nonfma_dp.c -o vec_nonfma_dp-256B.o
-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512) vec_nonfma_dp.c -o vec_nonfma_dp-512B.o

cat_collect:
$(CC) $(CFLAGS) -fopenmp $(INCFLAGS) main.c $(wildcard *.o) -o cat_collect $(LDFLAGS)
Expand Down
45 changes: 5 additions & 40 deletions src/counter_analysis_toolkit/cat_arch.h
Original file line number Diff line number Diff line change
@@ -123,38 +123,17 @@ typedef float64x2_t DP_VEC_TYPE;
#define ADD_VEC_SH(_I_,_J_) vaddh_f16( _I_ , _J_ );
#define MUL_VEC_SH(_I_,_J_) vmulh_f16( _I_ , _J_ );
#define SQRT_VEC_SH(_I_) vsqrth_f16( _I_ );
#define FMA_VEC_SH(_out_,_I_,_J_,_K_) {\
HP_VEC_TYPE arg1 = SET_VEC_PH(_I_);\
HP_VEC_TYPE arg2 = SET_VEC_PH(_J_);\
HP_VEC_TYPE arg3 = SET_VEC_PH(_K_);\
HP_VEC_TYPE argTmp;\
argTmp = FMA_VEC_PH( arg1 , arg2 , arg3 );\
_out_ = ((half*)&(argTmp))[0];\
}
#define FMA_VEC_SH(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;

#define SET_VEC_SS(_I_) _I_ ;
#define ADD_VEC_SS(_I_,_J_) _I_ + _J_ ;
#define MUL_VEC_SS(_I_,_J_) _I_ * _J_ ;
#define FMA_VEC_SS(_out_,_I_,_J_,_K_) {\
SP_VEC_TYPE arg1 = SET_VEC_PS(_I_);\
SP_VEC_TYPE arg2 = SET_VEC_PS(_J_);\
SP_VEC_TYPE arg3 = SET_VEC_PS(_K_);\
SP_VEC_TYPE argTmp;\
argTmp = FMA_VEC_PS( arg1 , arg2 , arg3 );\
_out_ = ((SP_SCALAR_TYPE*)&(argTmp))[0];\
}
#define FMA_VEC_SS(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;

#define SET_VEC_SD(_I_) _I_ ;
#define ADD_VEC_SD(_I_,_J_) _I_ + _J_ ;
#define MUL_VEC_SD(_I_,_J_) _I_ * _J_ ;
#define FMA_VEC_SD(_out_,_I_,_J_,_K_) {\
DP_VEC_TYPE arg1 = SET_VEC_PD(_I_);\
DP_VEC_TYPE arg2 = SET_VEC_PD(_J_);\
DP_VEC_TYPE arg3 = SET_VEC_PD(_K_);\
DP_VEC_TYPE argTmp;\
argTmp = FMA_VEC_PD( arg1 , arg2 , arg3 );\
_out_ = ((DP_SCALAR_TYPE*)&(argTmp))[0];\
}
#define FMA_VEC_SD(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;

#elif defined(POWER)
void test_hp_power_VEC( int instr_per_loop, uint64 iterations, int EventSet, FILE *fp );
@@ -187,25 +166,11 @@ typedef __vector double DP_VEC_TYPE;
#define SET_VEC_SS(_I_) _I_ ;
#define ADD_VEC_SS(_I_,_J_) _I_ + _J_ ;
#define MUL_VEC_SS(_I_,_J_) _I_ * _J_ ;
#define FMA_VEC_SS(_out_,_I_,_J_,_K_) {\
SP_VEC_TYPE arg1 = SET_VEC_PS(_I_);\
SP_VEC_TYPE arg2 = SET_VEC_PS(_J_);\
SP_VEC_TYPE arg3 = SET_VEC_PS(_K_);\
SP_VEC_TYPE argTmp;\
argTmp = FMA_VEC_PS( arg1 , arg2 , arg3 );\
_out_ = ((SP_SCALAR_TYPE*)&(argTmp))[0];\
}
#define FMA_VEC_SS(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;

#define SET_VEC_SD(_I_) _I_ ;
#define ADD_VEC_SD(_I_,_J_) _I_ + _J_ ;
#define MUL_VEC_SD(_I_,_J_) _I_ * _J_ ;
#define FMA_VEC_SD(_out_,_I_,_J_,_K_) {\
DP_VEC_TYPE arg1 = SET_VEC_PD(_I_);\
DP_VEC_TYPE arg2 = SET_VEC_PD(_J_);\
DP_VEC_TYPE arg3 = SET_VEC_PD(_K_);\
DP_VEC_TYPE argTmp;\
argTmp = FMA_VEC_PD( arg1 , arg2 , arg3 );\
_out_ = ((DP_SCALAR_TYPE*)&(argTmp))[0];\
}
#define FMA_VEC_SD(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;

#endif
Loading

0 comments on commit 33d73fd

Please sign in to comment.