From e3ff3193860f372c6419618209d77fab88c8325f Mon Sep 17 00:00:00 2001 From: Willem Deconinck Date: Wed, 24 Jul 2024 14:05:38 +0000 Subject: [PATCH] Make fiat-drhook-sanity a unit-test, refactor CMake and other cosmetic cleanup --- CMakeLists.txt | 11 +- cmake/FindPAPI.cmake | 4 +- src/fiat/CMakeLists.txt | 6 +- src/fiat/drhook/drhook.c | 25 ++--- src/fiat/drhook/drhook_papi.c | 84 ++++++++------- src/fiat/drhook/drhook_papi.h | 8 +- .../internal/drhook_run_omp_parallel.F90 | 17 ++- src/programs/CMakeLists.txt | 28 ----- src/programs/fiat-drhook-sanity.F90 | 74 ------------- src/programs/mysecond.c | 27 ----- tests/CMakeLists.txt | 42 ++++++++ tests/test_drhook_counters.F90 | 100 ++++++++++++++++++ .../test_drhook_counters_gemm.F90 | 12 ++- .../test_drhook_counters_stream.F90 | 42 ++++---- 14 files changed, 248 insertions(+), 232 deletions(-) delete mode 100644 src/programs/fiat-drhook-sanity.F90 delete mode 100644 src/programs/mysecond.c create mode 100644 tests/test_drhook_counters.F90 rename src/programs/fiat-drhook-sanity-gemm.F90 => tests/test_drhook_counters_gemm.F90 (88%) rename src/programs/fiat-drhook-sanity-stream.F90 => tests/test_drhook_counters_stream.F90 (95%) diff --git a/CMakeLists.txt b/CMakeLists.txt index f175cf5..afd05b4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,9 +29,9 @@ ecbuild_add_option( FEATURE MPI DESCRIPTION "Support for MPI distributed parallelism" REQUIRED_PACKAGES "MPI COMPONENTS Fortran" ) -ecbuild_add_option( FEATURE PAPI - DESCRIPTION "Support for HW counters in drhook via PAPI" - REQUIRED_PACKAGES "OpenMP COMPONENTS C" "PAPI") +ecbuild_add_option( FEATURE DR_HOOK_PAPI + DESCRIPTION "Support for HW counters in DR_HOOK via PAPI" + REQUIRED_PACKAGES "PAPI") ecbuild_find_package( fckit QUIET ) ecbuild_add_option( FEATURE FCKIT @@ -54,11 +54,6 @@ ecbuild_add_option( FEATURE WARNINGS DEFAULT ON DESCRIPTION "Add warnings to compiler" ) -ecbuild_add_option( FEATURE MKL - DESCRIPTION "Use MKL for BLAS and/or FFTW" - DEFAULT ON - REQUIRED_PACKAGES "MKL" ) - ecbuild_find_package( NAME Realtime QUIET ) ### Sources diff --git a/cmake/FindPAPI.cmake b/cmake/FindPAPI.cmake index f778f51..35c3e6f 100644 --- a/cmake/FindPAPI.cmake +++ b/cmake/FindPAPI.cmake @@ -24,12 +24,12 @@ find_path(PAPI_ROOT find_library(PAPI_LIBRARIES # Pick the static library first for easier run-time linking. NAMES libpapi.so libpapi.a papi - HINTS ${PAPI_ROOT}/lib ${HILTIDEPS}/lib + HINTS ${PAPI_ROOT}/lib ) find_path(PAPI_INCLUDE_DIRS NAMES papi.h - HINTS ${PAPI_ROOT}/include ${HILTIDEPS}/include + HINTS ${PAPI_ROOT}/include ) include(FindPackageHandleStandardArgs) diff --git a/src/fiat/CMakeLists.txt b/src/fiat/CMakeLists.txt index ecae913..624a195 100644 --- a/src/fiat/CMakeLists.txt +++ b/src/fiat/CMakeLists.txt @@ -86,15 +86,13 @@ else() endif() if( HAVE_OMP ) - target_link_libraries( fiat PRIVATE OpenMP::OpenMP_Fortran ) - endif() -if ( HAVE_PAPI ) +if ( HAVE_DR_HOOK_PAPI ) target_link_libraries ( fiat PRIVATE ${PAPI_LIBRARIES} ) target_include_directories ( fiat PRIVATE ${PAPI_INCLUDE_DIRS} ) - target_compile_definitions ( fiat PRIVATE HKPAPI ) + target_compile_definitions ( fiat PRIVATE DR_HOOK_HAVE_PAPI=1 ) endif() fiat_target_ignore_missing_symbols( TARGET fiat SYMBOLS diff --git a/src/fiat/drhook/drhook.c b/src/fiat/drhook/drhook.c index 65eb819..9836cc5 100644 --- a/src/fiat/drhook/drhook.c +++ b/src/fiat/drhook/drhook.c @@ -473,7 +473,7 @@ typedef struct drhook_key_t { long long int mem_maxhwm, mem_maxrss, mem_maxstk, mem_maxpagdelta; long long int paging_in; -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) long_long counters_in[NPAPICNTRS]; long_long delta_counters_all[NPAPICNTRS]; long_long delta_counters_child[NPAPICNTRS]; @@ -507,7 +507,7 @@ typedef struct drhook_prof_t { double pc; double total; double self; -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) long_long counter_tot[NPAPICNTRS]; long_long counter_self[NPAPICNTRS]; #endif @@ -1060,7 +1060,7 @@ remove_calltree(int tid, drhook_key_t *keyptr, if (treeptr->prev) { drhook_key_t *parent_keyptr = treeptr->prev->keyptr; if (parent_keyptr) { /* extra security */ -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) drhook_papi_add(NULL, parent_keyptr->delta_counters_child, delta_counters @@ -2827,7 +2827,7 @@ getkey(int tid, const char *name, int name_len, if (opt_walltime) keyptr->wall_in = walltime ? *walltime : WALLTIME(); if (opt_cputime) keyptr->cpu_in = cputime ? *cputime : CPUTIME(); if (opt_cycles) keyptr->cycles_in = cycles ? *cycles : ec_get_cycles(); -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) drhook_papi_readAll(keyptr->counters_in); #endif if (any_memstat) memstat(keyptr,&tid,1); @@ -2932,7 +2932,7 @@ putkey(int tid, drhook_key_t *keyptr, const char *name, int name_len, double delta_wall = 0; double delta_cpu = 0; long_long * delta_counters=NULL; -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) delta_counters=alloca(drhook_papi_num_counters() * sizeof(long_long) ); drhook_papi_bzero(delta_counters); #endif @@ -2965,7 +2965,7 @@ putkey(int tid, drhook_key_t *keyptr, const char *name, int name_len, if (opt_walltime) keyptr->delta_wall_all += delta_wall; if (opt_cputime) keyptr->delta_cpu_all += delta_cpu; if (opt_cycles) keyptr->delta_cycles_all += delta_cycles; -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) drhook_papi_subtract(delta_counters, NULL , keyptr->counters_in); drhook_papi_add(NULL, keyptr->delta_counters_all, delta_counters); #endif @@ -3095,7 +3095,7 @@ itself(drhook_key_t *keyptr_self, if (opt == 0) { if (opt_wallprof) keyptr->wall_in = walltime ? *walltime : WALLTIME(); else keyptr->cpu_in = cputime ? *cputime : CPUTIME(); -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) drhook_papi_readAll(keyptr->counters_in); #endif keyptr->calls++; @@ -3112,7 +3112,8 @@ itself(drhook_key_t *keyptr_self, } if (delta_time) *delta_time = delta; -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) + long_long cntrs_delta[NPAPICNTRS]; /* cntrs_delta = current - counters_in */ @@ -3412,7 +3413,7 @@ c_drhook_check_watch_(const char *where, } /*** PUBLIC ***/ -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) #define PAPIREAD \ long_long cntrs[NPAPICNTRS]; \ drhook_papi_readAll(cntrs) @@ -3509,7 +3510,7 @@ c_drhook_init_(const char *progname, tabort_delete_lockfile(); drhook_delete_lockfile(); } -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) drhook_papi_init(myproc -1); #endif @@ -4308,7 +4309,7 @@ c_drhook_print_(const int *ftnunitno, drhook_key_t *keyptr = &keydata[t][j]; while (keyptr) { if (keyptr->name && (keyptr->status == 0 || signal_handler_called)) { -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) drhook_papi_subtract(p->counter_self, keyptr->delta_counters_all, keyptr->delta_counters_child); @@ -4580,7 +4581,7 @@ c_drhook_print_(const int *ftnunitno, } /* for (j=0; j #include @@ -8,20 +8,22 @@ #define STD_MSG_LEN 4096 -int * drhook_papi_event_set=NULL; -enum {drhook_papi_notstarted,drhook_papi_running,drhook_papi_failed}; -int drhook_papi_state=0; +int* drhook_papi_event_set=NULL; +enum { + drhook_papi_notstarted, + drhook_papi_running, + drhook_papi_failed +}; +int drhook_papi_state=drhook_papi_notstarted; int drhook_papi_rank=0; /* C style! */ -size_t drhook_max_counter_name=0; /* hardwired for now */ -const char * hookCounters[ NPAPICNTRS ][2]= - { +const char* hookCounters[NPAPICNTRS][2]= { {"PAPI_TOT_CYC","Cycles"}, {"PAPI_FP_OPS","FP Operations"}, {"PAPI_L1_DCA","L1 Access"}, {"PAPI_L2_DCM","L2 Miss"} - }; +}; /* function to use for thread id - it should be better than omp_get_thread_num! @@ -30,7 +32,7 @@ unsigned long safe_thread_num(){ return oml_my_thread()-1; } -const char * drhook_papi_counter_name(int c,int t){ +const char* drhook_papi_counter_name(int c,int t){ return hookCounters[c][t]; } @@ -46,7 +48,7 @@ void drhook_papi_bzero(long_long* a){ } } -void drhook_papi_print(char * s,long_long* a,int header){ +void drhook_papi_print(char* s, long_long* a, int header){ char msg[STD_MSG_LEN]; if (header>0){ char fmt[STD_MSG_LEN]; @@ -88,8 +90,6 @@ void drhook_papi_add(long_long* a,long_long* b, long_long* c){ } } - - // number of counters available to read int drhook_papi_num_counters(){ return NPAPICNTRS; @@ -115,7 +115,7 @@ int drhook_papi_readAll(long_long * counterArray){ printf("DRHOOK:PAPI:PAPI_read: Error reading counters, thread=%ld es=%d %s\n",safe_thread_num(),drhook_papi_event_set[safe_thread_num()],PAPI_strerror(err)); } #if defined(DEBUG) - drhook_papi_print("readAll:",counterArray); + drhook_papi_print("readAll:",counterArray,0); #endif return err; } @@ -142,7 +142,7 @@ int drhook_papi_init(int rank){ } paperr=PAPI_library_init(PAPI_VER_CURRENT); - if (paperr != PAPI_VER_CURRENT){ + if (paperr != PAPI_VER_CURRENT){ snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI:PAPI_library_init: ret code=%d version loaded =%d ", paperr,PAPI_VER_CURRENT); printf("%s\n",pmsg); @@ -211,7 +211,7 @@ int drhook_papi_init(int rank){ return 1; } -int dr_hook_papi_start_threads(int * events){ +int dr_hook_papi_start_threads(int* events){ int thread=safe_thread_num(); int papiErr; char pmsg[STD_MSG_LEN]; @@ -222,18 +222,23 @@ int dr_hook_papi_start_threads(int * events){ snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: create event set failed (%s) \n",PAPI_strerror(papiErr)); printf("%s\n",pmsg); return 0; - } else - printf("Event set %d created for thread %d\n",events[thread],thread); + } + + printf("DRHOOK:PAPI: Event set %d created for thread %d\n",events[thread],thread); int prof_papi_numcntrs=NPAPICNTRS; - for (int counter=0;counter < prof_papi_numcntrs ;counter ++){ + for (int counter=0; counter < prof_papi_numcntrs; counter ++){ int eventCode; snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: %s (%s)",hookCounters[counter][0],hookCounters[counter][1]); - if (drhook_papi_rank==0) if (thread==0)printf("%s\n",pmsg); + if (drhook_papi_rank==0) { + if (thread==0) { + printf("%s\n",pmsg); + } + } papiErr=PAPI_event_name_to_code(hookCounters[counter][0],&eventCode); - if (papiErr !=PAPI_OK){ + if (papiErr != PAPI_OK){ snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: event name to code failed (%s)",PAPI_strerror(papiErr)); printf("%s\n",pmsg); PAPI_perror("initPapi"); @@ -245,22 +250,25 @@ int dr_hook_papi_start_threads(int * events){ snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: add_event failed: %d (%s)",papiErr,PAPI_strerror(papiErr)); printf("%s\n",pmsg); if (papiErr == PAPI_EINVAL) - printf("Invalid argumet"); + printf("Invalid argumet"); else if (papiErr == PAPI_ENOMEM) - printf("Out of Mmemory"); + printf("Out of Mmemory"); else if (papiErr == PAPI_ENOEVST) - printf("EventSet does not exist"); + printf("EventSet does not exist"); else if (papiErr == PAPI_EISRUN) - printf("EventSet is running"); + printf("EventSet is running"); else if (papiErr == PAPI_ECNFLCT) - printf("Conflict"); + printf("Conflict"); else if (papiErr == PAPI_ENOEVNT) - printf("Preset not available"); + printf("Preset not available"); return 0; - }else { + } + else { #if defined(DEBUG) - snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Added code=%d to Evnt set %d",events[thread]); - if (thread==0)printf("%s\n",pmsg); + snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Added code=%d to Event set %d",eventCode, events[thread]); + if (thread==0) { + printf("%s\n",pmsg); + } #endif } } @@ -272,15 +280,13 @@ int dr_hook_papi_start_threads(int * events){ snprintf(pmsg,STD_MSG_LEN,"DRHOOK:PAPI: Error querying events - %d=%s",papiErr,PAPI_strerror(papiErr)); printf("%s\n",pmsg); return 0; - }else { + } #if defined(DEBUG) - for (counter=0;counter -#endif +#if defined(DR_HOOK_HAVE_PAPI) #include @@ -12,7 +8,7 @@ int drhook_papi_init(int rank); int drhook_papi_num_counters(); -const char * drhook_papi_counter_name(int c,int t); +const char* drhook_papi_counter_name(int c,int t); long_long drhook_papi_read(int counterId); int drhook_papi_readAll(long_long * counterArray); diff --git a/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 b/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 index c0a1fcc..6c3c0a0 100644 --- a/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 +++ b/src/fiat/drhook/internal/drhook_run_omp_parallel.F90 @@ -10,18 +10,17 @@ ! These functions are to be used within drhook C methods, to avoid having OMP pragmas there. -module hook_papi_interface -#ifdef HKPAPI - +module drhook_papi_interface +#if defined(DR_HOOK_HAVE_PAPI) interface function dr_hook_papi_start_threads ( events) bind ( c ) - use :: iso_c_binding - INTEGER(KIND=C_INT) :: dr_hook_papi_start_threads - INTEGER(KIND=C_INT), INTENT(INOUT) :: Events(*) + use, intrinsic :: iso_c_binding, only : c_int + integer(kind=c_int) :: dr_hook_papi_start_threads + integer(kind=c_int), intent(inout) :: events(*) end function dr_hook_papi_start_threads end interface #endif -end module hook_papi_interface +end module drhook_papi_interface subroutine drhook_run_omp_parallel_ipfstr(NTIDS, FUNC, CDSTR) ! Usage: @@ -75,11 +74,11 @@ subroutine drhook_run_omp_parallel_get_cycles(NTIDS, NCYCLES) !$OMP END PARALLEL end subroutine drhook_run_omp_parallel_get_cycles -#ifdef HKPAPI +#if defined(DR_HOOK_HAVE_PAPI) subroutine drhook_run_omp_parallel_papi_startup(events,n) bind(c) use, intrinsic :: iso_c_binding, only : c_char, c_int, c_double - use hook_papi_interface + use drhook_papi_interface use OML_MOD implicit none INTEGER(KIND=C_INT), INTENT(INOUT) :: Events(n) diff --git a/src/programs/CMakeLists.txt b/src/programs/CMakeLists.txt index 1fb2787..dd4696d 100644 --- a/src/programs/CMakeLists.txt +++ b/src/programs/CMakeLists.txt @@ -68,31 +68,3 @@ check_c_compiler_flag( "-Wno-implicit-function-declaration" disable_warning_impl if( disable_warning_implicit_function_declaration ) target_compile_options( fiat-printbinding PRIVATE "-Wno-implicit-function-declaration" ) endif() - -### Program fiat-drhook-sanity -# Note: it only depends on MPI and OpenMP -ecbuild_add_executable(TARGET fiat-drhook-sanity - SOURCES fiat-drhook-sanity.F90 fiat-drhook-sanity-stream.F90 fiat-drhook-sanity-gemm.F90 mysecond.c - LIBS fiat parkind_dp - ) -target_compile_definitions( fiat-drhook-sanity PRIVATE OMP ) - -ecbuild_info( CMAKE_Fortran_COMPILER_ID - 1. ${CMAKE_Fortran_COMPILER_ID} 2. ${EC_COMPILER_FAMILY} ) -if( ${CMAKE_Fortran_COMPILER_ID} MATCHES Intel ) - set_source_files_properties(fiat-drhook-sanity-stream.f90 PROPERTIES COMPILE_FLAGS " -qopt-prefetch-distance=64,12 -qopt-streaming-cache-evict=0 -qopt-streaming-stores always -qopt-zmm-usage=high ") -endif() - -find_package( OpenMP COMPONENTS Fortran ) -if( TARGET OpenMP::OpenMP_Fortran ) - target_link_libraries( fiat-drhook-sanity OpenMP::OpenMP_Fortran ) -endif() -target_link_libraries( fiat-drhook-sanity ${MKL_LIBRARIES} ) - -find_package( MPI COMPONENTS C ) -if( HAVE_MPI AND TARGET MPI::MPI_C ) - target_link_libraries( fiat-drhook-sanity MPI::MPI_C ) -else() - target_compile_definitions( fiat-drhook-sanity PRIVATE NOMPI ) -endif() - - diff --git a/src/programs/fiat-drhook-sanity.F90 b/src/programs/fiat-drhook-sanity.F90 deleted file mode 100644 index 5fdf09c..0000000 --- a/src/programs/fiat-drhook-sanity.F90 +++ /dev/null @@ -1,74 +0,0 @@ -program drhook_sanity - use parkind1, only: jpim, jprb, jprd - use oml_mod ,only : oml_max_threads - use mpl_module - use yomhook, only : LHOOK,DR_HOOK,JPHOOK,dr_hook_init,dr_hook_end - use stream_mod - use gemm_mod - implicit none - logical :: luse_mpi = .true. - integer :: myproc,nproc,nthread - integer :: verbosity = 0 - - REAL(KIND=JPHOOK) :: ZHOOK_HANDLE - - luse_mpi = detect_mpirun() - - if (luse_mpi) then - call mpl_init(ldinfo=(verbosity>=1)) - nproc = mpl_nproc() - myproc = mpl_myrank() - else - nproc = 1 - myproc = 1 - mpl_comm = -1 - endif - - nthread= oml_max_threads() - if (myproc.eq.1) write(6,*)'Starting Tasks=',nproc,'threads=',nthread - - call dr_hook_init() - - IF (LHOOK) CALL DR_HOOK('MAIN',0,ZHOOK_HANDLE) - - call stream_combinations() - -#if defined(HAVE_BLAS) - call gemm_combinations() -#endif - - IF (LHOOK) CALL DR_HOOK('MAIN',1,ZHOOK_HANDLE) - - call dr_hook_end() - - if (luse_mpi) then - call mpl_end(ldmeminfo=.false.) - endif - if (myproc.eq.1) write(6,*)'Completed' -contains - function detect_mpirun() result(lmpi_required) - logical :: lmpi_required - integer :: ilen - integer, parameter :: nvars = 5 - character(len=32), dimension(nvars) :: cmpirun_detect - character(len=4) :: clenv_dr_hook_assert_mpi_initialized - integer :: ivar - - ! Environment variables that are set when mpirun, srun, aprun, ... are used - cmpirun_detect(1) = 'OMPI_COMM_WORLD_SIZE' ! openmpi - cmpirun_detect(2) = 'ALPS_APP_PE' ! cray pe - cmpirun_detect(3) = 'PMI_SIZE' ! intel - cmpirun_detect(4) = 'SLURM_NTASKS' ! slurm - cmpirun_detect(5) = 'ECTRANS_USE_MPI' ! forced - - lmpi_required = .false. - do ivar = 1, nvars - call get_environment_variable(name=trim(cmpirun_detect(ivar)), length=ilen) - if (ilen > 0) then - lmpi_required = .true. - exit ! break - endif - enddo -end function - -end program drhook_sanity diff --git a/src/programs/mysecond.c b/src/programs/mysecond.c deleted file mode 100644 index d206a4a..0000000 --- a/src/programs/mysecond.c +++ /dev/null @@ -1,27 +0,0 @@ -/* A gettimeofday routine to give access to the wall - clock timer on most UNIX-like systems. - - This version defines two entry points -- with - and without appended underscores, so it *should* - automagically link with FORTRAN */ - -#include - -double mysecond() -{ -/* struct timeval { long tv_sec; - long tv_usec; }; - -struct timezone { int tz_minuteswest; - int tz_dsttime; }; */ - - struct timeval tp; - struct timezone tzp; - int i; - - i = gettimeofday(&tp,&tzp); - return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 ); -} - -double mysecond_() {return mysecond();} - diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 22c73c9..56d3f47 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -8,6 +8,8 @@ if( HAVE_TESTS ) +set( CMAKE_Fortran_MODULE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) + if( HAVE_MPI AND MPIEXEC ) set( LAUNCH ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 1 ) else() @@ -105,6 +107,46 @@ set_tests_properties(fiat_test_drhook_fortran PROPERTIES ENVIRONMENT "MPL=0;DR_HOOK_ASSERT_MPI_INITIALIZED=0;DR_HOOK_OPT=NOPROPAGATE_SIGNALS" PASS_REGULAR_EXPRESSION "EC_DRHOOK.*\[DrHookCallTree\]" ) + +# ---------------------------------------------------------------------------------------- +# Tests: fiat_test_drhook_counters + +if( HAVE_DR_HOOK_PAPI ) + ecbuild_add_test(TARGET fiat_test_drhook_counters + SOURCES test_drhook_counters.F90 + test_drhook_counters_stream.F90 + test_drhook_counters_gemm.F90 + LIBS fiat + ENVIRONMENT "DR_HOOK_ASSERT_MPI_INITIALIZED=0;FIAT_UNIT_TEST=1" + ) + target_compile_definitions( fiat_test_drhook_counters PRIVATE OMP ) + if( NOT HAVE_MPI ) + target_compile_definitions( fiat_test_drhook_counters PRIVATE NOMPI ) + endif() + + if( CMAKE_Fortran_COMPILER_ID MATCHES Intel ) + set_source_files_properties(test_drhook_counters_stream.F90 + PROPERTIES COMPILE_OPTIONS "-qopt-prefetch-distance=64,12;-qopt-streaming-cache-evict=0;-qopt-streaming-stores always;-qopt-zmm-usage=high") + endif() + + find_package( OpenMP COMPONENTS Fortran ) + if( TARGET OpenMP::OpenMP_Fortran ) + target_link_libraries( fiat_test_drhook_counters OpenMP::OpenMP_Fortran ) + endif() + if( NOT BLAS_LIBRARIES ) + find_package( MKL QUIET ) + if( MKL_LIBRARIES ) + set( BLAS_LIBRARIES ${MKL_LIBRARIES} ) + else() + find_package( BLAS QUIET ) + endif() + endif() + if( BLAS_LIBRARIES ) + target_link_libraries( fiat_test_drhook_counters ${BLAS_LIBRARIES} ) + target_compile_definitions( fiat_test_drhook_counters PUBLIC HAVE_BLAS ) + endif() +endif() + # ---------------------------------------------------------------------------------------- # Tests: fiat_test_ec_args_fortran diff --git a/tests/test_drhook_counters.F90 b/tests/test_drhook_counters.F90 new file mode 100644 index 0000000..d425e28 --- /dev/null +++ b/tests/test_drhook_counters.F90 @@ -0,0 +1,100 @@ +program fiat_test_drhook_counters + use oml_mod ,only : oml_max_threads + use mpl_module, only : mpl_init, mpl_end, mpl_nproc, mpl_myrank + use yomhook, only : LHOOK,DR_HOOK,JPHOOK,dr_hook_init,dr_hook_end + use test_drhook_counters_stream_mod, only : stream_combinations + use test_drhook_counters_gemm_mod, only : gemm_combinations + use ec_env_mod, only : ec_setenv + + implicit none + logical :: luse_mpi = .true. + logical :: lsmall_problem_size = .false. + integer :: myproc,nproc + integer :: verbosity = 0 + + REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + + luse_mpi = detect_mpirun() + lsmall_problem_size = detect_FIAT_UNIT_TEST() + + if (luse_mpi) then + call mpl_init(ldinfo=(verbosity>=1)) + nproc = mpl_nproc() + myproc = mpl_myrank() + else + nproc = 1 + myproc = 1 + endif + + if (myproc.eq.1) write(6,*)'Starting Tasks=',nproc,'threads=',oml_max_threads() + + call ec_setenv("DR_HOOK", "1", overwrite=.true.) + call ec_setenv("DR_HOOK_OPT", "COUNTERS", overwrite=.true.) + + call dr_hook_init() + + IF (LHOOK) CALL DR_HOOK('MAIN',0,ZHOOK_HANDLE) + + if (myproc.eq.1) write(6,*) "================================================= BENCHMARK STREAM START" + if (lsmall_problem_size) then + call stream_combinations(int(1024*32,kind=8)) + else + call stream_combinations() + endif + if (myproc.eq.1) write(6,*) "================================================= BENCHMARK STREAM END" + + if (myproc.eq.1) write(6,*) "================================================= BENCHMARK GEMM START" + if (lsmall_problem_size) then + call gemm_combinations(int(250,kind=8)) + else + call gemm_combinations() + endif + write(6,*) "================================================= BENCHMARK GEMM END" + + IF (LHOOK) CALL DR_HOOK('MAIN',1,ZHOOK_HANDLE) + + call dr_hook_end() + + if (luse_mpi) then + call mpl_end(ldmeminfo=.false.) + endif + if (myproc.eq.1) write(6,*)'Completed' +contains + function detect_mpirun() result(lmpi_required) + logical :: lmpi_required + integer :: ilen + integer, parameter :: nvars = 5 + character(len=32), dimension(nvars) :: cmpirun_detect + character(len=4) :: clenv_dr_hook_assert_mpi_initialized + integer :: ivar + lmpi_required = .false. +#if defined(NOMPI) + return +#endif + ! Environment variables that are set when mpirun, srun, aprun, ... are used + cmpirun_detect(1) = 'OMPI_COMM_WORLD_SIZE' ! openmpi + cmpirun_detect(2) = 'ALPS_APP_PE' ! cray pe + cmpirun_detect(3) = 'PMI_SIZE' ! intel + cmpirun_detect(4) = 'SLURM_NTASKS' ! slurm + cmpirun_detect(5) = 'FIAT_USE_MPI' ! forced + + do ivar = 1, nvars + call get_environment_variable(name=trim(cmpirun_detect(ivar)), length=ilen) + if (ilen > 0) then + lmpi_required = .true. + exit ! break + endif + enddo +end function + +function detect_FIAT_UNIT_TEST() result(lunit_test) + logical :: lunit_test + integer :: ilen + lunit_test = .false. + call get_environment_variable(name='FIAT_UNIT_TEST', length=ilen) + if (ilen > 0) then + lunit_test = .true. + endif +end function + +end program diff --git a/src/programs/fiat-drhook-sanity-gemm.F90 b/tests/test_drhook_counters_gemm.F90 similarity index 88% rename from src/programs/fiat-drhook-sanity-gemm.F90 rename to tests/test_drhook_counters_gemm.F90 index b82cb87..bb9affb 100644 --- a/src/programs/fiat-drhook-sanity-gemm.F90 +++ b/tests/test_drhook_counters_gemm.F90 @@ -1,13 +1,17 @@ -module gemm_mod +module test_drhook_counters_gemm_mod use yomhook, only : lhook,dr_hook,jphook implicit none contains - subroutine gemm_combinations() + subroutine gemm_combinations(n_init) implicit none - integer*8 n,i + integer(kind=8), intent(in), optional :: n_init + integer(kind=8) :: n,i real(kind=jphook) :: zhook_handle n=1000 + if (present(n_init)) then + n = n_init + endif #if defined(HAVE_BLAS) if (lhook) call dr_hook('GEMM_ALL',0,zhook_handle) do i=1,4 @@ -81,4 +85,4 @@ subroutine sgemm_driver(nn) end subroutine sgemm_driver #endif -end module gemm_mod +end module diff --git a/src/programs/fiat-drhook-sanity-stream.F90 b/tests/test_drhook_counters_stream.F90 similarity index 95% rename from src/programs/fiat-drhook-sanity-stream.F90 rename to tests/test_drhook_counters_stream.F90 index 2dbd1ac..ee575b1 100644 --- a/src/programs/fiat-drhook-sanity-stream.F90 +++ b/tests/test_drhook_counters_stream.F90 @@ -1,4 +1,4 @@ -MODULE stream_mod +MODULE test_drhook_counters_stream_mod !======================================================================= ! Program: STREAM ! Programmer: John D. McCalpin @@ -45,19 +45,23 @@ MODULE stream_mod use yomhook, only : lhook,dr_hook,jphook contains - subroutine stream_combinations() + subroutine stream_combinations(n_init) implicit none - integer*8 n,ntimes,i + integer(kind=8), intent(in), optional :: n_init + integer(kind=8) :: n, ntimes, i real(kind=jphook) :: zhook_handle n=1024*1024 + if (present(n_init)) then + n = n_init + endif ntimes=1024 if (lhook) call dr_hook('STREAM',0,zhook_handle) do i=1,3 + write(6,'(" =============================== CALL STREAM(",I0,",",I0,")")') n, ntimes call stream(n,ntimes) n=n*8 ntimes=ntimes/8 end do - if (lhook) call dr_hook('STREAM',1,zhook_handle) end subroutine stream_combinations @@ -79,7 +83,7 @@ SUBROUTINE stream(n,ntimes) CHARACTER label(4)*11 ! .. ! .. External Functions .. - DOUBLE PRECISION mysecond + DOUBLE PRECISION timef REAL(KIND=JPHOOK) :: ZHOOK_HANDLE REAL(KIND=JPHOOK) :: ZHOOK_1,ZHOOK_2,ZHOOK_3,ZHOOK_4 CHARACTER(len=29) :: tag @@ -142,12 +146,12 @@ SUBROUTINE stream(n,ntimes) b(j) = 0.5D0 c(j) = 0.0D0 10 END DO - t = mysecond() + t = timef() !$OMP PARALLEL DO DO 20 j = 1,n a(j) = 0.5d0*a(j) 20 END DO - t = mysecond() - t + t = timef() - t PRINT *,'----------------------------------------------------' quantum = checktick() WRITE (*,FMT=9000) & @@ -160,51 +164,51 @@ SUBROUTINE stream(n,ntimes) DO 70 k = 1,ntimes IF (LHOOK) CALL DR_HOOK('STREAM_COPY'//TRIM(tag),0,ZHOOK_1) - t = mysecond() + t = timef() a(1) = a(1) + t !$OMP PARALLEL DO DO 30 j = 1,n c(j) = a(j) 30 END DO - t = mysecond() - t + t = timef() - t IF (LHOOK) CALL DR_HOOK('STREAM_COPY'//TRIM(tag),1,ZHOOK_1) c(n) = c(n) + t times(1,k) = t IF (LHOOK) CALL DR_HOOK('STREAM_SCALE'//TRIM(tag),0,ZHOOK_2) - t = mysecond() + t = timef() c(1) = c(1) + t !$OMP PARALLEL DO DO 40 j = 1,n b(j) = scalar*c(j) 40 END DO - t = mysecond() - t + t = timef() - t IF (LHOOK) CALL DR_HOOK('STREAM_SCALE'//TRIM(tag),1,ZHOOK_2) b(n) = b(n) + t times(2,k) = t IF (LHOOK) CALL DR_HOOK('STREAM_ADD'//TRIM(tag),0,ZHOOK_3) - t = mysecond() + t = timef() a(1) = a(1) + t !$OMP PARALLEL DO DO 50 j = 1,n c(j) = a(j) + b(j) 50 END DO - t = mysecond() - t + t = timef() - t IF (LHOOK) CALL DR_HOOK('STREAM_ADD'//TRIM(tag),1,ZHOOK_3) c(n) = c(n) + t times(3,k) = t IF (LHOOK) CALL DR_HOOK('STREAM_TRIAD'//TRIM(tag),0,ZHOOK_4) - t = mysecond() + t = timef() b(1) = b(1) + t !$OMP PARALLEL DO DO 60 j = 1,n a(j) = b(j) + scalar*c(j) 60 END DO - t = mysecond() - t + t = timef() - t IF (LHOOK) CALL DR_HOOK('STREAM_TRIAD'//TRIM(tag),1,ZHOOK_4) a(n) = a(n) + t @@ -341,15 +345,15 @@ INTEGER FUNCTION checktick() DOUBLE PRECISION timesfound(n) ! .. ! .. External Functions .. - DOUBLE PRECISION mysecond - EXTERNAL mysecond + DOUBLE PRECISION timef + EXTERNAL timef ! .. ! .. Intrinsic Functions .. INTRINSIC max,min,nint ! .. i = 0 t1=-1 -10 t2 = mysecond() +10 t2 = timef() IF (t2.EQ.t1) GO TO 10 t1 = t2 @@ -457,4 +461,4 @@ function itoa(i) result(res) res = trim(tmp) end function itoa -END MODULE stream_mod +END MODULE