From 17c314d95e8d9856a0f0f37557d44854b8b115bf Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Thu, 21 Mar 2024 14:29:19 +0000 Subject: [PATCH] changes for CUDA C/Fortran --- src/ecwam/CMakeLists.txt | 137 ++++++++++++++++++++++++++++---- src/ecwam/ecwam_loki_gpu.config | 10 ++- src/ecwam/sdissip_ard.F90 | 4 +- src/ecwam/sinput_ard.F90 | 4 +- src/ecwam/wamintgr_loki_gpu.F90 | 5 ++ src/ecwam/wamodel.F90 | 13 ++- src/ecwam/yowfred.F90 | 35 ++++++++ src/ecwam/yowindn.F90 | 27 +++++-- src/ecwam/yowphys.F90 | 3 + src/ecwam/yowstat.F90 | 7 +- src/programs/CMakeLists.txt | 29 ++++++- 11 files changed, 243 insertions(+), 31 deletions(-) diff --git a/src/ecwam/CMakeLists.txt b/src/ecwam/CMakeLists.txt index cb3ec775f..8bf0dcb4b 100644 --- a/src/ecwam/CMakeLists.txt +++ b/src/ecwam/CMakeLists.txt @@ -349,7 +349,8 @@ list(APPEND phys_srcs z0wave.F90 ) -if( HAVE_GEN_DERIV_TYPES ) +# if( HAVE_GEN_DERIV_TYPES ) +if (TRUE) list( APPEND ecwam_srcs ${CMAKE_CURRENT_BINARY_DIR}/yowfield_mod.F90) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/yowfield_mod.F90 @@ -419,6 +420,7 @@ endif() # copies of module global variables set( LIBRARY_TYPE SHARED ) if( HAVE_ACC ) + # if (TRUE) set( LIBRARY_TYPE STATIC ) endif() @@ -670,31 +672,32 @@ if( HAVE_WAM_LOKI ) endif() endif() if( HAVE_CUDA ) +if( HAVE_WAM_LOKI ) ############################################################ -## Loki SCC transformation: ## +## Loki CUF-HOIST transformation: ## ############################################################ foreach(src ${phys_srcs} wamintgr_loki_gpu.F90 ${global_var_mods}) string(REPLACE ".F90" "" fnc ${src}) - string(CONCAT fnm "loki-cuf/" ${fnc} ".cuf_hoist_new.F90") - list(APPEND loki_cuf_srcs ${fnm}) + string(CONCAT fnm "loki-cuf-hoisted/" ${fnc} ".cuf_hoist_new.F90") + list(APPEND loki_cuf_hoisted_srcs ${fnm}) endforeach() - set_source_files_properties( ${loki_cuf_srcs} PROPERTIES COMPILE_OPTIONS "-Mcuda=maxregcount:128 -Minfo=accel" ) + set_source_files_properties( ${loki_cuf_hoisted_srcs} PROPERTIES COMPILE_OPTIONS "-Mcuda=maxregcount:128 -Minfo=accel" ) loki_transform_convert( MODE cuf-hoist-new FRONTEND ${LOKI_FRONTEND} CPP GLOBAL_VAR_OFFLOAD TRIM_VECTOR_SECTIONS CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/ecwam_loki_gpu.config PATH ${CMAKE_CURRENT_SOURCE_DIR} INCLUDES ${ecwam_intfb_includes} - OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-cuf - OUTPUT ${loki_cuf_srcs} + OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-cuf-hoisted + OUTPUT ${loki_cuf_hoisted_srcs} DEPENDS ${phys_srcs} wamintgr_loki_gpu.F90 ${global_var_mods} ) ecbuild_add_library( - TARGET ${ecwam}_cuf + TARGET ${ecwam}_cuf_hoisted TYPE ${LIBRARY_TYPE} - SOURCES ${ecwam_srcs} ${loki_cuf_srcs} ${phys_srcs} + SOURCES ${ecwam_srcs} ${loki_cuf_hoisted_srcs} ${phys_srcs} PUBLIC_LIBS fiat parkind_${prec} ${ecwam}_intfb ${MPI_Fortran_LIBRARIES} ${${PNAME}_OCEANMODEL_LIBRARIES} @@ -710,21 +713,24 @@ if( HAVE_CUDA ) ) ecwam_target_fortran_module_directory( - TARGET ${ecwam}_cuf - MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/${ecwam}_cuf - INSTALL_DIRECTORY module/${ecwam}_cuf + TARGET ${ecwam}_cuf_hoisted + MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/${ecwam}_cuf_hoisted + INSTALL_DIRECTORY module/${ecwam}_cuf_hoisted ) - ecwam_target_compile_definitions_FILENAME( ${ecwam}_cuf ) + ecwam_target_compile_definitions_FILENAME( ${ecwam}_cuf_hoisted ) - target_link_options( ${ecwam}_cuf PUBLIC "-cuda;-gpu=pinned" ) + target_link_options( ${ecwam}_cuf_hoisted PUBLIC "-cuda;-gpu=pinned" ) # if( CMAKE_Fortran_COMPILER_ID MATCHES PGI|NVHPC AND HAVE_ACC ) # target_compile_options( ${ecwam}_cuf PUBLIC "-gpu=gvmode,maxregcount:128" ) # endif() + +endif() ############################################################ ## SCC-cuf variant with parametrised temporaries: ## ############################################################ +if (TRUE) foreach(src ${phys_srcs} wamintgr_cuda_mod.F90 ${global_var_mods}) string(REPLACE ".F90" "" fnc ${src}) string(CONCAT fnm "../phys-scc-cuf/" ${fnc} ".cuf_parametrise.F90") @@ -761,3 +767,106 @@ if( HAVE_CUDA ) target_link_options( ${ecwam}_scc_cuf PUBLIC "-cuda;-gpu=pinned" ) endif() +############################################################ +## SCC-cuf variant with parametrised temporaries: ## +############################################################ + +if (TRUE) + foreach(src ${phys_srcs} wamintgr_loki_gpu.F90 ${global_var_mods}) + string(REPLACE ".F90" "" fnc ${src}) + string(CONCAT fnm "../phys-scc-cuf-hoist/" ${fnc} ".cuf_hoist_new.F90") + list(APPEND wam_scc_cuf_hoist_srcs ${fnm}) + endforeach() + + set_source_files_properties( ${wam_scc_cuf_hoist_srcs} PROPERTIES COMPILE_OPTIONS "-Mcuda=maxregcount:128" ) + + ecbuild_add_library( + TARGET ${ecwam}_scc_cuf_hoist + TYPE ${LIBRARY_TYPE} + DEFINITIONS ${ECWAM_DEFINITIONS} WAM_CUDA WAM_CUDA_C + SOURCES ${ecwam_srcs} ${wam_scc_cuf_hoist_srcs} ${phys_srcs} + PUBLIC_LIBS fiat parkind_${prec} ${ecwam}_intfb + ${MPI_Fortran_LIBRARIES} + ${${PNAME}_OCEANMODEL_LIBRARIES} + $<${HAVE_ACC}:OpenACC::OpenACC_Fortran> + PRIVATE_LIBS eccodes_f90 + ${MULTIO_LIBRARIES} + ${OpenMP_Fortran_LIBRARIES} + $<${HAVE_FIELD_API}:field_api_${prec}> + MPI::MPI_Fortran + PUBLIC_INCLUDES $ + PRIVATE_INCLUDES ${${PNAME}_OCEANMODEL_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR} + ) + + ecwam_target_fortran_module_directory( + TARGET ${ecwam}_scc_cuf_hoist + MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/${ecwam}_scc_cuf_hoist + INSTALL_DIRECTORY module/${ecwam}_scc_cuf_hoist + ) + + ecwam_target_compile_definitions_FILENAME( ${ecwam}_scc_cuf_hoist ) + + target_link_options( ${ecwam}_scc_cuf_hoist PUBLIC "-cuda;-gpu=pinned" ) +endif() + +############################################################ +############################################################ +## SCC-CUDA hoisted temporaries, mostly generated via Loki# +##  but minor manual fixes/adaptations ... ## +############################################################ +if (TRUE) +# foreach(src ${phys_srcs} wamintgr_cuda_mod.F90 ${global_var_mods}) + foreach(src ${phys_srcs}) # wamintgr_cuda_mod.F90 ${global_var_mods}) + string(REPLACE ".F90" "" fnc ${src}) + string(CONCAT fnm "../phys-scc-cuda/" ${fnc} "_c.c") + list(APPEND wam_scc_cuda_srcs ${fnm}) + endforeach() + + foreach(src ${global_var_mods} wamintgr_loki_gpu.F90) # wamintgr_cuda_mod.F90 ${global_var_mods}) + string(REPLACE ".F90" "" fnc ${src}) + string(CONCAT fnm "../phys-scc-cuda/" ${fnc} ".c_hoist.F90") + list(APPEND wam_scc_cuda_srcs_2 ${fnm}) + endforeach() + + ecbuild_add_library( + TARGET ${ecwam}_scc_cuda + TYPE ${LIBRARY_TYPE} + DEFINITIONS ${ECWAM_DEFINITIONS} WAM_CUDA WAM_CUDA_C + SOURCES ${ecwam_srcs} ${wam_scc_cuda_srcs} ${phys_srcs} ${wam_scc_cuda_srcs_2} ../phys-scc-cuda/implsch_fc.F90 # ${global_var_mods} ../phys-scc-cuda/wamintgr_loki_gpu.c_hoist.F90 + PUBLIC_LIBS fiat parkind_${prec} ${ecwam}_intfb + ${MPI_Fortran_LIBRARIES} + ${${PNAME}_OCEANMODEL_LIBRARIES} + $<${HAVE_ACC}:OpenACC::OpenACC_Fortran> + PRIVATE_LIBS eccodes_f90 + ${MULTIO_LIBRARIES} + ${OpenMP_Fortran_LIBRARIES} + $<${HAVE_FIELD_API}:field_api_${prec}> + MPI::MPI_Fortran + PUBLIC_INCLUDES $ + PRIVATE_INCLUDES ${${PNAME}_OCEANMODEL_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR} + ) + + set_source_files_properties( ${wam_scc_cuda_srcs} PROPERTIES LANGUAGE CUDA ) + + # set_source_files_properties(${phys_srcs} ${global_var_mods} ../phys-scc-cuda/wamintgr_loki_gpu.c_hoist.F90 FLAGS "-cuda") + set(CMAKE_CUDA_ARCHITECTURES 80) + target_compile_options(${ecwam}_scc_cuda PUBLIC $<$: + # -keep -DDEBUG -gencode arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}> --shared --compiler-options -fPIC) + # -rdc=true -gencode arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}>) + --maxrregcount 128 -dc -gencode arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}>) + + # set_target_properties( ${ecwam}_scc_cuda PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + + ecwam_target_fortran_module_directory( + TARGET ${ecwam}_scc_cuda + MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/${ecwam}_scc_cuda + INSTALL_DIRECTORY module/${ecwam}_scc_cuda + ) + + ecwam_target_compile_definitions_FILENAME( ${ecwam}_scc_cuda ) + + target_link_options( ${ecwam}_scc_cuda PUBLIC "-cuda;-gpu=pinned;-lcudadevrt" ) +endif() + +endif() + diff --git a/src/ecwam/ecwam_loki_gpu.config b/src/ecwam/ecwam_loki_gpu.config index 6495f0320..347afc2b7 100644 --- a/src/ecwam/ecwam_loki_gpu.config +++ b/src/ecwam/ecwam_loki_gpu.config @@ -69,5 +69,13 @@ enable_imports = true vertical = '%dimensions.vertical%' block_dim = '%dimensions.block_dim%' -# derived_types = ['TECLDP'] +[transformations.c-hoist] + classname = 'SccCufTransformationNew' + module = 'transformations.scc_cuf' +[transformations.c-hoist.options] + transformation_type = 'hoist' + horizontal = '%dimensions.horizontal%' + vertical = '%dimensions.vertical%' + block_dim = '%dimensions.block_dim%' + mode = 'cuda' diff --git a/src/ecwam/sdissip_ard.F90 b/src/ecwam/sdissip_ard.F90 index 626b0c2d3..eaf1efaaf 100644 --- a/src/ecwam/sdissip_ard.F90 +++ b/src/ecwam/sdissip_ard.F90 @@ -70,6 +70,7 @@ SUBROUTINE SDISSIP_ARD (KIJS, KIJL, FL1, FLD, SL, & & BRKPBCOEF ,SSDSC5, NSDSNTH, NDIKCUMUL, & & INDICESSAT, SATWEIGHTS, CUMULW + USE YOWSHAL, ONLY: NDEPTH USE YOMHOOK , ONLY : LHOOK ,DR_HOOK, JPHOOK ! ---------------------------------------------------------------------- @@ -98,13 +99,14 @@ SUBROUTINE SDISSIP_ARD (KIJS, KIJL, FL1, FLD, SL, & REAL(KIND=JWRB), DIMENSION(KIJL,NANG_PARAM) :: SCUMUL, D REAL(KIND=JWRB), DIMENSION(KIJL) :: RENEWALFREQ - + INTEGER :: FOO ! ---------------------------------------------------------------------- IF (LHOOK) CALL DR_HOOK('SDISSIP_ARD',0,ZHOOK_HANDLE) ! INITIALISATION + FOO = NDEPTH ! necessary for Loki ... EPSR = SQRT(SDSBR) TPIINV = 1.0_JWRB/ZPI diff --git a/src/ecwam/sinput_ard.F90 b/src/ecwam/sinput_ard.F90 index cf7f5182d..4eb1efc51 100644 --- a/src/ecwam/sinput_ard.F90 +++ b/src/ecwam/sinput_ard.F90 @@ -77,7 +77,7 @@ SUBROUTINE WSIGSTAR (WSWAVE, UFRIC, Z0M, WSTAR, SIG_N) REAL(KIND=JWRB), PARAMETER :: P1 = 1.48_JWRB REAL(KIND=JWRB), PARAMETER :: P2 = -0.21_JWRB - !$loki routine seq + ! $ loki routine seq REAL(KIND=JWRB) :: ZCHAR, C_D, DC_DDU, SIG_CONV REAL(KIND=JWRB) :: XKAPPAD, U10, C2U10P1, U10P2 REAL(KIND=JWRB) :: BCD, U10M1, ZN, Z0VIS @@ -88,6 +88,8 @@ SUBROUTINE WSIGSTAR (WSWAVE, UFRIC, Z0M, WSTAR, SIG_N) IF (LHOOK) CALL DR_HOOK('WSIGSTAR',0,ZHOOK_HANDLE) + !$loki routine seq + IF (LLGCBZ0) THEN ZN = RNUM diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index d68394418..9beb90b1b 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -221,6 +221,8 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & & TAUOCYD=TAUOCYD_DPTR, TAUOC=TAUOC_DPTR, PHIOCD=PHIOCD_DPTR, PHIEPS=PHIEPS_DPTR, PHIAW=PHIAW_DPTR) CALL SRC_CONTRIBS%UPDATE_DEVICE(XLLWS=XLLWS_DPTR, MIJ=MIJ_DPTR) +!$loki data + !$acc data present(FL1_DPTR,XLLWS_DPTR,MIJ_DPTR,WAVNUM_DPTR,CGROUP_DPTR,CIWA_DPTR,CINV_DPTR,XK2CG_DPTR,STOKFAC_DPTR,& !$acc & EMAXDPT_DPTR,INDEP_DPTR,DEPTH_DPTR,IOBND_DPTR,IODP_DPTR,CICOVER_DPTR,WSWAVE_DPTR,WDWAVE_DPTR,AIRD_DPTR,& !$acc & WSTAR_DPTR,UFRIC_DPTR,TAUW_DPTR,TAUWDIR_DPTR,Z0M_DPTR,Z0B_DPTR,CHRNCK_DPTR,CITHICK_DPTR,NEMOUSTOKES_DPTR,& @@ -251,6 +253,9 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & !$acc end parallel loop TIME1(2) = TIME1(2) + (TIME0+WAM_USER_CLOCK())*1.E-06 !$acc end data + +!$loki end data + CALL WVPRPT_FIELD%ENSURE_HOST() CALL WVENVI_FIELD%ENSURE_HOST() CALL FF_NOW_FIELD%ENSURE_HOST() diff --git a/src/ecwam/wamodel.F90 b/src/ecwam/wamodel.F90 index 4de0b9b98..3094a7a30 100644 --- a/src/ecwam/wamodel.F90 +++ b/src/ecwam/wamodel.F90 @@ -86,7 +86,8 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & USE WAM_MULTIO_MOD, ONLY : WAM_MULTIO_FLUSH USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK -#ifdef WAM_CUDA +! # ifdef WAM_CUDA +#if defined(WAM_CUDA) && !defined(WAM_CUDA_C) USE WAMINTGR_CUDA_MOD, ONLY : WAMINTGR_CUF #endif @@ -114,7 +115,8 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & #include "updnemostress.intfb.h" #include "writsta.intfb.h" -#ifdef WAM_PHYS_GPU +! # ifdef WAM_PHYS_GPU +#if defined(WAM_PHYS_GPU) || defined(WAM_CUDA_C) #include "wamintgr_loki_gpu.intfb.h" #elif !defined(WAM_CUDA) #include "wamintgr.intfb.h" @@ -256,7 +258,12 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, & CDATEWH = CDATEWO ILOOP = 1 DO WHILE ( ILOOP == 1 .OR. CDTIMPNEXT <= CDTPRO) -#ifdef WAM_PHYS_GPU +#ifdef WAM_PHYS_GPU + CALL WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & + & BLK2GLO, & + & WVENVI, WVPRPT, FF_NOW, FF_NEXT, INTFLDS, & + & WAM2NEMO, MIJ, FL1, XLLWS, TIME1) +#elif defined(WAM_CUDA_C) CALL WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & & BLK2GLO, & & WVENVI, WVPRPT, FF_NOW, FF_NEXT, INTFLDS, & diff --git a/src/ecwam/yowfred.F90 b/src/ecwam/yowfred.F90 index bb8c19e15..91c5cfdc7 100644 --- a/src/ecwam/yowfred.F90 +++ b/src/ecwam/yowfred.F90 @@ -16,30 +16,52 @@ MODULE YOWFRED !* ** *FREDIR* - FREQUENCY AND DIRECTION GRID. + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: FR(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: DFIM(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: RHOWG_DFIM(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: DFIM_SIM(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: DFIMOFR(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: DFIMOFR_SIM(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: DFIM_END_L(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: DFIM_END_U(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: DFIMFR(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: DFIMFR_SIM(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: DFIMFR2(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: DFIMFR2_SIM(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: GOM(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: C(:) REAL(KIND=JWRB) :: DELTH REAL(KIND=JWRB) :: DELTR + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: TH(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: COSTH(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: SINTH(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: ZPIFR(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: FR5(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: FRM5(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: COFRM4(:) + !$loki dimension(NFRE) REAL(KIND=JWRB), ALLOCATABLE :: FLMAX(:) TYPE(FREQUENCY_LAND) :: WVPRPT_LAND @@ -62,18 +84,31 @@ MODULE YOWFRED REAL(KIND=JWRB), PARAMETER :: XKS_GC = 0.006_JWRB REAL(KIND=JWRB), PARAMETER :: XKL_GC = 20000.0_JWRB + !$loki dimension(NWAV_GC) REAL(KIND=JWRB), ALLOCATABLE :: XK_GC(:) + !$loki dimension(NWAV_GC) REAL(KIND=JWRB), ALLOCATABLE :: XKM_GC(:) + !$loki dimension(NWAV_GC) REAL(KIND=JWRB), ALLOCATABLE :: OMEGA_GC(:) + !$loki dimension(NWAV_GC) REAL(KIND=JWRB), ALLOCATABLE :: OMXKM3_GC(:) + !$loki dimension(NWAV_GC) REAL(KIND=JWRB), ALLOCATABLE :: VG_GC(:) + !$loki dimension(NWAV_GC) REAL(KIND=JWRB), ALLOCATABLE :: C_GC(:) + !$loki dimension(NWAV_GC) REAL(KIND=JWRB), ALLOCATABLE :: CM_GC(:) + !$loki dimension(NWAV_GC) REAL(KIND=JWRB), ALLOCATABLE :: C2OSQRTVG_GC(:) + !$loki dimension(NWAV_GC) REAL(KIND=JWRB), ALLOCATABLE :: XKMSQRTVGOC2_GC(:) + !$loki dimension(NWAV_GC) REAL(KIND=JWRB), ALLOCATABLE :: OM3GMKM_GC(:) + !$loki dimension(NWAV_GC) REAL(KIND=JWRB), ALLOCATABLE :: DELKCC_GC(:) + !$loki dimension(NWAV_GC) REAL(KIND=JWRB), ALLOCATABLE :: DELKCC_GC_NS(:) + !$loki dimension(NWAV_GC) REAL(KIND=JWRB), ALLOCATABLE :: DELKCC_OMXKM3_GC(:) REAL(KIND=JWRB), PARAMETER :: FRIC = 28.0_JWRB diff --git a/src/ecwam/yowindn.F90 b/src/ecwam/yowindn.F90 index 8f02a32ed..cade2d9fd 100644 --- a/src/ecwam/yowindn.F90 +++ b/src/ecwam/yowindn.F90 @@ -22,20 +22,34 @@ MODULE YOWINDN INTEGER(KIND=JWIM) :: MFRSTLW INTEGER(KIND=JWIM) :: MLSTHG + !$loki dimension(MFRSTLW:MLSTHG) INTEGER(KIND=JWIM), ALLOCATABLE :: IKP(:) + !$loki dimension(MFRSTLW:MLSTHG) INTEGER(KIND=JWIM), ALLOCATABLE :: IKP1(:) + !$loki dimension(MFRSTLW:MLSTHG) INTEGER(KIND=JWIM), ALLOCATABLE :: IKM(:) - INTEGER(KIND=JWIM), ALLOCATABLE :: IKM1(:) + !$loki dimension(MFRSTLW:MLSTHG) + INTEGER(KIND=JWIM), ALLOCATABLE :: IKM1(:) + !$loki dimension(NANG,2) INTEGER(KIND=JWIM), ALLOCATABLE :: K1W(:,:) + !$loki dimension(NANG,2) INTEGER(KIND=JWIM), ALLOCATABLE :: K2W(:,:) - INTEGER(KIND=JWIM), ALLOCATABLE :: K11W(:,:) + !$loki dimension(NANG,2) + INTEGER(KIND=JWIM), ALLOCATABLE :: K11W(:,:) + !$loki dimension(NANG,2) INTEGER(KIND=JWIM), ALLOCATABLE :: K21W(:,:) + !$loki dimension(NINL,1:MLSTHG) INTEGER(KIND=JWIM), ALLOCATABLE :: INLCOEF(:,:) - REAL(KIND=JWRB), ALLOCATABLE :: AF11(:) - REAL(KIND=JWRB), ALLOCATABLE :: FKLAP(:) + !$loki dimension(MFRSTLW:MLSTHG) + REAL(KIND=JWRB), ALLOCATABLE :: AF11(:) + !$loki dimension(MFRSTLW:MLSTHG) + REAL(KIND=JWRB), ALLOCATABLE :: FKLAP(:) + !$loki dimension(MFRSTLW:MLSTHG) REAL(KIND=JWRB), ALLOCATABLE :: FKLAP1(:) - REAL(KIND=JWRB), ALLOCATABLE :: FKLAM(:) + !$loki dimension(MFRSTLW:MLSTHG) + REAL(KIND=JWRB), ALLOCATABLE :: FKLAM(:) + !$loki dimension(MFRSTLW:MLSTHG) REAL(KIND=JWRB), ALLOCATABLE :: FKLAM1(:) REAL(KIND=JWRB) :: ACL1 REAL(KIND=JWRB) :: ACL2 @@ -43,8 +57,11 @@ MODULE YOWINDN REAL(KIND=JWRB) :: CL21 REAL(KIND=JWRB) :: DAL1 REAL(KIND=JWRB) :: DAL2 + !$loki dimension(KFRH) REAL(KIND=JWRB), ALLOCATABLE :: FRH(:) + !$loki dimension(MFRSTLW:1) REAL(KIND=JWRB), ALLOCATABLE :: FTRF(:) + !$loki dimension(NRNL,1:MLSTHG) REAL(KIND=JWRB), ALLOCATABLE :: RNLCOEF(:,:) !* VARIABLE. TYPE. PURPOSE. diff --git a/src/ecwam/yowphys.F90 b/src/ecwam/yowphys.F90 index 5bfd28582..beda0d769 100644 --- a/src/ecwam/yowphys.F90 +++ b/src/ecwam/yowphys.F90 @@ -153,8 +153,11 @@ MODULE YOWPHYS ! NDIKCUMUL is the integer difference in frequency bands INTEGER(KIND=JWIM) :: NDIKCUMUL + !$loki dimension(NANG,NSDSNTH*2+1) INTEGER(KIND=JWIM), ALLOCATABLE :: INDICESSAT(:,:) + !$loki dimension(NANG,NSDSNTH*2+1) REAL(KIND=JWRB), ALLOCATABLE :: SATWEIGHTS(:,:) + !$loki dimension(NDEPTH,0:NANG/2,NFRE,NFRE) REAL(KIND=JWRB), ALLOCATABLE :: CUMULW(:,:,:,:) ! ---------------------------------------------------------------------- END MODULE YOWPHYS diff --git a/src/ecwam/yowstat.F90 b/src/ecwam/yowstat.F90 index 0714a9949..e947c8c71 100644 --- a/src/ecwam/yowstat.F90 +++ b/src/ecwam/yowstat.F90 @@ -26,6 +26,7 @@ MODULE YOWSTAT CHARACTER(LEN=14) :: CDATEE CHARACTER(LEN=14) :: CDATEF CHARACTER(LEN=14) :: CDTPRO + !$loki dimension(NDELW_LST) CHARACTER(LEN=14), ALLOCATABLE :: CDTW_LST(:) CHARACTER(LEN=14) :: CDTRES @@ -40,9 +41,11 @@ MODULE YOWSTAT REAL(KIND=JWRB) :: DELPRO_LF INTEGER(KIND=JWIM) :: IDELPRO INTEGER(KIND=JWIM) :: IDELT - INTEGER(KIND=JWIM) :: IDELWI + INTEGER(KIND=JWIM) :: IDELWI + !$loki dimension(NDELW_LST) INTEGER(KIND=JWIM), ALLOCATABLE :: IDELWI_LST(:) - INTEGER(KIND=JWIM) :: IDELWO + INTEGER(KIND=JWIM) :: IDELWO + !$loki dimension(NDELW_LST) INTEGER(KIND=JWIM), ALLOCATABLE :: IDELWO_LST(:) INTEGER(KIND=JWIM) :: NDELW_LST INTEGER(KIND=JWIM) :: IDELALT diff --git a/src/programs/CMakeLists.txt b/src/programs/CMakeLists.txt index c766dab85..c677498b3 100644 --- a/src/programs/CMakeLists.txt +++ b/src/programs/CMakeLists.txt @@ -59,13 +59,13 @@ if( HAVE_CUDA ) if( HAVE_WAM_LOKI ) ecbuild_add_executable( - TARGET ${PROJECT_NAME}-chief-loki-scc-cuf + TARGET ${PROJECT_NAME}-chief-loki-scc-cuf-hoisted SOURCES chief.F90 - LIBS ${PROJECT_NAME}_cuf ${OpenMP_Fortran_LIBRARIES} + LIBS ${PROJECT_NAME}_cuf_hoisted ${OpenMP_Fortran_LIBRARIES} ) - ecwam_target_compile_definitions_FILENAME(${PROJECT_NAME}-chief-loki-scc-cuf) + ecwam_target_compile_definitions_FILENAME(${PROJECT_NAME}-chief-loki-scc-cuf-hoisted) endif() - +if (TRUE) ecbuild_add_executable( TARGET ${PROJECT_NAME}-chief-scc-cuf SOURCES chief.F90 @@ -73,6 +73,27 @@ endif() ) ecwam_target_compile_definitions_FILENAME(${PROJECT_NAME}-chief-scc-cuf) endif() +if (TRUE) + ecbuild_add_executable( + TARGET ${PROJECT_NAME}-chief-scc-cuf-hoist + SOURCES chief.F90 + LIBS ${PROJECT_NAME}_scc_cuf_hoist ${OpenMP_Fortran_LIBRARIES} + ) + ecwam_target_compile_definitions_FILENAME(${PROJECT_NAME}-chief-scc-cuf-hoist) +endif() + ## + ecbuild_add_executable( + TARGET ${PROJECT_NAME}-chief-scc-cuda + SOURCES chief.F90 + LIBS ${PROJECT_NAME}_scc_cuda ${OpenMP_Fortran_LIBRARIES} + ) + ecwam_target_compile_definitions_FILENAME(${PROJECT_NAME}-chief-scc-cuda) + set_target_properties(${PROJECT_NAME}-chief-scc-cuda PROPERTIES LINKER_LANGUAGE Fortran) + target_link_libraries(${PROJECT_NAME}-chief-scc-cuda cudadevrt cudart) + # target_link_options(${PROJECT_NAME}-chief-scc-cuda PRIVATE "-cuda") + ## + +endif() string( TOUPPER ${PROJECT_NAME} PNAME ) foreach( lang Fortran C CXX )