Skip to content

Commit

Permalink
changes for CUDA C/Fortran
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelSt98 committed Mar 21, 2024
1 parent ffdf4ca commit 17c314d
Show file tree
Hide file tree
Showing 11 changed files with 243 additions and 31 deletions.
137 changes: 123 additions & 14 deletions src/ecwam/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,8 @@ list(APPEND phys_srcs
z0wave.F90
)

if( HAVE_GEN_DERIV_TYPES )
# if( HAVE_GEN_DERIV_TYPES )
if (TRUE)
list( APPEND ecwam_srcs ${CMAKE_CURRENT_BINARY_DIR}/yowfield_mod.F90)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/yowfield_mod.F90
Expand Down Expand Up @@ -419,6 +420,7 @@ endif()
# copies of module global variables
set( LIBRARY_TYPE SHARED )
if( HAVE_ACC )
# if (TRUE)
set( LIBRARY_TYPE STATIC )
endif()

Expand Down Expand Up @@ -670,31 +672,32 @@ if( HAVE_WAM_LOKI )
endif()
endif()
if( HAVE_CUDA )
if( HAVE_WAM_LOKI )
############################################################
## Loki SCC transformation: ##
## Loki CUF-HOIST transformation: ##
############################################################
foreach(src ${phys_srcs} wamintgr_loki_gpu.F90 ${global_var_mods})
string(REPLACE ".F90" "" fnc ${src})
string(CONCAT fnm "loki-cuf/" ${fnc} ".cuf_hoist_new.F90")
list(APPEND loki_cuf_srcs ${fnm})
string(CONCAT fnm "loki-cuf-hoisted/" ${fnc} ".cuf_hoist_new.F90")
list(APPEND loki_cuf_hoisted_srcs ${fnm})
endforeach()

set_source_files_properties( ${loki_cuf_srcs} PROPERTIES COMPILE_OPTIONS "-Mcuda=maxregcount:128 -Minfo=accel" )
set_source_files_properties( ${loki_cuf_hoisted_srcs} PROPERTIES COMPILE_OPTIONS "-Mcuda=maxregcount:128 -Minfo=accel" )

loki_transform_convert(
MODE cuf-hoist-new FRONTEND ${LOKI_FRONTEND} CPP GLOBAL_VAR_OFFLOAD TRIM_VECTOR_SECTIONS
CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/ecwam_loki_gpu.config
PATH ${CMAKE_CURRENT_SOURCE_DIR}
INCLUDES ${ecwam_intfb_includes}
OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-cuf
OUTPUT ${loki_cuf_srcs}
OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-cuf-hoisted
OUTPUT ${loki_cuf_hoisted_srcs}
DEPENDS ${phys_srcs} wamintgr_loki_gpu.F90 ${global_var_mods}
)

ecbuild_add_library(
TARGET ${ecwam}_cuf
TARGET ${ecwam}_cuf_hoisted
TYPE ${LIBRARY_TYPE}
SOURCES ${ecwam_srcs} ${loki_cuf_srcs} ${phys_srcs}
SOURCES ${ecwam_srcs} ${loki_cuf_hoisted_srcs} ${phys_srcs}
PUBLIC_LIBS fiat parkind_${prec} ${ecwam}_intfb
${MPI_Fortran_LIBRARIES}
${${PNAME}_OCEANMODEL_LIBRARIES}
Expand All @@ -710,21 +713,24 @@ if( HAVE_CUDA )
)

ecwam_target_fortran_module_directory(
TARGET ${ecwam}_cuf
MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/${ecwam}_cuf
INSTALL_DIRECTORY module/${ecwam}_cuf
TARGET ${ecwam}_cuf_hoisted
MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/${ecwam}_cuf_hoisted
INSTALL_DIRECTORY module/${ecwam}_cuf_hoisted
)

ecwam_target_compile_definitions_FILENAME( ${ecwam}_cuf )
ecwam_target_compile_definitions_FILENAME( ${ecwam}_cuf_hoisted )

target_link_options( ${ecwam}_cuf PUBLIC "-cuda;-gpu=pinned" )
target_link_options( ${ecwam}_cuf_hoisted PUBLIC "-cuda;-gpu=pinned" )

# if( CMAKE_Fortran_COMPILER_ID MATCHES PGI|NVHPC AND HAVE_ACC )
# target_compile_options( ${ecwam}_cuf PUBLIC "-gpu=gvmode,maxregcount:128" )
# endif()

endif()
############################################################
## SCC-cuf variant with parametrised temporaries: ##
############################################################
if (TRUE)
foreach(src ${phys_srcs} wamintgr_cuda_mod.F90 ${global_var_mods})
string(REPLACE ".F90" "" fnc ${src})
string(CONCAT fnm "../phys-scc-cuf/" ${fnc} ".cuf_parametrise.F90")
Expand Down Expand Up @@ -761,3 +767,106 @@ if( HAVE_CUDA )

target_link_options( ${ecwam}_scc_cuf PUBLIC "-cuda;-gpu=pinned" )
endif()
############################################################
## SCC-cuf variant with hoisted temporaries: ##
############################################################

if (TRUE)
  # Builds the "${ecwam}_scc_cuf_hoist" library (CUDA Fortran, hoisted variant).
  # The GPU sources are taken from ../phys-scc-cuf-hoist; no Loki transformation
  # is invoked in this block, so the "<name>.cuf_hoist_new.F90" files must
  # already exist there. One file is expected per physics source, per
  # global-variable module and for wamintgr_loki_gpu.F90.
  foreach( hoist_input ${phys_srcs} wamintgr_loki_gpu.F90 ${global_var_mods} )
    # Strip the Fortran extension, then re-attach directory and variant suffix.
    string( REPLACE ".F90" "" fnc ${hoist_input} )
    set( fnm "../phys-scc-cuf-hoist/${fnc}.cuf_hoist_new.F90" )
    list( APPEND wam_scc_cuf_hoist_srcs ${fnm} )
  endforeach()

  # Cap register usage for the CUDA Fortran kernels only; the remaining
  # sources of the target keep the default flags.
  set_source_files_properties(
    ${wam_scc_cuf_hoist_srcs}
    PROPERTIES
      COMPILE_OPTIONS "-Mcuda=maxregcount:128"
  )

  # WAM_CUDA and WAM_CUDA_C are added on top of the common ecWAM definitions.
  ecbuild_add_library(
    TARGET           ${ecwam}_scc_cuf_hoist
    TYPE             ${LIBRARY_TYPE}
    DEFINITIONS      ${ECWAM_DEFINITIONS} WAM_CUDA WAM_CUDA_C
    SOURCES          ${ecwam_srcs} ${wam_scc_cuf_hoist_srcs} ${phys_srcs}
    PUBLIC_LIBS      fiat parkind_${prec} ${ecwam}_intfb
                     ${MPI_Fortran_LIBRARIES}
                     ${${PNAME}_OCEANMODEL_LIBRARIES}
                     $<${HAVE_ACC}:OpenACC::OpenACC_Fortran>
    PRIVATE_LIBS     eccodes_f90
                     ${MULTIO_LIBRARIES}
                     ${OpenMP_Fortran_LIBRARIES}
                     $<${HAVE_FIELD_API}:field_api_${prec}>
                     MPI::MPI_Fortran
    PUBLIC_INCLUDES  $<INSTALL_INTERFACE:include>
    PRIVATE_INCLUDES ${${PNAME}_OCEANMODEL_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}
  )

  # Give the variant its own Fortran module directory so its .mod files do not
  # collide with those of the other ecWAM library variants.
  ecwam_target_fortran_module_directory(
    TARGET            ${ecwam}_scc_cuf_hoist
    MODULE_DIRECTORY  ${CMAKE_BINARY_DIR}/module/${ecwam}_scc_cuf_hoist
    INSTALL_DIRECTORY module/${ecwam}_scc_cuf_hoist
  )

  ecwam_target_compile_definitions_FILENAME( ${ecwam}_scc_cuf_hoist )

  # PUBLIC: executables linking this library inherit the CUDA link flags.
  target_link_options( ${ecwam}_scc_cuf_hoist PUBLIC -cuda -gpu=pinned )
endif()

############################################################
############################################################
## SCC-CUDA with hoisted temporaries, mostly generated via Loki ##
## but with minor manual fixes/adaptations ... ##
############################################################
if (TRUE)
  # Builds the "${ecwam}_scc_cuda" library: physics kernels as CUDA C sources
  # plus Fortran driver/wrapper sources, all taken from ../phys-scc-cuda.
  # No Loki transformation is invoked in this block, so the files listed below
  # must already exist in that directory.

  # CUDA C kernels: one "<name>_c.c" per physics source.
  foreach(src ${phys_srcs})
    string(REPLACE ".F90" "" fnc ${src})
    string(CONCAT fnm "../phys-scc-cuda/" ${fnc} "_c.c")
    list(APPEND wam_scc_cuda_srcs ${fnm})
  endforeach()

  # Fortran side: "<name>.c_hoist.F90" for every global-variable module and
  # for the wamintgr_loki_gpu driver.
  foreach(src ${global_var_mods} wamintgr_loki_gpu.F90)
    string(REPLACE ".F90" "" fnc ${src})
    string(CONCAT fnm "../phys-scc-cuda/" ${fnc} ".c_hoist.F90")
    list(APPEND wam_scc_cuda_srcs_2 ${fnm})
  endforeach()

  # implsch_fc.F90 is listed explicitly; it is not produced by the loops above.
  ecbuild_add_library(
    TARGET ${ecwam}_scc_cuda
    TYPE ${LIBRARY_TYPE}
    DEFINITIONS ${ECWAM_DEFINITIONS} WAM_CUDA WAM_CUDA_C
    SOURCES ${ecwam_srcs} ${wam_scc_cuda_srcs} ${phys_srcs} ${wam_scc_cuda_srcs_2} ../phys-scc-cuda/implsch_fc.F90
    PUBLIC_LIBS fiat parkind_${prec} ${ecwam}_intfb
                ${MPI_Fortran_LIBRARIES}
                ${${PNAME}_OCEANMODEL_LIBRARIES}
                $<${HAVE_ACC}:OpenACC::OpenACC_Fortran>
    PRIVATE_LIBS eccodes_f90
                 ${MULTIO_LIBRARIES}
                 ${OpenMP_Fortran_LIBRARIES}
                 $<${HAVE_FIELD_API}:field_api_${prec}>
                 MPI::MPI_Fortran
    PUBLIC_INCLUDES $<INSTALL_INTERFACE:include>
    PRIVATE_INCLUDES ${${PNAME}_OCEANMODEL_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}
  )

  # The kernels carry a ".c" extension, so CMake would hand them to the C
  # compiler unless told otherwise.
  set_source_files_properties( ${wam_scc_cuda_srcs} PROPERTIES LANGUAGE CUDA )

  # NOTE(review): the architecture is hard-coded to 80 and overrides any
  # CMAKE_CUDA_ARCHITECTURES supplied on the command line or by a toolchain
  # file. Only code=sm_80 is requested below (no PTX) - confirm this matches
  # every GPU the library is meant to run on.
  set(CMAKE_CUDA_ARCHITECTURES 80)

  # The whole generator expression is quoted and its options separated by ';',
  # so it reaches CMake as one well-formed expression instead of fragments that
  # merely happen to be re-joined. The "=" forms keep each option and its value
  # in a single token; a standalone "128" could otherwise be dropped by CMake's
  # option de-duplication.
  #   --maxrregcount=128  limit registers per thread
  #   -dc                 compile to relocatable device code
  # For debugging, "-keep" and "-DDEBUG" can be appended to the list.
  target_compile_options( ${ecwam}_scc_cuda PUBLIC
    "$<$<COMPILE_LANGUAGE:CUDA>:--maxrregcount=128;-dc;-gencode=arch=compute_${CMAKE_CUDA_ARCHITECTURES},code=sm_${CMAKE_CUDA_ARCHITECTURES}>"
  )

  # Give the variant its own Fortran module directory so its .mod files do not
  # collide with those of the other ecWAM library variants.
  ecwam_target_fortran_module_directory(
    TARGET ${ecwam}_scc_cuda
    MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/${ecwam}_scc_cuda
    INSTALL_DIRECTORY module/${ecwam}_scc_cuda
  )

  ecwam_target_compile_definitions_FILENAME( ${ecwam}_scc_cuda )

  # -lcudadevrt accompanies the relocatable device code requested with -dc.
  target_link_options( ${ecwam}_scc_cuda PUBLIC "-cuda;-gpu=pinned;-lcudadevrt" )
endif()

endif()

10 changes: 9 additions & 1 deletion src/ecwam/ecwam_loki_gpu.config
Original file line number Diff line number Diff line change
Expand Up @@ -69,5 +69,13 @@ enable_imports = true
vertical = '%dimensions.vertical%'
block_dim = '%dimensions.block_dim%'

# derived_types = ['TECLDP']
[transformations.c-hoist]
classname = 'SccCufTransformationNew'
module = 'transformations.scc_cuf'
[transformations.c-hoist.options]
transformation_type = 'hoist'
horizontal = '%dimensions.horizontal%'
vertical = '%dimensions.vertical%'
block_dim = '%dimensions.block_dim%'
mode = 'cuda'

4 changes: 3 additions & 1 deletion src/ecwam/sdissip_ard.F90
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ SUBROUTINE SDISSIP_ARD (KIJS, KIJL, FL1, FLD, SL, &
& BRKPBCOEF ,SSDSC5, NSDSNTH, NDIKCUMUL, &
& INDICESSAT, SATWEIGHTS, CUMULW

USE YOWSHAL, ONLY: NDEPTH
USE YOMHOOK , ONLY : LHOOK ,DR_HOOK, JPHOOK

! ----------------------------------------------------------------------
Expand Down Expand Up @@ -98,13 +99,14 @@ SUBROUTINE SDISSIP_ARD (KIJS, KIJL, FL1, FLD, SL, &
REAL(KIND=JWRB), DIMENSION(KIJL,NANG_PARAM) :: SCUMUL, D

REAL(KIND=JWRB), DIMENSION(KIJL) :: RENEWALFREQ

INTEGER :: FOO
! ----------------------------------------------------------------------

IF (LHOOK) CALL DR_HOOK('SDISSIP_ARD',0,ZHOOK_HANDLE)

! INITIALISATION

FOO = NDEPTH ! necessary for Loki ...
EPSR = SQRT(SDSBR)

TPIINV = 1.0_JWRB/ZPI
Expand Down
4 changes: 3 additions & 1 deletion src/ecwam/sinput_ard.F90
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ SUBROUTINE WSIGSTAR (WSWAVE, UFRIC, Z0M, WSTAR, SIG_N)
REAL(KIND=JWRB), PARAMETER :: P1 = 1.48_JWRB
REAL(KIND=JWRB), PARAMETER :: P2 = -0.21_JWRB

!$loki routine seq
! $ loki routine seq
REAL(KIND=JWRB) :: ZCHAR, C_D, DC_DDU, SIG_CONV
REAL(KIND=JWRB) :: XKAPPAD, U10, C2U10P1, U10P2
REAL(KIND=JWRB) :: BCD, U10M1, ZN, Z0VIS
Expand All @@ -88,6 +88,8 @@ SUBROUTINE WSIGSTAR (WSWAVE, UFRIC, Z0M, WSTAR, SIG_N)

IF (LHOOK) CALL DR_HOOK('WSIGSTAR',0,ZHOOK_HANDLE)

!$loki routine seq

IF (LLGCBZ0) THEN
ZN = RNUM

Expand Down
5 changes: 5 additions & 0 deletions src/ecwam/wamintgr_loki_gpu.F90
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, &
& TAUOCYD=TAUOCYD_DPTR, TAUOC=TAUOC_DPTR, PHIOCD=PHIOCD_DPTR, PHIEPS=PHIEPS_DPTR, PHIAW=PHIAW_DPTR)
CALL SRC_CONTRIBS%UPDATE_DEVICE(XLLWS=XLLWS_DPTR, MIJ=MIJ_DPTR)

!$loki data

!$acc data present(FL1_DPTR,XLLWS_DPTR,MIJ_DPTR,WAVNUM_DPTR,CGROUP_DPTR,CIWA_DPTR,CINV_DPTR,XK2CG_DPTR,STOKFAC_DPTR,&
!$acc & EMAXDPT_DPTR,INDEP_DPTR,DEPTH_DPTR,IOBND_DPTR,IODP_DPTR,CICOVER_DPTR,WSWAVE_DPTR,WDWAVE_DPTR,AIRD_DPTR,&
!$acc & WSTAR_DPTR,UFRIC_DPTR,TAUW_DPTR,TAUWDIR_DPTR,Z0M_DPTR,Z0B_DPTR,CHRNCK_DPTR,CITHICK_DPTR,NEMOUSTOKES_DPTR,&
Expand Down Expand Up @@ -251,6 +253,9 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, &
!$acc end parallel loop
TIME1(2) = TIME1(2) + (TIME0+WAM_USER_CLOCK())*1.E-06
!$acc end data

!$loki end data

CALL WVPRPT_FIELD%ENSURE_HOST()
CALL WVENVI_FIELD%ENSURE_HOST()
CALL FF_NOW_FIELD%ENSURE_HOST()
Expand Down
13 changes: 10 additions & 3 deletions src/ecwam/wamodel.F90
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, &
USE WAM_MULTIO_MOD, ONLY : WAM_MULTIO_FLUSH
USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK

#ifdef WAM_CUDA
! # ifdef WAM_CUDA
#if defined(WAM_CUDA) && !defined(WAM_CUDA_C)
USE WAMINTGR_CUDA_MOD, ONLY : WAMINTGR_CUF
#endif

Expand Down Expand Up @@ -114,7 +115,8 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, &
#include "updnemostress.intfb.h"
#include "writsta.intfb.h"

#ifdef WAM_PHYS_GPU
! # ifdef WAM_PHYS_GPU
#if defined(WAM_PHYS_GPU) || defined(WAM_CUDA_C)
#include "wamintgr_loki_gpu.intfb.h"
#elif !defined(WAM_CUDA)
#include "wamintgr.intfb.h"
Expand Down Expand Up @@ -256,7 +258,12 @@ SUBROUTINE WAMODEL (NADV, LDSTOP, LDWRRE, BLK2GLO, &
CDATEWH = CDATEWO
ILOOP = 1
DO WHILE ( ILOOP == 1 .OR. CDTIMPNEXT <= CDTPRO)
#ifdef WAM_PHYS_GPU
#ifdef WAM_PHYS_GPU
CALL WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, &
& BLK2GLO, &
& WVENVI, WVPRPT, FF_NOW, FF_NEXT, INTFLDS, &
& WAM2NEMO, MIJ, FL1, XLLWS, TIME1)
#elif defined(WAM_CUDA_C)
CALL WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, &
& BLK2GLO, &
& WVENVI, WVPRPT, FF_NOW, FF_NEXT, INTFLDS, &
Expand Down
35 changes: 35 additions & 0 deletions src/ecwam/yowfred.F90
Original file line number Diff line number Diff line change
Expand Up @@ -16,30 +16,52 @@ MODULE YOWFRED

!* ** *FREDIR* - FREQUENCY AND DIRECTION GRID.

!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: FR(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: DFIM(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: RHOWG_DFIM(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: DFIM_SIM(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: DFIMOFR(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: DFIMOFR_SIM(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: DFIM_END_L(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: DFIM_END_U(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: DFIMFR(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: DFIMFR_SIM(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: DFIMFR2(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: DFIMFR2_SIM(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: GOM(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: C(:)
REAL(KIND=JWRB) :: DELTH
REAL(KIND=JWRB) :: DELTR
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: TH(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: COSTH(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: SINTH(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: ZPIFR(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: FR5(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: FRM5(:)
!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: COFRM4(:)

!$loki dimension(NFRE)
REAL(KIND=JWRB), ALLOCATABLE :: FLMAX(:)

TYPE(FREQUENCY_LAND) :: WVPRPT_LAND
Expand All @@ -62,18 +84,31 @@ MODULE YOWFRED
REAL(KIND=JWRB), PARAMETER :: XKS_GC = 0.006_JWRB
REAL(KIND=JWRB), PARAMETER :: XKL_GC = 20000.0_JWRB

!$loki dimension(NWAV_GC)
REAL(KIND=JWRB), ALLOCATABLE :: XK_GC(:)
!$loki dimension(NWAV_GC)
REAL(KIND=JWRB), ALLOCATABLE :: XKM_GC(:)
!$loki dimension(NWAV_GC)
REAL(KIND=JWRB), ALLOCATABLE :: OMEGA_GC(:)
!$loki dimension(NWAV_GC)
REAL(KIND=JWRB), ALLOCATABLE :: OMXKM3_GC(:)
!$loki dimension(NWAV_GC)
REAL(KIND=JWRB), ALLOCATABLE :: VG_GC(:)
!$loki dimension(NWAV_GC)
REAL(KIND=JWRB), ALLOCATABLE :: C_GC(:)
!$loki dimension(NWAV_GC)
REAL(KIND=JWRB), ALLOCATABLE :: CM_GC(:)
!$loki dimension(NWAV_GC)
REAL(KIND=JWRB), ALLOCATABLE :: C2OSQRTVG_GC(:)
!$loki dimension(NWAV_GC)
REAL(KIND=JWRB), ALLOCATABLE :: XKMSQRTVGOC2_GC(:)
!$loki dimension(NWAV_GC)
REAL(KIND=JWRB), ALLOCATABLE :: OM3GMKM_GC(:)
!$loki dimension(NWAV_GC)
REAL(KIND=JWRB), ALLOCATABLE :: DELKCC_GC(:)
!$loki dimension(NWAV_GC)
REAL(KIND=JWRB), ALLOCATABLE :: DELKCC_GC_NS(:)
!$loki dimension(NWAV_GC)
REAL(KIND=JWRB), ALLOCATABLE :: DELKCC_OMXKM3_GC(:)

REAL(KIND=JWRB), PARAMETER :: FRIC = 28.0_JWRB
Expand Down
Loading

0 comments on commit 17c314d

Please sign in to comment.