From 00c73c42c030dcea1f64bdf44cdb972d43cd12e2 Mon Sep 17 00:00:00 2001 From: Michael Staneker Date: Fri, 8 Mar 2024 15:00:00 +0000 Subject: [PATCH] some changes related to Loki CUF-HOIST --- src/ecwam/CMakeLists.txt | 54 ++++++++++++++++++++++++++++++++- src/ecwam/ecwam_loki_gpu.config | 15 +++++++++ src/ecwam/sinflx.F90 | 4 +-- src/ecwam/taut_z0.F90 | 22 ++++---------- src/programs/CMakeLists.txt | 10 ++++++ 5 files changed, 86 insertions(+), 19 deletions(-) diff --git a/src/ecwam/CMakeLists.txt b/src/ecwam/CMakeLists.txt index 69cac7473..cb3ec775f 100644 --- a/src/ecwam/CMakeLists.txt +++ b/src/ecwam/CMakeLists.txt @@ -563,6 +563,7 @@ if( HAVE_WAM_LOKI ) target_compile_options( ${ecwam}_idem_stack PUBLIC "-fcray-pointer" ) endif() + if( HAVE_WAM_GPU ) ############################################################ ## Loki SCC transformation: ## @@ -668,9 +669,60 @@ if( HAVE_WAM_LOKI ) endif() endif() endif() - if( HAVE_CUDA ) ############################################################ +## Loki SCC-CUF hoist transformation: ## +############################################################ + foreach(src ${phys_srcs} wamintgr_loki_gpu.F90 ${global_var_mods}) + string(REPLACE ".F90" "" fnc ${src}) + string(CONCAT fnm "loki-cuf/" ${fnc} ".cuf_hoist_new.F90") + list(APPEND loki_cuf_srcs ${fnm}) + endforeach() + + set_source_files_properties( ${loki_cuf_srcs} PROPERTIES COMPILE_OPTIONS "-Mcuda=maxregcount:128;-Minfo=accel" ) + + loki_transform_convert( + MODE cuf-hoist-new FRONTEND ${LOKI_FRONTEND} CPP GLOBAL_VAR_OFFLOAD TRIM_VECTOR_SECTIONS + CONFIG ${CMAKE_CURRENT_SOURCE_DIR}/ecwam_loki_gpu.config + PATH ${CMAKE_CURRENT_SOURCE_DIR} + INCLUDES ${ecwam_intfb_includes} + OUTPATH ${CMAKE_CURRENT_BINARY_DIR}/loki-cuf + OUTPUT ${loki_cuf_srcs} + DEPENDS ${phys_srcs} wamintgr_loki_gpu.F90 ${global_var_mods} + ) + + ecbuild_add_library( + TARGET ${ecwam}_cuf + TYPE ${LIBRARY_TYPE} + SOURCES ${ecwam_srcs} ${loki_cuf_srcs} ${phys_srcs} + PUBLIC_LIBS fiat parkind_${prec} ${ecwam}_intfb +
${MPI_Fortran_LIBRARIES} + ${${PNAME}_OCEANMODEL_LIBRARIES} + $<${HAVE_ACC}:OpenACC::OpenACC_Fortran> + PRIVATE_LIBS eccodes_f90 + ${MULTIO_LIBRARIES} + ${OpenMP_Fortran_LIBRARIES} + $<${HAVE_FIELD_API}:field_api_${prec}> + MPI::MPI_Fortran + PUBLIC_INCLUDES $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> + PRIVATE_INCLUDES ${${PNAME}_OCEANMODEL_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR} + PUBLIC_DEFINITIONS ${ECWAM_DEFINITIONS} WAM_PHYS_GPU + ) + + ecwam_target_fortran_module_directory( + TARGET ${ecwam}_cuf + MODULE_DIRECTORY ${CMAKE_BINARY_DIR}/module/${ecwam}_cuf + INSTALL_DIRECTORY module/${ecwam}_cuf + ) + + ecwam_target_compile_definitions_FILENAME( ${ecwam}_cuf ) + + target_link_options( ${ecwam}_cuf PUBLIC "-cuda;-gpu=pinned" ) + + # if( CMAKE_Fortran_COMPILER_ID MATCHES PGI|NVHPC AND HAVE_ACC ) + # target_compile_options( ${ecwam}_cuf PUBLIC "-gpu=gvmode,maxregcount:128" ) + # endif() +############################################################ ## SCC-cuf variant with parametrised temporaries: ## ############################################################ foreach(src ${phys_srcs} wamintgr_cuda_mod.F90 ${global_var_mods}) diff --git a/src/ecwam/ecwam_loki_gpu.config b/src/ecwam/ecwam_loki_gpu.config index 302bf9685..6495f0320 100644 --- a/src/ecwam/ecwam_loki_gpu.config +++ b/src/ecwam/ecwam_loki_gpu.config @@ -11,6 +11,7 @@ disable = [ ] utility_routines = ['dr_hook', 'abort1', 'write(iu06'] enable_imports = true +# enable_imports = false # Define entry point for call-tree transformation [routines] @@ -56,3 +57,17 @@ enable_imports = true [dimensions.block_dim] size = "NCHNK" index = "ICHNK" + +# Define specific transformation settings +[transformations] +[transformations.cuf-hoist-new] + classname = 'SccCufTransformationNew' + module = 'transformations.scc_cuf' +[transformations.cuf-hoist-new.options] + transformation_type = 'hoist' + horizontal = '%dimensions.horizontal%' + vertical = '%dimensions.vertical%' + block_dim = '%dimensions.block_dim%' + +# derived_types = ['TECLDP'] + diff --git
a/src/ecwam/sinflx.F90 b/src/ecwam/sinflx.F90 index cb52ea4a9..a179c02ab 100644 --- a/src/ecwam/sinflx.F90 +++ b/src/ecwam/sinflx.F90 @@ -90,7 +90,7 @@ SUBROUTINE SINFLX (ICALL, KIJS, KIJL, & INTEGER(KIND=JWIM) :: NGST REAL(KIND=JPHOOK) :: ZHOOK_HANDLE -REAL(KIND=JWRB), DIMENSION(KIJL) :: RNFAC +REAL(KIND=JWRB), DIMENSION(KIJL) :: RNFAC, TMP_EM LOGICAL :: LLPHIWA, LLSNEG @@ -157,7 +157,7 @@ SUBROUTINE SINFLX (ICALL, KIJS, KIJL, & ! MEAN FREQUENCY CHARACTERISTIC FOR WIND SEA -CALL FEMEANWS(KIJS, KIJL, FL1, XLLWS, FMEANWS) +CALL FEMEANWS(KIJS, KIJL, FL1, XLLWS, FMEANWS, TMP_EM) ! COMPUTE LAST FREQUENCY INDEX OF PROGNOSTIC PART OF SPECTRUM. CALL FRCUTINDEX(KIJS, KIJL, FMEAN, FMEANWS, UFRIC, CICOVER, MIJ, RHOWGDFTH) diff --git a/src/ecwam/taut_z0.F90 b/src/ecwam/taut_z0.F90 index cbeb70fb8..5d5e37dd8 100644 --- a/src/ecwam/taut_z0.F90 +++ b/src/ecwam/taut_z0.F90 @@ -171,7 +171,8 @@ SUBROUTINE TAUT_Z0(KIJS, KIJL, IUSFG, & ZCHAR = MIN(ZCHAR,ALPHAMAX) CDFG = ACDLIN + BCDLIN*SQRT(ZCHAR) * UTOP(IJ) ELSE - CDFG = CDM(UTOP(IJ)) + ! CDFG = CDM(UTOP(IJ)) ! TODO: revert and automate + CDFG = MAX(MIN(0.0006_JWRB+0.00008_JWRB*UTOP(IJ), 0.001_JWRB+0.0018_JWRB*EXP(-0.05_JWRB*(UTOP(IJ)-33._JWRB))),0.001_JWRB) ENDIF USTAR(IJ) = UTOP(IJ)*SQRT(CDFG) ENDDO @@ -210,7 +211,8 @@ SUBROUTINE TAUT_Z0(KIJS, KIJL, IUSFG, & ENDDO ! protection just in case there is no convergence IF (ITER > NITER ) THEN - CDFG = CDM(UTOP(IJ)) + ! CDFG = CDM(UTOP(IJ)) + CDFG = MAX(MIN(0.0006_JWRB+0.00008_JWRB*UTOP(IJ), 0.001_JWRB+0.0018_JWRB*EXP(-0.05_JWRB*(UTOP(IJ)-33._JWRB))),0.001_JWRB) USTAR(IJ) = UTOP(IJ)*SQRT(CDFG) Z0MINRST = USTAR(IJ)**2 * ALPHA*GM1 Z0(IJ) = MAX(XNLEV/(EXP(XKUTOP/USTAR(IJ))-1.0_JWRB), Z0MINRST) @@ -259,7 +261,8 @@ SUBROUTINE TAUT_Z0(KIJS, KIJL, IUSFG, & ENDDO ! protection just in case there is no convergence IF (ITER > NITER ) THEN - CDFG = CDM(UTOP(IJ)) + ! 
CDFG = CDM(UTOP(IJ)) + CDFG = MAX(MIN(0.0006_JWRB+0.00008_JWRB*UTOP(IJ), 0.001_JWRB+0.0018_JWRB*EXP(-0.05_JWRB*(UTOP(IJ)-33._JWRB))),0.001_JWRB) USTAR(IJ) = UTOP(IJ)*SQRT(CDFG) Z0MINRST = USTAR(IJ)**2 * ALPHA*GM1 Z0(IJ) = MAX(XNLEV/(EXP(XKUTOP/USTAR(IJ))-1.0_JWRB), Z0MINRST) @@ -334,18 +337,5 @@ SUBROUTINE TAUT_Z0(KIJS, KIJL, IUSFG, & IF (LHOOK) CALL DR_HOOK('TAUT_Z0',1,ZHOOK_HANDLE) -CONTAINS - -! INLINE FUNCTION. -! ---------------- - -! Simple empirical fit to model drag coefficient - FUNCTION CDM(U10) - !$loki routine seq - REAL(KIND=JWRB), INTENT(IN) :: U10 - REAL(KIND=JWRB) :: CDM - - CDM = MAX(MIN(0.0006_JWRB+0.00008_JWRB*U10, 0.001_JWRB+0.0018_JWRB*EXP(-0.05_JWRB*(U10-33._JWRB))),0.001_JWRB) - END FUNCTION CDM END SUBROUTINE TAUT_Z0 diff --git a/src/programs/CMakeLists.txt b/src/programs/CMakeLists.txt index 7af5545e9..c766dab85 100644 --- a/src/programs/CMakeLists.txt +++ b/src/programs/CMakeLists.txt @@ -56,6 +56,16 @@ if( HAVE_WAM_LOKI ) endif() if( HAVE_CUDA ) + +if( HAVE_WAM_LOKI ) + ecbuild_add_executable( + TARGET ${PROJECT_NAME}-chief-loki-scc-cuf + SOURCES chief.F90 + LIBS ${PROJECT_NAME}_cuf ${OpenMP_Fortran_LIBRARIES} + ) + ecwam_target_compile_definitions_FILENAME(${PROJECT_NAME}-chief-loki-scc-cuf) +endif() + ecbuild_add_executable( TARGET ${PROJECT_NAME}-chief-scc-cuf SOURCES chief.F90