Skip to content

Commit

Permalink
Reorganise ectrans_benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
samhatfield committed Sep 11, 2024
1 parent 62f44c2 commit b4fa742
Showing 1 changed file with 36 additions and 51 deletions.
87 changes: 36 additions & 51 deletions src/programs/ectrans-benchmark.F90
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,6 @@ program ectrans_benchmark
! This test performs spectral to real and real to spectral transforms repeated in
! timed loop.
!
! 1) One "surface" field is always transformed:
! zspsc2(1,1:nspec2) <-> zgmvs(1:nproma,1:1,1:ngbplk)
!
! 2) A Multiple "3d" fields are transformed and can be disabled with "--nfld 0"
!
! zspsc3a(1:nlev,1:nspec2,1:nfld) <-> zgp3a(1:nproma,1:nlev,1:nfld,1:ngpblk)
!
! 3) Optionally a "3d" vorticity/divergence field is transformed to uv (wind) and
! can be enabled with "--vordiv"
!
! zspvor(1:nlev,1:nspec2) / zspdiv(1:nlev,1:nspec2) <-> zgpuv(1:nproma,1:nlev,1:2,1:ngpblk)
!
! 4) Optionally scalar derivatives can be computed for the fields described in 1) and 2)
! This must be enabled with "--scders"
!
! 5) Optionally uv East-West derivate can be computed from vorticity/divergence.
! This must be enabled with "--vordiv --uvders"
!
!
! Authors : George Mozdzynski
! Willem Deconinck
! Ioan Hadade
Expand All @@ -52,7 +33,7 @@ program ectrans_benchmark
integer(kind=jpim), parameter :: min_octa_points = 20

integer(kind=jpim) :: istack, getstackusage
real(kind=jprd), dimension(1) :: zmaxerr(5), zerr(5)
real(kind=jprd) :: zmaxerr(5)
real(kind=jprd) :: zmaxerrg

! Output unit numbers
Expand All @@ -61,15 +42,14 @@ program ectrans_benchmark
integer(kind=jpim), parameter :: noutdump = 7 ! Unit number for field output

! Default parameters
integer(kind=jpim) :: nsmax = 79 ! Spectral truncation
integer(kind=jpim) :: iters = 10 ! Number of iterations for transform test
integer(kind=jpim) :: nfld = 1 ! Number of 3D scalar fields
integer(kind=jpim) :: nlev = 1 ! Number of vertical levels

integer(kind=jpim) :: nflevg ! Total number of vertical levels
integer(kind=jpim) :: ndgl ! Number of latitudes
integer(kind=jpim) :: nspec2 ! Number of spectral coefficients (real and imaginary)
integer(kind=jpim) :: ngptot ! Total number of grid points on this task
integer(kind=jpim) :: nflevg ! Total number of vertical levels

integer(kind=jpim) :: nspec2 ! Number of spectral coefficients (real and imaginary)
integer(kind=jpim) :: ngptot ! Total number of grid points on this task
integer(kind=jpim) :: ngptotg ! Total number of grid points across all tasks

integer(kind=jpim) :: ifld
Expand All @@ -81,7 +61,7 @@ program ectrans_benchmark
integer(kind=jpim) :: ib
integer(kind=jpim) :: jprtrv

integer(kind=jpim), allocatable :: nloen(:), nprcids(:)
integer(kind=jpim), allocatable :: nprcids(:)
integer(kind=jpim) :: myproc, jj
integer :: jstep

Expand Down Expand Up @@ -110,13 +90,25 @@ program ectrans_benchmark
real(kind=jprb), allocatable :: zgp2(:,:,:)

logical :: lstack = .false. ! Output stack info
logical :: luserpnm = .false.
logical :: lkeeprpnm = .false.

! setup_trans options
integer(kind=jpim) :: nsmax = 79 ! Spectral truncation
integer(kind=jpim) :: ndgl ! Number of latitudes
integer(kind=jpim), allocatable :: nloen(:) ! Number of points on each latitude
logical :: luserpnm = .false. ! Use Belusov algorithm to compute RPNM array instead of per m
logical :: luseflt = .false. ! Use fast legendre transforms

! Extra inv_trans options
logical :: lvordiv = .false. ! Compute vorticity and divergence in grid point space
logical :: lscders = .false. ! Compute derivatives of scalar (North-South and East-West) in grid
! point space
logical :: luvder = .false. ! Compute East-West derivatives of U and V wind in grid point space

! GSTATS options
logical :: lstats = .true. ! gstats statistics
logical :: ltrace_stats = .false.
logical :: lstats_omp = .false.
logical :: lstats_comms = .false.
logical :: lstats = .true. ! gstats statistics
logical :: lbarrier_stats = .false.
logical :: lbarrier_stats2 = .false.
logical :: ldetailed_stats = .false.
Expand All @@ -125,17 +117,14 @@ program ectrans_benchmark
logical :: lstatscpu = .false.
logical :: lstats_mem = .false.
logical :: lxml_stats = .false.
logical :: lvordiv = .false.
logical :: lscders = .false.
logical :: luvder = .false.
logical :: lprint_norms = .false. ! Calculate and print spectral norms
logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end

integer(kind=jpim) :: nstats_mem = 0
integer(kind=jpim) :: ntrace_stats = 0
integer(kind=jpim) :: nprnt_stats = 1
integer(kind=jpim) :: nopt_mem_tr = 0

logical :: lprint_norms = .false. ! Calculate and print spectral norms
logical :: lmeminfo = .false. ! Show information from FIAT routine ec_meminfo at the end

! The multiplier of the machine epsilon used as a tolerance for correctness checking
! ncheck = 0 (the default) means that correctness checking is disabled
integer(kind=jpim) :: ncheck = 0
Expand All @@ -145,11 +134,6 @@ program ectrans_benchmark
! Verbosity level (0 or 1)
integer :: verbosity = 0

real(kind=jprd) :: zra = 6371229._jprd

integer(kind=jpim) :: nmax_resol = 37 ! Max number of resolutions
integer(kind=jpim) :: npromatr = 0 ! nproma for trans lib

integer(kind=jpim) :: nproc ! Number of procs
integer(kind=jpim) :: nthread
integer(kind=jpim) :: nprgpns ! Grid-point decomp
Expand All @@ -171,7 +155,6 @@ program ectrans_benchmark
logical :: lsync_trans = .true. ! Activate barrier sync
logical :: leq_regions = .true. ! Eq regions flag


integer(kind=jpim) :: nproma = 0
integer(kind=jpim) :: ngpblks
! locals
Expand Down Expand Up @@ -364,10 +347,9 @@ program ectrans_benchmark
if (verbosity >= 1) write(nout,'(a)')'======= Setup ecTrans ======='

call gstats(1, 0)
call setup_trans0(kout=nout, kerr=nerr, kprintlev=merge(2, 0, verbosity == 1), &
& kmax_resol=nmax_resol, kpromatr=npromatr, kprgpns=nprgpns, kprgpew=nprgpew, &
& kprtrw=nprtrw, ldsync_trans=lsync_trans, &
& ldeq_regions=leq_regions, prad=zra, ldalloperm=.true., ldmpoff=.not.luse_mpi,&
call setup_trans0(kout=nout, kerr=nerr, kprintlev=merge(2, 0, verbosity == 1), &
& kprgpns=nprgpns, kprgpew=nprgpew, kprtrw=nprtrw, ldsync_trans=lsync_trans, &
& ldeq_regions=leq_regions, ldalloperm=.true., ldmpoff=.not.luse_mpi, &
& kopt_memory_tr=nopt_mem_tr)
call gstats(1, 1)

Expand All @@ -376,8 +358,7 @@ program ectrans_benchmark
call set_ectrans_gpu_nflev(nflevl)
! We pass nflevl via environment variable in order not to change API
! In long run, ectrans should grow its internal buffers automatically
call setup_trans(ksmax=nsmax, kdgl=ndgl, kloen=nloen, ldsplit=.true., &
& lduserpnm=luserpnm, ldkeeprpnm=lkeeprpnm, &
call setup_trans(ksmax=nsmax, kdgl=ndgl, kloen=nloen, ldsplit=.true., lduserpnm=luserpnm, &
& lduseflt=luseflt)
call gstats(2, 1)

Expand Down Expand Up @@ -430,11 +411,13 @@ program ectrans_benchmark
! Allocate and initialize spectral arrays
!===================================================================================================

! Initialize vorticity and divergence - same for both call modes
allocate(zspvor(nflevl,nspec2))
allocate(zspdiv(nflevl,nspec2))
call initialize_spectral_field(nsmax, zspvor)
call initialize_spectral_field(nsmax, zspdiv)

! Initialize spectral arrays differently depending on call mode
if (icall_mode == 1) then
allocate(zspscalar(nfld*nflevl+1,nspec2))
call initialize_spectral_field(nsmax, zspscalar)
Expand All @@ -447,9 +430,8 @@ program ectrans_benchmark
call initialize_spectral_field(nsmax, zspsc2)
endif

! Compute spectral distribution variables
allocate(ivset(nflevg))

! Compute spectral distribution
ilev = 0
do jb = 1, nprtrv
do jlev=1, numll(jb)
Expand All @@ -474,15 +456,17 @@ program ectrans_benchmark
! Allocate gridpoint arrays
!===================================================================================================

! Determine start and end slice points for grid point arrays when they are passed back to dir_trans
ipgp_start = 1
ipgp_end = (2 + nfld) * nflevg + 1
ipgpuv_start = 1
ipgpuv_end = 2

! Also enable vorticity divergence?
if (lvordiv) then
inum_wind_fields = 4
! If lvordiv, skip the vor and div elements when passing zgp
inum_wind_fields = 4 ! Four fields - U, V, vorticity, divergence
! If lvordiv, skip the vorticity and divergence elements when passing zgp
! These two come first when enabled
ipgp_start = ipgp_start + 2 * nflevg
ipgp_end = ipgp_end + 2 * nflevg
ipgpuv_start = ipgpuv_start + 2
Expand All @@ -507,6 +491,7 @@ program ectrans_benchmark
inum_sc_2d_fields = inum_sc_2d_fields * 3
endif

! Finally, allocate grid point arrays
if (icall_mode == 1) then
itotal_fields = nflevg * (inum_wind_fields + inum_sc_3d_fields) + inum_sc_2d_fields
allocate(zgp(nproma,itotal_fields,ngpblks))
Expand Down

0 comments on commit b4fa742

Please sign in to comment.