Skip to content

Commit

Permalink
Refactored to do the entire IFLDA computation in one place. Also reba…
Browse files Browse the repository at this point in the history
…sed against develop.
  • Loading branch information
PaulMullowney committed Dec 18, 2024
1 parent 82b4406 commit d56a116
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 114 deletions.
78 changes: 32 additions & 46 deletions src/trans/gpu/internal/trgtol_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,8 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
INTEGER(KIND=JPIM) :: ISEND_FIELD_COUNT(NPRTRV),ISEND_FIELD_COUNT_V
INTEGER(KIND=JPIM) :: ISEND_WSET_SIZE(NPRTRW),ISEND_WSET_SIZE_V
INTEGER(KIND=JPIM) :: ISEND_WSET_OFFSET(NPRTRW+1), ISEND_WSET_OFFSET_V
INTEGER(KIND=JPIB), ALLOCATABLE :: ICOMBUFS_OFFSET(:),ICOMBUFR_OFFSET(:), IFLDAS(:,:)
INTEGER(KIND=JPIB), ALLOCATABLE :: ICOMBUFS_OFFSET(:),ICOMBUFR_OFFSET(:), IFLDA(:,:)
INTEGER(KIND=JPIB) :: ICOMBUFS_OFFSET_V, ICOMBUFR_OFFSET_V
INTEGER(KIND=JPIM) :: IFLDA(KF_GP)
INTEGER(KIND=JPIM) :: IVSET(KF_GP)

REAL(KIND=JPHOOK) :: ZHOOK_HANDLE
Expand Down Expand Up @@ -476,22 +475,38 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
#endif

CALL GSTATS(1602,0)
ALLOCATE(IFLDAS(KF_GP,ISEND_COUNTS))
! Allocate this buffer. Add 1 for the potential self sends
ALLOCATE(IFLDA(KF_GP,1+ISEND_COUNTS))

IF(LLOCAL_CONTRIBUTION)THEN
! I have to send something to myself...

! Input is KF_GP fields. We find the resulting KF_FS fields.
IFLDS = 0
DO JFLD=1,KF_GP
IF(IVSET(JFLD) == MYSETV .OR. IVSET(JFLD) == -1) THEN
IFLDS = IFLDS+1
IF(PRESENT(KPTRGP)) THEN
IFLDA(IFLDS,1) = KPTRGP(JFLD)
ELSE
IFLDA(IFLDS,1) = JFLD
ENDIF
ENDIF
ENDDO
ENDIF
DO INS=1,ISEND_COUNTS
ISEND=ISEND_TO_PROC(INS)
CALL PE2SET(ISEND,ISETA,ISETB,ISETW,ISETV)

ISEND_FIELD_COUNT_V = ISEND_FIELD_COUNT(ISETV)
ICOMBUFS_OFFSET_V = ICOMBUFS_OFFSET(INS)


IFLDS = 0
DO JFLD=1,KF_GP
IF(IVSET(JFLD) == ISETV .OR. IVSET(JFLD) == -1 ) THEN
IFLDS = IFLDS+1
IF(PRESENT(KPTRGP)) THEN
IFLDAS(IFLDS,INS)=KPTRGP(JFLD)
IFLDA(IFLDS,1+INS)=KPTRGP(JFLD)
ELSE
IFLDAS(IFLDS,INS)=JFLD
IFLDA(IFLDS,1+INS)=JFLD
ENDIF
ENDIF
ENDDO
Expand All @@ -500,7 +515,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
#ifdef OMPGPU
#endif
#ifdef ACCGPU
!$ACC DATA COPYIN(IFLDAS) ASYNC(1)
!$ACC DATA COPYIN(IFLDA) ASYNC(1)
#endif

DO INS=1,ISEND_COUNTS
Expand All @@ -524,7 +539,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
DO JL=1,ISEND_WSET_SIZE_V
JK = MOD(ISEND_WSET_OFFSET_V+JL-1,NPROMA)+1
JBLK = (ISEND_WSET_OFFSET_V+JL-1)/NPROMA+1
IFLD = IFLDAS(JFLD,INS)
IFLD = IFLDA(JFLD,1+INS)
JI = (JFLD-1)*ISEND_WSET_SIZE_V+JL
ZCOMBUFS(ICOMBUFS_OFFSET_V+JI) = PGP(JK,IFLD,JBLK)
ENDDO
Expand All @@ -541,7 +556,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
DO JL=1,ISEND_WSET_SIZE_V
JK = MOD(ISEND_WSET_OFFSET_V+JL-1,NPROMA)+1
JBLK = (ISEND_WSET_OFFSET_V+JL-1)/NPROMA+1
IFLD = IFLDAS(JFLD,INS)
IFLD = IFLDA(JFLD,1+INS)
JI = ICOMBUFS_OFFSET_V+(JFLD-1)*ISEND_WSET_SIZE_V+JL
IF(IFLD < PGP_INDICES(PGP_INDICES_UV+1)) THEN
IOFF=IFLD-PGP_INDICES(PGP_INDICES_UV)
Expand All @@ -567,13 +582,9 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
#ifdef OMPGPU
#endif
#ifdef ACCGPU
!$ACC END DATA
!$ACC WAIT(1)
#endif

! Free this now
DEALLOCATE(IFLDAS)

CALL GSTATS(1602,1)

IF (LSYNC_TRANS) THEN
Expand Down Expand Up @@ -645,27 +656,6 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,

! Copy local contribution
IF(LLOCAL_CONTRIBUTION)THEN
! I have to send something to myself...

! Input is KF_GP fields. We find the resulting KF_FS fields.
IFLDS = 0
DO JFLD=1,KF_GP
IF(IVSET(JFLD) == MYSETV .OR. IVSET(JFLD) == -1) THEN
IFLDS = IFLDS+1
IF(PRESENT(KPTRGP)) THEN
IFLDA(IFLDS) = KPTRGP(JFLD)
ELSE
IFLDA(IFLDS) = JFLD
ENDIF
ENDIF
ENDDO

#ifdef OMPGPU
#endif
#ifdef ACCGPU
!$ACC DATA COPYIN(IFLDA(1:IFLDS)) ASYNC(1)
#endif

ISEND_WSET_OFFSET_V = ISEND_WSET_OFFSET(MYSETW)
ISEND_WSET_SIZE_V = ISEND_WSET_SIZE(MYSETW)
IRECV_BUFR_TO_OUT_V = IRECV_BUFR_TO_OUT_OFFSET(MYPROC)
Expand All @@ -682,7 +672,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
DO JL=1,ISEND_WSET_SIZE_V
JK = MOD(ISEND_WSET_OFFSET_V+JL-1,NPROMA)+1
JBLK = (ISEND_WSET_OFFSET_V+JL-1)/NPROMA+1
IFLD = IFLDA(JFLD)
IFLD = IFLDA(JFLD,1)
IPOS = IRECV_BUFR_TO_OUT(IRECV_BUFR_TO_OUT_V+JL,1)+ &
& (JFLD-1)*IRECV_BUFR_TO_OUT(IRECV_BUFR_TO_OUT_V+JL,2)+1
PREEL_REAL(IPOS) = PGP(JK,IFLD,JBLK)
Expand All @@ -700,7 +690,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
DO JL=1,ISEND_WSET_SIZE_V
JK = MOD(ISEND_WSET_OFFSET_V+JL-1,NPROMA)+1
JBLK = (ISEND_WSET_OFFSET_V+JL-1)/NPROMA+1
IFLD = IFLDA(JFLD)
IFLD = IFLDA(JFLD,1)
IPOS = IRECV_BUFR_TO_OUT(IRECV_BUFR_TO_OUT_V+JL,1)+ &
& (JFLD-1)*IRECV_BUFR_TO_OUT(IRECV_BUFR_TO_OUT_V+JL,2)+1
IF(IFLD < PGP_INDICES(PGP_INDICES_UV+1)) THEN
Expand All @@ -723,16 +713,8 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
ENDDO
ENDIF
CALL GSTATS(1601,1)

#ifdef OMPGPU
#endif
#ifdef ACCGPU
!$ACC END DATA
#endif

ENDIF


IF(IR > 0) THEN
CALL MPL_WAIT(KREQUEST=IREQ(1:IR), &
& CDSTRING='TRGTOL: WAIT FOR SENDS AND RECEIVES')
Expand Down Expand Up @@ -785,6 +767,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
#endif
#ifdef ACCGPU
!$ACC END DATA ! ZCOMBUFR
!$ACC END DATA ! IFLDA
!$ACC END DATA ! IRECV_BUFR_TO_OUT
!$ACC END DATA ! PGPINDICES
!$ACC END DATA !ZCOMBUFS (present)
Expand All @@ -796,6 +779,9 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G,
#endif
IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND)

! Free this now
DEALLOCATE(IFLDA)

IF (LHOOK) CALL DR_HOOK('TRGTOL',1,ZHOOK_HANDLE)
END SUBROUTINE TRGTOL
END MODULE TRGTOL_MOD
Loading

0 comments on commit d56a116

Please sign in to comment.