From e9249dacb8e56f903a568a34e24e317fb10fb137 Mon Sep 17 00:00:00 2001 From: "Alexey V. Medvedev" Date: Tue, 5 Nov 2024 17:17:54 +0100 Subject: [PATCH] [HOTFIX] Quick fix for occasional crashes in MPL_ALLTOALLV with an extra MPI_WAIT (#29) This actually reverts the async scenario to sync. Further development will properly provide async. --- src/fiat/mpl/internal/mpl_alltoallv_mod.F90 | 22 +++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/fiat/mpl/internal/mpl_alltoallv_mod.F90 b/src/fiat/mpl/internal/mpl_alltoallv_mod.F90 index 3fa766d..3e4e105 100644 --- a/src/fiat/mpl/internal/mpl_alltoallv_mod.F90 +++ b/src/fiat/mpl/internal/mpl_alltoallv_mod.F90 @@ -70,6 +70,7 @@ MODULE MPL_ALLTOALLV_MOD USE MPL_DATA_MODULE USE MPL_MESSAGE_MOD USE MPL_STATS_MOD +USE MPL_WAIT_MOD USE YOMMPLSTATS IMPLICIT NONE @@ -218,6 +219,8 @@ SUBROUTINE MPL_ALLTOALLV_REAL8(PSENDBUF,KSENDCOUNTS,PRECVBUF,KRECVCOUNTS,& CHARACTER(LEN=*),INTENT(IN),OPTIONAL :: CDSTRING INTEGER(KIND=JPIM),INTENT(OUT),OPTIONAL :: KREQUEST,KERROR +!! FIXME: the lifetime of IRECVDISPL and ISENDDISPL is limited to this routine! +!! Details: https://github.com/ecmwf-ifs/fiat/issues/17 INTEGER(KIND=JPIM) :: IRECVDISPL(MPL_NUMPROC),ISENDDISPL(MPL_NUMPROC) ISENDCOUNT=SIZE(PSENDBUF) @@ -251,6 +254,11 @@ SUBROUTINE MPL_ALLTOALLV_REAL8(PSENDBUF,KSENDCOUNTS,PRECVBUF,KRECVCOUNTS,& IF(.NOT.PRESENT(KREQUEST)) CALL MPL_MESSAGE(KERROR,'MPL_ALLTOALLV',' KREQUEST MISSING',LDABORT=LLABORT) CALL MPI_IALLTOALLV(PSENDBUF(:),KSENDCOUNTS,ISENDDISPL,INT(MPI_REAL8), & & PRECVBUF(:),KRECVCOUNTS,IRECVDISPL,INT(MPI_REAL8),ICOMM,KREQUEST,IERROR) + !!! FIXME: since the lifetime of IRECVDISPL and ISENDDISPL is limited to this routine, + !!! we have to complete the request with MPL_WAIT right here, before returning. + !!! 
Details: https://github.com/ecmwf-ifs/fiat/issues/17 + CALL MPL_WAIT(KREQUEST) + KREQUEST=MPI_REQUEST_NULL ELSE IF(PRESENT(KERROR)) THEN IERROR=1 @@ -291,6 +299,8 @@ SUBROUTINE MPL_ALLTOALLV_REAL4(PSENDBUF,KSENDCOUNTS,PRECVBUF,KRECVCOUNTS,& CHARACTER(LEN=*),INTENT(IN),OPTIONAL :: CDSTRING INTEGER(KIND=JPIM),INTENT(OUT),OPTIONAL :: KREQUEST,KERROR +!! FIXME: the lifetime of IRECVDISPL and ISENDDISPL is limited to this routine! +!! Details: https://github.com/ecmwf-ifs/fiat/issues/17 INTEGER(KIND=JPIM) :: IRECVDISPL(MPL_NUMPROC),ISENDDISPL(MPL_NUMPROC) ISENDCOUNT=SIZE(PSENDBUF) @@ -321,6 +331,11 @@ SUBROUTINE MPL_ALLTOALLV_REAL4(PSENDBUF,KSENDCOUNTS,PRECVBUF,KRECVCOUNTS,& ELSEIF(IMP_TYPE == JP_NON_BLOCKING_STANDARD .OR. IMP_TYPE == JP_NON_BLOCKING_BUFFERED) THEN CALL MPI_IALLTOALLV(PSENDBUF(:),KSENDCOUNTS,ISENDDISPL,INT(MPI_REAL4), & & PRECVBUF(:),KRECVCOUNTS,IRECVDISPL,INT(MPI_REAL4),ICOMM,KREQUEST,IERROR) + !!! FIXME: since the lifetime of IRECVDISPL and ISENDDISPL is limited to this routine, + !!! we have to complete the request with MPL_WAIT right here, before returning. + !!! Details: https://github.com/ecmwf-ifs/fiat/issues/17 + CALL MPL_WAIT(KREQUEST) + KREQUEST=MPI_REQUEST_NULL ELSE IF(PRESENT(KERROR)) THEN IERROR=1 @@ -362,6 +377,8 @@ SUBROUTINE MPL_ALLTOALLV_INTEGER(KSENDBUF,KSENDCOUNTS,KRECVBUF,KRECVCOUNTS,& CHARACTER(LEN=*),INTENT(IN),OPTIONAL :: CDSTRING INTEGER(KIND=JPIM),INTENT(OUT),OPTIONAL :: KREQUEST,KERROR +!! FIXME: the lifetime of IRECVDISPL and ISENDDISPL is limited to this routine! +!! 
Details: https://github.com/ecmwf-ifs/fiat/issues/17 INTEGER(KIND=JPIM) :: IRECVDISPL(MPL_NUMPROC),ISENDDISPL(MPL_NUMPROC) ISENDCOUNT=SIZE(KSENDBUF) @@ -393,6 +410,11 @@ SUBROUTINE MPL_ALLTOALLV_INTEGER(KSENDBUF,KSENDCOUNTS,KRECVBUF,KRECVCOUNTS,& IF(.NOT.PRESENT(KREQUEST)) CALL MPL_MESSAGE(KERROR,'MPL_ALLTOALLV',' KREQUEST MISSING',LDABORT=LLABORT) CALL MPI_IALLTOALLV(KSENDBUF(1),KSENDCOUNTS,ISENDDISPL,INT(MPI_INTEGER), & & KRECVBUF(1),KRECVCOUNTS,IRECVDISPL,INT(MPI_INTEGER),ICOMM,KREQUEST,IERROR) + !!! FIXME: since the lifetime of IRECVDISPL and ISENDDISPL is limited to this routine, + !!! we have to complete the request with MPL_WAIT right here, before returning. + !!! Details: https://github.com/ecmwf-ifs/fiat/issues/17 + CALL MPL_WAIT(KREQUEST) + KREQUEST=MPI_REQUEST_NULL ELSE IF(PRESENT(KERROR)) THEN IERROR=1