diff --git a/src/ecwam/ctuw.F90 b/src/ecwam/ctuw.F90 index d84223788..ad4cdb36c 100644 --- a/src/ecwam/ctuw.F90 +++ b/src/ecwam/ctuw.F90 @@ -13,6 +13,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & & WLATM1, WCORM1, DP, & & CGROUP_EXT, OMOSNH2KD_EXT, & & COSPHM1_EXT, DEPTH_EXT, U_EXT, V_EXT ) +use nvtx ! ---------------------------------------------------------------------- !**** *CTUW* - COMPUTATION OF THE CONER TRANSPORT SCHEME WEIGHTS. @@ -99,7 +100,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: DRCP,DRCM REAL(KIND=JWRB), DIMENSION(KIJS:KIJL) :: CURMASK REAL(KIND=JWRB), DIMENSION(KIJS:KIJL,2) :: CGX, CGY - + ! ---------------------------------------------------------------------- @@ -141,86 +142,173 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & !* LOOP OVER FREQUENCIES. ! ---------------------- +! call nvtxStartRange("ctuw: Loop 1") +!$acc kernels !loop private(CGYP,KIJS,KIJL,CGX,IX,KY,UU,UREL,ISSU,VV,VREL,ISSV,DXP,DYP,ADXP,ADYP,DXUP,DXDW,DYUP,DYDW,DXX,DYY,GRIDAREAM1,WEIGHT) DO M = MSTART, MEND !* LOOP OVER DIRECTIONS. ! --------------------- +!$acc loop private(CGX,CGY) + DO K=1,NANG ! FIND MEAN GROUP VELOCITY COMPONENTS FOR DIRECTION TH(K)+180 ! ----------------------------------------------------------- - DO IC=1,2 +!$acc loop private(CGYP,IX,KY,UU,UREL,ISSU,VV,VREL,ISSV,DXP,DYP,ADXP,ADYP,DXUP,DXDW,DYUP,DYDW,DXX,DYY,GRIDAREAM1,WEIGHT) DO IJ=KIJS,KIJL - CGX(IJ,IC)= & - & 0.5_JWRB*(CGROUP_EXT(IJ,M)+CGROUP_EXT(KLON(IJ,IC),M)) & +! IC = 1 + + CGX(IJ,1)= & + & 0.5_JWRB*(CGROUP_EXT(IJ,M)+CGROUP_EXT(KLON(IJ,1),M)) & & *SINTH(K)*COSPHM1_EXT(IJ) ! IRREGULAR GRID IF (IRGG == 1) THEN - CGYP=WLAT(IJ,IC)*CGROUP_EXT(KLAT(IJ,IC,1),M)+ & - & (1.0_JWRB-WLAT(IJ,IC))*CGROUP_EXT(KLAT(IJ,IC,2),M) + CGYP=WLAT(IJ,1)*CGROUP_EXT(KLAT(IJ,1,1),M)+ & + & (1.0_JWRB-WLAT(IJ,1))*CGROUP_EXT(KLAT(IJ,1,2),M) ELSE ! REGULAR GRID - CGYP=CGROUP_EXT(KLAT(IJ,IC,1),M) + CGYP=CGROUP_EXT(KLAT(IJ,1,1),M) + ENDIF + CGY(IJ,1)=0.5_JWRB*(CGROUP_EXT(IJ,M)+DP(IJ,1)*CGYP)*COSTH(K) + + IX=BLK2GLO%IXLG(IJ) + KY=BLK2GLO%KXLT(IJ) + IF (IREFRA == 2 .OR. IREFRA == 3 ) THEN + UU=U_EXT(IJ)*COSPHM1_EXT(IJ) + UREL=CGX(IJ,1)+UU + ISSU(1)=ISAMESIGN(UREL,CGX(IJ,1)) + VV=V_EXT(IJ)*0.5_JWRB*(1.0_JWRB+DP(IJ,1)) + VREL=CGY(IJ,1)+VV + ISSV(1)=ISAMESIGN(VREL,CGY(IJ,1)) + ELSE + UREL=CGX(IJ,1) + ISSU(1)=1 + VREL=CGY(IJ,1) + ISSV(1)=1 ENDIF - CGY(IJ,IC)=0.5_JWRB*(CGROUP_EXT(IJ,M)+DP(IJ,IC)*CGYP)*COSTH(K) - ENDDO - ENDDO + DXP=-DELPRO*UREL*CMTODEG + DYP=-DELPRO*VREL*CMTODEG + + ADXP(1)=ABS(DXP) + ADYP(1)=ABS(DYP) + DXUP(1)=ADXP(1)*ISSU(1) + DXDW(1)=ADXP(1)*(1-ISSU(1)) + DYUP(1)=ADYP(1)*ISSV(1) + DYDW(1)=ADYP(1)*(1-ISSV(1)) +! GET ADVECTION WEIGHT FOR ALL NEIGHBOURING GRID POINTS +! IC = 2 + CGX(IJ,2)= & + & 0.5_JWRB*(CGROUP_EXT(IJ,M)+CGROUP_EXT(KLON(IJ,2),M)) & + & *SINTH(K)*COSPHM1_EXT(IJ) +! IRREGULAR GRID + IF (IRGG == 1) THEN + CGYP=WLAT(IJ,2)*CGROUP_EXT(KLAT(IJ,2,1),M)+ & + & (1.0_JWRB-WLAT(IJ,2))*CGROUP_EXT(KLAT(IJ,2,2),M) + ELSE +! REGULAR GRID + CGYP=CGROUP_EXT(KLAT(IJ,2,1),M) + ENDIF + CGY(IJ,2)=0.5_JWRB*(CGROUP_EXT(IJ,M)+DP(IJ,2)*CGYP)*COSTH(K) + IF (IREFRA == 2 .OR. IREFRA == 3 ) THEN + UU=U_EXT(IJ)*COSPHM1_EXT(IJ) + UREL=CGX(IJ,2)+UU + ISSU(2)=ISAMESIGN(UREL,CGX(IJ,2)) + VV=V_EXT(IJ)*0.5_JWRB*(1.0_JWRB+DP(IJ,2)) + VREL=CGY(IJ,2)+VV + ISSV(2)=ISAMESIGN(VREL,CGY(IJ,2)) + ELSE + UREL=CGX(IJ,2) + ISSU(2)=1 + VREL=CGY(IJ,2) + ISSV(2)=1 + ENDIF + DXP=-DELPRO*UREL*CMTODEG + DYP=-DELPRO*VREL*CMTODEG + ADXP(2)=ABS(DXP) + ADYP(2)=ABS(DYP) + DXUP(2)=ADXP(2)*ISSU(2) + DXDW(2)=ADXP(2)*(1-ISSU(2)) + DYUP(2)=ADYP(2)*ISSV(2) + DYDW(2)=ADYP(2)*(1-ISSV(2)) -! LOOP OVER GRID POINTS -! --------------------- - DO IJ=KIJS,KIJL - IX=BLK2GLO%IXLG(IJ) - KY=BLK2GLO%KXLT(IJ) +! GET ADVECTION WEIGHT FOR ALL NEIGHBOURING GRID POINTS -! FLUX VELOCITIES AT THE GRID BOX INTERFACE + DXX=ZDELLO(KY)-DXUP(JXO(K,2))-DXDW(JXO(K,1)) + DYY=XDELLA-DYUP(JYO(K,2))-DYDW(JYO(K,1)) - DO IC=1,2 - - IF (IREFRA == 2 .OR. IREFRA == 3 ) THEN - UU=U_EXT(IJ)*COSPHM1_EXT(IJ) - UREL=CGX(IJ,IC)+UU - ISSU(IC)=ISAMESIGN(UREL,CGX(IJ,IC)) - VV=V_EXT(IJ)*0.5_JWRB*(1.0_JWRB+DP(IJ,IC)) - VREL=CGY(IJ,IC)+VV - ISSV(IC)=ISAMESIGN(VREL,CGY(IJ,IC)) - ELSE - UREL=CGX(IJ,IC) - ISSU(IC)=1 - VREL=CGY(IJ,IC) - ISSV(IC)=1 - ENDIF - DXP=-DELPRO*UREL*CMTODEG - DYP=-DELPRO*VREL*CMTODEG - ADXP(IC)=ABS(DXP) - ADYP(IC)=ABS(DYP) + GRIDAREAM1 = 1.0_JWRB/(ZDELLO(KY)*XDELLA) + +! WEIGHTED CONTRIBUTION FROM NORTH-SOUTH DIRECTION (WLATN) + + WEIGHT(JYO(K,1))=DXX*DYUP(JYO(K,1))*GRIDAREAM1 + WEIGHT(JYO(K,2))=DXX*DYDW(JYO(K,2))*GRIDAREAM1 + + WLATN(IJ,K,M,1,1)=WLAT(IJ,1)*WEIGHT(1) + WLATN(IJ,K,M,1,2)=WLATM1(IJ,1)*WEIGHT(1) + WLATN(IJ,K,M,2,1)=WLAT(IJ,2)*WEIGHT(2) + WLATN(IJ,K,M,2,2)=WLATM1(IJ,2)*WEIGHT(2) + +! WEIGHTED CONTRIBUTION FROM EAST-WEST DIRECTION (WLONN) + + WLONN(IJ,K,M,JXO(K,1))=DYY*DXUP(JXO(K,1))*GRIDAREAM1 + WLONN(IJ,K,M,JXO(K,2))=DYY*DXDW(JXO(K,2))*GRIDAREAM1 + + +! CONTRIBUTION FROM CORNERS (KCOR) + WEIGHT(1)=DXUP(JXO(K,1))*DYUP(JYO(K,1))*GRIDAREAM1 + WEIGHT(2)=DXDW(JXO(K,2))*DYUP(JYO(K,1))*GRIDAREAM1 + WEIGHT(3)=DXUP(JXO(K,1))*DYDW(JYO(K,2))*GRIDAREAM1 + WEIGHT(4)=DXDW(JXO(K,2))*DYDW(JYO(K,2))*GRIDAREAM1 + DO ICR=1,4 + WCORN(IJ,K,M,ICR,1)=WCOR(IJ,KCR(K,ICR))*WEIGHT(ICR) + WCORN(IJ,K,M,ICR,2)=WCORM1(IJ,KCR(K,ICR))*WEIGHT(ICR) + ENDDO + +! CONTRIBUTIONS FOR IJ + SUMWN(IJ,K,M)=(ZDELLO(KY)* & + & (DYDW(JYO(K,1))+DYUP(JYO(K,2))) + & + & XDELLA* & + & (DXUP(JXO(K,2))+DXDW(JXO(K,1))) - & + & (DXDW(JXO(K,1))+DXUP(JXO(K,2)))* & + & (DYDW(JYO(K,1))+DYUP(JYO(K,2))) ) & + & *GRIDAREAM1 + + + +! LOOP OVER GRID POINTS +! --------------------- + +#IFNDEF _OPENACC + +! FLUX VELOCITIES AT THE GRID BOX INTERFACE ! BASIC CFL CHECKS (IN EACH DIRECTION) ! ---------------- - IF (ADXP(IC) > ZDELLO(KY))THEN + IF (ADXP(1) > ZDELLO(KY))THEN WRITE (IU06,*) '********************************' WRITE (IU06,*) '* CTUW: *' WRITE (IU06,*) '* CFL VIOLATED IN X DIRECTION. *' WRITE (IU06,*) '* ADXP SHOULD BE < ZDELLO, BUT *' - WRITE (IU06,*) '* ADXP = ',ADXP(IC),IC + WRITE (IU06,*) '* ADXP = ',ADXP(1),1 WRITE (IU06,*) '* ZDELLO = ',ZDELLO(KY) - DTNEW=ZDELLO(KY)*DELPRO/ADXP(IC) + DTNEW=ZDELLO(KY)*DELPRO/ADXP(1) WRITE (IU06,*) '* TIME STEP ',DELPRO WRITE (IU06,*) '* SHOULD BE REDUCED TO ', DTNEW WRITE (IU06,*) '* *' WRITE (IU06,*) '********************************' LCFLFAIL(IJ)=.TRUE. ENDIF - IF (ADYP(IC) > XDELLA)THEN + IF (ADYP(1) > XDELLA)THEN XLON=AMOWEP+(IX-1)*ZDELLO(KY) XLAT=AMOSOP+(KY-1)*XDELLA - DTNEW=XDELLA*DELPRO/ADYP(IC) + DTNEW=XDELLA*DELPRO/ADYP(1) WRITE (IU06,*) '********************************' WRITE (IU06,*) '* CTUW: *' WRITE (IU06,*) '* CFL VIOLATED IN Y DIRECTION. *' WRITE (IU06,*) '* ADYP SHOULD BE < XDELLA, BUT *' - WRITE (IU06,*) '* ADYP = ',ADYP(IC),IC + WRITE (IU06,*) '* ADYP = ',ADYP(1),1 WRITE (IU06,*) '* XDELLA = ',XDELLA WRITE (IU06,*) '* XLAT= ',XLAT,' XLON= ',XLON WRITE (IU06,*) '* DEPTH= ',DEPTH_EXT(IJ) @@ -230,62 +318,58 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & WRITE (IU06,*) '********************************' LCFLFAIL(IJ)=.TRUE. ENDIF - - DXUP(IC)=ADXP(IC)*ISSU(IC) - DXDW(IC)=ADXP(IC)*(1-ISSU(IC)) - DYUP(IC)=ADYP(IC)*ISSV(IC) - DYDW(IC)=ADYP(IC)*(1-ISSV(IC)) +#ENDIF - ENDDO - -! GET ADVECTION WEIGHT FOR ALL NEIGHBOURING GRID POINTS - - DXX=ZDELLO(KY)-DXUP(JXO(K,2))-DXDW(JXO(K,1)) - DYY=XDELLA-DYUP(JYO(K,2))-DYDW(JYO(K,1)) - - GRIDAREAM1 = 1.0_JWRB/(ZDELLO(KY)*XDELLA) - -! WEIGHTED CONTRIBUTION FROM NORTH-SOUTH DIRECTION (WLATN) - - WEIGHT(JYO(K,1))=DXX*DYUP(JYO(K,1))*GRIDAREAM1 - WEIGHT(JYO(K,2))=DXX*DYDW(JYO(K,2))*GRIDAREAM1 - DO IC=1,2 - WLATN(IJ,K,M,IC,1)=WLAT(IJ,IC)*WEIGHT(IC) - WLATN(IJ,K,M,IC,2)=WLATM1(IJ,IC)*WEIGHT(IC) - ENDDO - -! WEIGHTED CONTRIBUTION FROM EAST-WEST DIRECTION (WLONN) - - WLONN(IJ,K,M,JXO(K,1))=DYY*DXUP(JXO(K,1))*GRIDAREAM1 - WLONN(IJ,K,M,JXO(K,2))=DYY*DXDW(JXO(K,2))*GRIDAREAM1 - -! CONTRIBUTION FROM CORNERS (KCOR) - WEIGHT(1)=DXUP(JXO(K,1))*DYUP(JYO(K,1))*GRIDAREAM1 - WEIGHT(2)=DXDW(JXO(K,2))*DYUP(JYO(K,1))*GRIDAREAM1 - WEIGHT(3)=DXUP(JXO(K,1))*DYDW(JYO(K,2))*GRIDAREAM1 - WEIGHT(4)=DXDW(JXO(K,2))*DYDW(JYO(K,2))*GRIDAREAM1 - DO ICR=1,4 - WCORN(IJ,K,M,ICR,1)=WCOR(IJ,KCR(K,ICR))*WEIGHT(ICR) - WCORN(IJ,K,M,ICR,2)=WCORM1(IJ,KCR(K,ICR))*WEIGHT(ICR) - ENDDO - -! CONTRIBUTIONS FOR IJ - SUMWN(IJ,K,M)=(ZDELLO(KY)* & - & (DYDW(JYO(K,1))+DYUP(JYO(K,2))) + & - & XDELLA* & - & (DXUP(JXO(K,2))+DXDW(JXO(K,1))) - & - & (DXDW(JXO(K,1))+DXUP(JXO(K,2)))* & - & (DYDW(JYO(K,1))+DYUP(JYO(K,2))) ) & - & *GRIDAREAM1 +! BASIC CFL CHECKS (IN EACH DIRECTION) +! ---------------- +#IFNDEF _OPENACC + IF (ADXP(2) > ZDELLO(KY))THEN + WRITE (IU06,*) '********************************' + WRITE (IU06,*) '* CTUW: *' + WRITE (IU06,*) '* CFL VIOLATED IN X DIRECTION. *' + WRITE (IU06,*) '* ADXP SHOULD BE < ZDELLO, BUT *' + WRITE (IU06,*) '* ADXP = ',ADXP(2),2 + WRITE (IU06,*) '* ZDELLO = ',ZDELLO(KY) + DTNEW=ZDELLO(KY)*DELPRO/ADXP(2) + WRITE (IU06,*) '* TIME STEP ',DELPRO + WRITE (IU06,*) '* SHOULD BE REDUCED TO ', DTNEW + WRITE (IU06,*) '* *' + WRITE (IU06,*) '********************************' + LCFLFAIL(IJ)=.TRUE. + ENDIF + IF (ADYP(2) > XDELLA)THEN + XLON=AMOWEP+(IX-1)*ZDELLO(KY) + XLAT=AMOSOP+(KY-1)*XDELLA + DTNEW=XDELLA*DELPRO/ADYP(2) + WRITE (IU06,*) '********************************' + WRITE (IU06,*) '* CTUW: *' + WRITE (IU06,*) '* CFL VIOLATED IN Y DIRECTION. *' + WRITE (IU06,*) '* ADYP SHOULD BE < XDELLA, BUT *' + WRITE (IU06,*) '* ADYP = ',ADYP(2),2 + WRITE (IU06,*) '* XDELLA = ',XDELLA + WRITE (IU06,*) '* XLAT= ',XLAT,' XLON= ',XLON + WRITE (IU06,*) '* DEPTH= ',DEPTH_EXT(IJ) + WRITE (IU06,*) '* TIME STEP ',DELPRO + WRITE (IU06,*) '* SHOULD BE REDUCED TO ', DTNEW + WRITE (IU06,*) '* *' + WRITE (IU06,*) '********************************' + LCFLFAIL(IJ)=.TRUE. + ENDIF +#ENDIF - ENDDO ! END LOOP OVER GRID POINTS + ENDDO ! END LOOP OVER GRID POINTS ENDDO ! END LOOP OVER DIRECTIONS ENDDO ! END LOOP OVER FREQUENCIES +!$acc end kernels +! call nvtxEndRange + + + ELSE !* CARTESIAN GRID. @@ -327,9 +411,12 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & DELTH0 = 0.25*DELPRO/DELTH + !* LOOP OVER DIRECTIONS. ! --------------------- +! call nvtxStartRange("ctuw: Loop 2") +!$acc parallel loop private(km1,kp1,sp,sm,DELFR0,DRGP,DRGM,DRDP,DRDM,DRCP,DRCM) DO K=1,NANG KP1 = K+1 IF (KP1 > NANG) KP1 = 1 @@ -341,6 +428,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & SP = DELTH0*(SINTH(K)+SINTH(KP1))/R SM = DELTH0*(SINTH(K)+SINTH(KM1))/R +!$acc loop private(jh,tanph) DO IJ = KIJS,KIJL JH=BLK2GLO%KXLT(IJ) TANPH = SINPH(JH)/COSPH(JH) @@ -351,11 +439,13 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & !* COMPUTE DEPTH REFRACTION. ! ------------------------- IF (IREFRA == 1) THEN +!$acc loop DO IJ = KIJS,KIJL DRDP(IJ) = (THDD(IJ,K) + THDD(IJ,KP1))*DELTH0 DRDM(IJ) = (THDD(IJ,K) + THDD(IJ,KM1))*DELTH0 ENDDO ELSE +!$acc loop DO IJ = KIJS,KIJL DRDP(IJ) = 0.0_JWRB DRDM(IJ) = 0.0_JWRB @@ -366,11 +456,13 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ! --------------------------- IF (IREFRA == 2 .OR. IREFRA == 3 ) THEN +!$acc loop DO IJ = KIJS,KIJL DRCP(IJ) = CURMASK(IJ)*(THDC(IJ,K) + THDC(IJ,KP1))*DELTH0 DRCM(IJ) = CURMASK(IJ)*(THDC(IJ,K) + THDC(IJ,KM1))*DELTH0 ENDDO ELSE +!$acc loop DO IJ = KIJS,KIJL DRCP(IJ) = 0.0_JWRB DRCM(IJ) = 0.0_JWRB @@ -384,6 +476,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & !* NO DEPTH REFRACTION. ! ------------------- IF (IREFRA == 0) THEN +!$acc loop collapse(2) private(DTHP,DTHM) DO M = MSTART, MEND DO IJ=KIJS,KIJL DTHP = DRGP(IJ)*CGROUP_EXT(IJ,M) + DRCP(IJ) @@ -391,11 +484,13 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & WKPMN(IJ,K,M,0)=(DTHP+ABS(DTHP))+(ABS(DTHM)-DTHM) WKPMN(IJ,K,M,1)=-DTHP+ABS(DTHP) WKPMN(IJ,K,M,-1)=DTHM+ABS(DTHM) + SUMWN(IJ,K,M)=SUMWN(IJ,K,M)+WKPMN(IJ,K,M,0) ENDDO ENDDO ELSE !* SHALLOW WATER AND DEPTH REFRACTION. ! ----------------------------------- +!$acc loop collapse(2) private(DTHP,DTHM) DO M = MSTART, MEND DO IJ=KIJS,KIJL DTHP = DRGP(IJ)*CGROUP_EXT(IJ,M)+OMOSNH2KD_EXT(IJ,M)*DRDP(IJ)+DRCP(IJ) @@ -403,6 +498,7 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & WKPMN(IJ,K,M,0)=(DTHP+ABS(DTHP))+(ABS(DTHM)-DTHM) WKPMN(IJ,K,M,1)=-DTHP+ABS(DTHP) WKPMN(IJ,K,M,-1)=DTHM+ABS(DTHM) + SUMWN(IJ,K,M)=SUMWN(IJ,K,M)+WKPMN(IJ,K,M,0) ENDDO ENDDO ENDIF @@ -414,12 +510,12 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & DELFR0 = 0.25_JWRB*DELPRO/((FRATIO-1)*ZPI) +!$acc loop private(MP1,MM1,DFP,DFM) private(DTHP,DTHM) DO M = MSTART, MEND MP1 = MIN(NFRE_RED,M+1) MM1 = MAX(1,M-1) DFP = DELFR0/FR(M) DFM = DELFR0/FR(MM1) - DO IJ=KIJS,KIJL DTHP = CURMASK(IJ) * (SDOT(IJ,K,M) + SDOT(IJ,K,MP1))*DFP DTHM = CURMASK(IJ) * (SDOT(IJ,K,M) + SDOT(IJ,K,MM1))*DFM @@ -431,12 +527,16 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ENDIF ENDDO ! END LOOP ON DIRECTIONS - +!$acc end parallel +! call nvtxEndRange ! CHECK THAT WEIGHTS ARE LESS THAN 1 ! AND COMPUTE THEIR SUM AND CHECK IT IS LESS THAN 1 AS WELL !!! THE SUM IS NEEDED LATER ON !!! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +! call nvtxStartRange("ctuw: Loop 3") +!!$acc kernels loop seq +#IFNDEF _OPENACC DO K=1,NANG DO M = MSTART, MEND DO IJ=KIJS,KIJL @@ -591,6 +691,13 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ENDDO ! END LOOP OVER GRID POINTS ENDDO ! END LOOP OVER FREQUENCIES ENDDO ! END LOOP OVER DIRECTIONS +#ENDIF +!!$acc end kernels +! call nvtxEndRange + +!!WORKAROUNDDDDDDD +!LCFLFAIL=.FALSE. +!!WORKAROUNDDDDDDD DO IJ=KIJS,KIJL IF (LCFLFAIL(IJ)) THEN @@ -603,11 +710,14 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & !!!!!!INCLUDE THE BLOCKING COEFFICIENTS INTO THE WEIGHTS OF THE ! SURROUNDING POINTS. +! call nvtxStartRange("ctuw: Loop 4") +!$acc parallel loop collapse(3) DO K=1,NANG DO M = MSTART, MEND DO IJ=KIJS,KIJL ! POINTS ON SURROUNDING LATITUDES +!$acc loop collapse(2) DO IC=1,2 DO ICL=1,2 WLATN(IJ,K,M,IC,ICL) = WLATN(IJ,K,M,IC,ICL)*OBSLAT(IJ,M,IC) @@ -615,11 +725,13 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ENDDO ! POINTS ON SURROUNDING LONGITUDE +!$acc loop DO IC=1,2 WLONN(IJ,K,M,IC) = WLONN(IJ,K,M,IC)*OBSLON(IJ,M,IC) ENDDO ! SURROUNDING CORNER POINTS +!$acc loop collapse(2) DO ICR=1,4 DO ICL=1,2 WCORN(IJ,K,M,ICR,ICL) = WCORN(IJ,K,M,ICR,ICL)*OBSCOR(IJ,M,KCR(K,ICR)) @@ -629,9 +741,14 @@ SUBROUTINE CTUW (DELPRO, MSTART, MEND, & ENDDO ! END LOOP OVER GRID POINTS ENDDO ! END LOOP ON FREQUENCIES ENDDO ! END LOOP OVER DIRECTIONS +!$acc end parallel + ! call nvtxEndRange IF (LHOOK) CALL DR_HOOK('CTUW',1,ZHOOK_HANDLE) + + + RETURN CONTAINS diff --git a/src/ecwam/ctuwdrv.F90 b/src/ecwam/ctuwdrv.F90 index 646a1d539..369808538 100644 --- a/src/ecwam/ctuwdrv.F90 +++ b/src/ecwam/ctuwdrv.F90 @@ -29,12 +29,18 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU USE YOWDRVTYPE , ONLY : WVGRIDGLO + USE YOWCURR , ONLY : LLCFLCUROFF USE YOWGRID , ONLY : NPROMA_WAM USE YOWMPP , ONLY : IRANK USE YOWPARAM , ONLY : NIBLO ,NANG ,NFRE_RED USE YOWSTAT , ONLY : IREFRA USE YOWTEST , ONLY : IU06 +USE YOWUBUF , ONLY : WLATN ,WLONN ,WCORN +USE YOWFRED , ONLY : FR ,DELTH, COSTH ,SINTH +USE YOWPCONS , ONLY : ZPI + + USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK @@ -73,12 +79,15 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & IF (LHOOK) CALL DR_HOOK('CTUWDRV',0,ZHOOK_HANDLE) -!! NPROMA=NPROMA_WAM +!! =NPROMA_WAM MTHREADS=1 !$ MTHREADS=OMP_GET_MAX_THREADS() NPROMA=(IJL-IJS+1)/MTHREADS + 1 + +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JKGLO, KIJS, KIJL, ICALL, IJ, LL2NDCALL) +#endif /*_OPENACC*/ DO JKGLO = IJS, IJL, NPROMA KIJS=JKGLO KIJL=MIN(KIJS+NPROMA-1,IJL) @@ -91,6 +100,7 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & & COSPHM1_EXT, DEPTH_EXT, U_EXT, V_EXT ) + ! WHEN SURFACE CURRENTS ARE USED AND LLCFLCUROFF IS TRUE ! THEN TRY TO SATISFY THE CFL CONDITION WITHOUT THE CURRENTS ! IF IT WAS VIOLATED IN THE FIRST PLACE @@ -112,8 +122,12 @@ SUBROUTINE CTUWDRV (DELPRO, MSTART, MEND, & & COSPHM1_EXT, DEPTH_EXT, U_EXT, V_EXT ) ENDIF ENDIF + + ENDDO +#ifndef _OPENACC !$OMP END PARALLEL DO +#endif /*_OPENACC*/ DO IJ=IJS,IJL IF (LCFLFAIL(IJ)) THEN diff --git a/src/ecwam/ctuwini.F90 b/src/ecwam/ctuwini.F90 index 802a5c13d..475274f9c 100644 --- a/src/ecwam/ctuwini.F90 +++ b/src/ecwam/ctuwini.F90 @@ -6,6 +6,8 @@ ! granted to it by virtue of its status as an intergovernmental organisation ! nor does it submit to any jurisdiction. ! +!MODULE CTUWINI_MOD +! CONTAINS SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & & WLATM1, WCORM1, DP) @@ -44,18 +46,20 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & REAL(KIND=JWRB), DIMENSION(NINF:NSUP,4), INTENT(OUT) :: WCORM1 ! 1 - WCOR REAL(KIND=JWRB), DIMENSION(NINF:NSUP,2), INTENT(OUT) :: DP ! COS PHI FACTOR - INTEGER(KIND=JWIM) :: IJ, K, M, IC, ICR, ICL, KY, KK, KKM INTEGER(KIND=JWIM) :: NLAND -REAL(KIND=JPHOOK) :: ZHOOK_HANDLE +!REAL(KIND=JPHOOK) :: ZHOOK_HANDLE + +!!$acc routine vector ! ---------------------------------------------------------------------- -IF (LHOOK) CALL DR_HOOK('CTUWINI',0,ZHOOK_HANDLE) +!IF (LHOOK) CALL DR_HOOK('CTUWINI',0,ZHOOK_HANDLE) NLAND = NSUP+1 - + + !$acc parallel loop independent collapse(2) DO IC=1,2 DO IJ = KIJS,KIJL IF (KLAT(IJ,IC,1) < NLAND .AND. KLAT(IJ,IC,2) < NLAND) THEN @@ -74,7 +78,9 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ENDIF ENDDO ENDDO - + !$acc end parallel + + !$acc parallel loop independent collapse(2) DO ICR=1,4 DO IJ = KIJS,KIJL IF (KCOR(IJ,ICR,1) < NLAND .AND. KCOR(IJ,ICR,2) < NLAND) THEN @@ -88,15 +94,17 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ELSE ! ADAPT CORNER POINT INTERPOLATION WEIGHT IF LAND IS PRESENT ! SECOND CLOSEST CORNER POINT IS OVER LAND - IF (WCOR(IJ,ICR) > 0.5_JWRB) WCOR(IJ,ICR)=1.0_JWRB + IF (WCOR(IJ,ICR) > 0.5_JWRB) WCOR(IJ,ICR)=1.0_JWRB WCORM1(IJ,ICR) = 1.0_JWRB - WCOR(IJ,ICR) ENDIF ENDDO ENDDO + !$acc end parallel ! INITIALISATION + !$acc parallel loop independent collapse(5) DO ICL=1,2 DO IC=1,2 DO M=1,NFRE_RED @@ -108,7 +116,10 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ENDDO ENDDO ENDDO + !$acc end parallel + + !$acc parallel loop independent collapse(4) DO IC=1,2 DO M=1,NFRE_RED DO K=1,NANG @@ -118,7 +129,10 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ENDDO ENDDO ENDDO + !$acc end parallel + + !$acc parallel loop independent collapse(5) DO ICL=1,2 DO ICR=1,4 DO M=1,NFRE_RED @@ -130,6 +144,7 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & ENDDO ENDDO ENDDO + !$acc end parallel @@ -137,10 +152,12 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & !* SPHERICAL GRID. ! --------------- - +! !* COMPUTE COS PHI FACTOR FOR ADJOINING GRID POINT. ! (for all grid points) + !$acc parallel loop independent collapse(2) private(KY,KK,KKM) DO IC=1,2 +! !!!$acc loop private(KY,KK,KKM) DO IJ = KIJS,KIJL KY=BLK2GLO%KXLT(IJ) KK=KY+2*IC-3 @@ -148,8 +165,11 @@ SUBROUTINE CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & DP(IJ,IC) = COSPH(KKM)*COSPHM1_EXT(IJ) ENDDO ENDDO + !$acc end parallel ENDIF -IF (LHOOK) CALL DR_HOOK('CTUWINI',1,ZHOOK_HANDLE) + +!IF (LHOOK) CALL DR_HOOK('CTUWINI',1,ZHOOK_HANDLE) END SUBROUTINE CTUWINI +!END MODULE CTUWINI_MOD diff --git a/src/ecwam/ctuwupdt.F90 b/src/ecwam/ctuwupdt.F90 index e60b93e02..a524dd0e6 100644 --- a/src/ecwam/ctuwupdt.F90 +++ b/src/ecwam/ctuwupdt.F90 @@ -24,23 +24,29 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ! ------------------------------------------------------------------- + USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU USE YOWDRVTYPE , ONLY : WVGRIDGLO USE YOWCURR , ONLY : LLCFLCUROFF USE YOWFRED , ONLY : COSTH ,SINTH -USE YOWGRID , ONLY : NPROMA_WAM +USE YOWGRID , ONLY : NPROMA_WAM, COSPH USE YOWREFD , ONLY : THDD ,THDC ,SDOT USE YOWMPP , ONLY : IRANK ,NPROC -USE YOWPARAM , ONLY : NIBLO ,NANG ,NFRE_RED -USE YOWSTAT , ONLY : IFRELFMAX, DELPRO_LF, IDELPRO, IREFRA +USE YOWPARAM , ONLY : NIBLO ,NANG ,NFRE_RED, ngy +USE YOWSTAT , ONLY : IFRELFMAX, DELPRO_LF, IDELPRO, IREFRA, ICASE USE YOWTEST , ONLY : IU06 USE YOWUBUF , ONLY : SUMWN , & & JXO ,JYO ,KCR ,KPM ,MPM, & & WLATN ,WLONN ,WCORN ,WKPMN ,WMPMN , & -& LLWLATN ,LLWLONN ,LLWCORN ,LLWKPMN ,LLWMPMN +& LLWLATN ,LLWLONN ,LLWCORN ,LLWKPMN ,LLWMPMN , & +& KLON, KLAT, WLAT, KCOR, WCOR +USE YOWFRED , ONLY : FR ,DELTH, COSTH ,SINTH +USE YOWPCONS , ONLY : ZPI + USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK +!USE CTUWINI_MOD , ONLY : CTUWINI ! ---------------------------------------------------------------------- IMPLICIT NONE @@ -49,6 +55,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & #include "ctuw.intfb.h" #include "ctuwdrv.intfb.h" #include "ctuwini.intfb.h" +!!$acc routine(ctuwini) vector INTEGER(KIND=JWIM), INTENT(IN) :: IJS, IJL ! GRID POINTS WITHIN A BLOCK INTEGER(KIND=JWIM), INTENT(IN) :: NINF, NSUP ! GRID POINT WITH HALO EXTEND NINF:NSUP+1 @@ -78,26 +85,31 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & LOGICAL, SAVE :: LFRSTCTU DATA LFRSTCTU /.TRUE./ - ! ---------------------------------------------------------------------- IF (LHOOK) CALL DR_HOOK('CTUWUPDT',0,ZHOOK_HANDLE) +!$acc update device(sinth,costh) +!$acc update device(icase, COSPH, nang, nfre_red, ngy, niblo) !F ! DEFINE JXO, JYO, KCR IF (LFRSTCTU) THEN IF (.NOT. ALLOCATED(MPM)) ALLOCATE(MPM(NFRE_RED,-1:1)) + !$acc kernels DO M=1,NFRE_RED MPM(M,-1)= MAX(1,M-1) MPM(M,0) = M MPM(M,1) = MIN(NFRE_RED,M+1) ENDDO + !$acc end kernels IF (.NOT. ALLOCATED(KPM)) ALLOCATE(KPM(NANG,-1:1)) IF (.NOT. ALLOCATED(JXO)) ALLOCATE(JXO(NANG,2)) IF (.NOT. ALLOCATED(JYO)) ALLOCATE(JYO(NANG,2)) IF (.NOT. ALLOCATED(KCR)) ALLOCATE(KCR(NANG,4)) +!$ACC ENTER DATA COPYIN(KLON, KLAT, KCOR, JXO, JYO, KCR) + !$acc kernels DO K=1,NANG KM1 = K-1 @@ -149,12 +161,12 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDIF ENDIF ENDDO + !$acc end kernels LFRSTCTU = .FALSE. ENDIF - ! THE CTU IS USED, COMPUTE THE WEIGHTS IF (.NOT. ALLOCATED(SUMWN)) ALLOCATE(SUMWN(IJS:IJL,NANG,NFRE_RED)) @@ -175,7 +187,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & IF (.NOT. ALLOCATED(LLWMPMN)) ALLOCATE(LLWMPMN(NANG,NFRE_RED,-1:1)) ENDIF - +!$acc enter data copyin(sumwn,LLWKPMN, WLATN,WLONN,WCORN,WKPMN) ! SOME INITIALISATION FOR *CTUW* !! NPROMA=NPROMA_WAM @@ -183,14 +195,29 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & !$ MTHREADS=OMP_GET_MAX_THREADS() NPROMA=(IJL-IJS+1)/MTHREADS + 1 + +!F!$acc update device(KLAT,WLAT,KCOR,WCOR,WLATN,WLONN,WCORN) + +!$acc enter data copyin(BLK2GLO) +!$acc enter data copyin(BLK2GLO%KXLT) + +!$acc update device(KLAT,WLAT,KCOR,WCOR) !F +!$acc update device(NFRE_RED,ZPI,FR,DELTH,NANG) +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(DYNAMIC,1) PRIVATE(JKGLO, KIJS, KIJL) +#endif /*_OPENACC*/ +!$acc data present(KLAT,WLAT,KCOR,WCOR,WLATN,WLONN,WCORN) DO JKGLO = IJS, IJL, NPROMA KIJS=JKGLO KIJL=MIN(KIJS+NPROMA-1,IJL) - CALL CTUWINI(KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & -& WLATM1, WCORM1, DP ) +! CALL CTUWINI(KIJS, KIJL,WLATM1, NINF, NSUP,WCORM1) + CALL CTUWINI (KIJS, KIJL, NINF, NSUP, BLK2GLO, COSPHM1_EXT, & + & WLATM1, WCORM1, DP) ENDDO +!$acc end data +#ifndef _OPENACC !$OMP END PARALLEL DO +#endif /*_OPENACC*/ ! COMPUTES THE WEIGHTS @@ -208,6 +235,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & MEND = IFRELFMAX ENDIF + CALL CTUWDRV (DELPRO, MSTART, MEND, & & IJS, IJL, NINF, NSUP, & & BLK2GLO, & @@ -234,14 +262,17 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDIF +!!$acc update host(WLATN,WCORN,WLONN) ! FIND THE LOGICAL FLAGS THAT WILL LIMIT THE EXTEND OF THE CALCULATION IN PROPAGS2 +!$acc parallel loop independent collapse(4) DO IC=1,2 DO ICL=1,2 DO K=1,NANG DO M=1,NFRE_RED LLWLATN(K,M,IC,ICL)=.FALSE. + !$acc loop DO IJ=IJS,IJL IF (WLATN(IJ,K,M,IC,ICL) > 0.0_JWRB) THEN LLWLATN(K,M,IC,ICL)=.TRUE. @@ -252,11 +283,14 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDDO ENDDO ENDDO +!$acc end parallel +!$acc parallel loop independent collapse(3) DO IC=1,2 DO M=1,NFRE_RED DO K=1,NANG LLWLONN(K,M,IC)=.FALSE. + !$acc loop DO IJ=IJS,IJL IF (WLONN(IJ,K,M,IC) > 0.0_JWRB) THEN LLWLONN(K,M,IC)=.TRUE. @@ -266,12 +300,15 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDDO ENDDO ENDDO +!$acc end parallel +!$acc parallel loop independent collapse(4) DO ICL=1,2 DO ICR=1,4 DO M=1,NFRE_RED DO K=1,NANG LLWCORN(K,M,ICR,ICL)=.FALSE. + !$acc loop DO IJ=IJS,IJL IF (WCORN(IJ,K,M,ICR,ICL) > 0.0_JWRB) THEN LLWCORN(K,M,ICR,ICL)=.TRUE. @@ -282,11 +319,14 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDDO ENDDO ENDDO +!$acc end parallel +!$acc parallel loop independent collapse(3) DO IC=-1,1 DO M=1,NFRE_RED DO K=1,NANG LLWKPMN(K,M,IC)=.FALSE. + !$acc loop DO IJ=IJS,IJL IF (WKPMN(IJ,K,M,IC) > 0.0_JWRB) THEN LLWKPMN(K,M,IC)=.TRUE. @@ -296,12 +336,15 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDDO ENDDO ENDDO +!$acc end parallel IF (IREFRA == 2 .OR. IREFRA == 3) THEN +!$acc parallel loop independent collapse(3) DO IC=-1,1 DO M=1,NFRE_RED DO K=1,NANG LLWMPMN(K,M,IC)=.FALSE. + !$acc loop DO IJ=IJS,IJL IF (WMPMN(IJ,K,M,IC) > 0.0_JWRB) THEN LLWMPMN(K,M,IC)=.TRUE. @@ -311,6 +354,7 @@ SUBROUTINE CTUWUPDT (IJS, IJL, NINF, NSUP, & ENDDO ENDDO ENDDO +!$acc end parallel ENDIF diff --git a/src/ecwam/mpexchng.F90 b/src/ecwam/mpexchng.F90 index d30e9eab7..2f6766f06 100644 --- a/src/ecwam/mpexchng.F90 +++ b/src/ecwam/mpexchng.F90 @@ -102,59 +102,96 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) ALLOCATE(ZCOMBUFS(NBUFMAX,NGBTOPE)) ALLOCATE(ZCOMBUFR(NBUFMAX,NGBFROMPE)) - ! PACK SEND BUFFERS FOR NGBTOPE NEIGHBOURING PE's ! ------------------------------------------------- CALL GSTATS(1892,0) -!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(INGB,IPROC,KCOUNT,M,K,IH,IJ) - DO INGB=1,NGBTOPE - IPROC=NTOPELST(INGB) - KCOUNT=0 - DO M = ND3S, ND3E - DO K = 1, NDIM2 - DO IH = 1, NTOPE(IPROC) - IJ=IJTOPE(IH,IPROC) - KCOUNT=KCOUNT+1 - ZCOMBUFS(KCOUNT,INGB)=FLD(IJ,K,M) + #ifdef _OPENACC + !$acc kernels loop independent private(KCOUNT,IJ) copyout(ZCOMBUFS) copyin(fld) + DO INGB=1,NGBTOPE !Total number of PE's to which information will be sent + IPROC=NTOPELST(INGB) !To which PE to send informations + !$acc loop independent collapse(3) + DO M = ND3S, ND3E + DO K = 1, NDIM2 + DO IH = 1, NTOPE(IPROC) !How many halo points to be sent + IJ=IJTOPE(IH,IPROC) !The index of which points to send + KCOUNT = (M - 1) * (NDIM2 * NTOPE(IPROC)) + (K - 1) * NTOPE(IPROC) + IH + ZCOMBUFS(KCOUNT,INGB)=FLD(IJ,K,M) ENDDO ENDDO ENDDO ENDDO + !$acc end kernels +#else +!$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(INGB,IPROC,KCOUNT,M,K,IH,IJ) + DO INGB=1,NGBTOPE + IPROC=NTOPELST(INGB) + KCOUNT=0 + DO M = ND3S, ND3E + DO K = 1, NDIM2 + DO IH = 1, NTOPE(IPROC) + IJ=IJTOPE(IH,IPROC) + KCOUNT=KCOUNT+1 + ZCOMBUFS(KCOUNT,INGB)=FLD(IJ,K,M) + ENDDO + ENDDO + ENDDO + ENDDO !$OMP END PARALLEL DO + #endif /*_OPENACC*/ + CALL GSTATS(1892,1) ! DO NON BLOCKING SENDS AND RECVS IR=0 CALL GSTATS(676,0) - DO INGB=1,NGBFROMPE IR=IR+1 IPROC=NFROMPELST(INGB) KCOUNT=NDIM3*NDIM2*NFROMPE(IPROC) +!!$acc host_data use_device(ZCOMBUFR) CALL MPL_RECV(ZCOMBUFR(1:KCOUNT,INGB),KSOURCE=IPROC,KTAG=KTAG, & & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & & CDSTRING='MPEXCHNG:') +!!$acc end host_data ENDDO DO INGB=1,NGBTOPE IR=IR+1 IPROC=NTOPELST(INGB) KCOUNT=NDIM3*NDIM2*NTOPE(IPROC) +!!$acc host_data use_device(ZCOMBUFR) CALL MPL_SEND(ZCOMBUFS(1:KCOUNT,INGB),KDEST=IPROC,KTAG=KTAG, & & KMP_TYPE=JP_NON_BLOCKING_STANDARD,KREQUEST=IREQ(IR), & & CDSTRING='MPEXCHNG:') +!!$acc end host_data ENDDO ! NOW WAIT FOR ALL TO COMPLETE CALL MPL_WAIT(KREQUEST=IREQ(1:IR),CDSTRING='MPEXCHNG:') - CALL GSTATS(676,1) ! DECODE THE RECEIVED BUFFERS CALL GSTATS(1893,0) + #ifdef _OPENACC + !$acc kernels loop independent private(KCOUNT,IJ) copyin(ZCOMBUFR) + DO INGB=1,NGBFROMPE + IPROC=NFROMPELST(INGB) + !$acc loop vector independent collapse(3) + DO M = ND3S, ND3E + DO K = 1, NDIM2 + DO IH = 1, NFROMPE(IPROC) + IJ=NIJSTART(IPROC)+IH-1 + KCOUNT = (M - 1) * (NDIM2 * NFROMPE(IPROC)) + (K - 1) * NFROMPE(IPROC) + IH + FLD(IJ,K,M)=ZCOMBUFR(KCOUNT,INGB) + ENDDO + ENDDO + ENDDO + ENDDO + !$acc end kernels + #else !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(INGB,IPROC,KCOUNT,M,K,IH,IJ) DO INGB=1,NGBFROMPE IPROC=NFROMPELST(INGB) @@ -170,6 +207,8 @@ SUBROUTINE MPEXCHNG(FLD, NDIM2, ND3S, ND3E) ENDDO ENDDO !$OMP END PARALLEL DO + #endif /*_OPENACC*/ + CALL GSTATS(1893,1) KTAG=KTAG+1 diff --git a/src/ecwam/mubuf.F90 b/src/ecwam/mubuf.F90 index 6ef274472..8cb1674fe 100644 --- a/src/ecwam/mubuf.F90 +++ b/src/ecwam/mubuf.F90 @@ -197,6 +197,7 @@ SUBROUTINE MUBUF (IU01, BATHY, IU08, NPROPAGS) DEALLOCATE(KLAT) + !* 2.2 LONGITUDE NEIGHBOURS (KLON) ! --------------------------- @@ -1163,5 +1164,4 @@ SUBROUTINE MUBUF (IU01, BATHY, IU08, NPROPAGS) ENDDO ! end loop over frequencies DEALLOCATE(KDUM) - END SUBROUTINE MUBUF diff --git a/src/ecwam/propag_wam.F90 b/src/ecwam/propag_wam.F90 index 34c095920..c765b5d11 100644 --- a/src/ecwam/propag_wam.F90 +++ b/src/ecwam/propag_wam.F90 @@ -37,6 +37,7 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) ! ------------------------------------------------------------------- + use openacc USE PARKIND_WAVE, ONLY : JWIM, JWRB, JWRU USE YOWDRVTYPE , ONLY : WVGRIDGLO, ENVIRONMENT, FREQUENCY @@ -46,13 +47,19 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) USE YOWPARAM , ONLY : NANG ,NFRE ,NFRE_RED ,NIBLO , LLUNSTR USE YOWREFD , ONLY : LLUPDTTD ,THDD ,THDC ,SDOT USE YOWSTAT , ONLY : IPROPAGS ,IFRELFMAX, DELPRO_LF, IDELPRO - USE YOWUBUF , ONLY : LUPDTWGHT + USE YOWUBUF , ONLY : LUPDTWGHT, KLAT ,KLON ,KCOR , & + & WLATN ,WLONN ,WCORN ,WKPMN ,WMPMN , & + & LLWLATN ,LLWLONN ,LLWCORN ,LLWKPMN ,LLWMPMN , & + & SUMWN , & + & JXO ,JYO ,KCR ,KPM ,MPM #ifdef WAM_HAVE_UNWAM USE UNWAM , ONLY : PROPAG_UNWAM #endif USE YOMHOOK , ONLY : LHOOK, DR_HOOK, JPHOOK + USE NVTX + ! ---------------------------------------------------------------------- IMPLICIT NONE @@ -72,7 +79,7 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) REAL(KIND=JWRB), DIMENSION(NPROMA_WAM, NANG, NFRE, NCHNK), INTENT(INOUT) :: FL1 - INTEGER(KIND=JWIM) :: IJ, K, M, J + INTEGER(KIND=JWIM) :: IJ, K, M, J, II INTEGER(KIND=JWIM) :: JKGLO, NPROMA, MTHREADS INTEGER(KIND=JWIM) :: NSTEP_LF, ISUBST !$ INTEGER,EXTERNAL :: OMP_GET_MAX_THREADS @@ -97,6 +104,8 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) IF (LHOOK) CALL DR_HOOK('PROPAG_WAM',0,ZHOOK_HANDLE) +!$acc data present(FL1) +!$acc data CREATE(FL1_EXT,FL3_EXT) IF (NIBLO > 1) THEN IJSG = IJFROMCHNK(1,1) @@ -107,24 +116,38 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) NPROMA=(IJLG-IJSG+1)/MTHREADS + 1 +! !$acc data COPYIN(FL1_EXT) !!! the advection schemes are still written in block structure !!! mapping chuncks to block ONLY for actual grid points !!!! +! call nvtxStartRange("PROPAG: Loop 1") +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(ICHNK, KIJS, IJSB, KIJL, IJLB, M, K) +#endif /*_OPENACC*/ + !$acc kernels loop independent private(KIJS, IJSB, KIJL, IJLB) DO ICHNK = 1, NCHNK KIJS = 1 IJSB = IJFROMCHNK(KIJS, ICHNK) KIJL = KIJL4CHNK(ICHNK) IJLB = IJFROMCHNK(KIJL, ICHNK) +! !$acc loop private(FL1_EXT) + !$acc loop independent collapse(2) DO M = 1, NFRE_RED DO K = 1, NANG - FL1_EXT(IJSB:IJLB, K, M) = FL1(KIJS:KIJL, K, M, ICHNK) +! FL1_EXT(IJFROMCHNK(1, ICHNK):IJFROMCHNK(KIJL4CHNK(ICHNK), ICHNK), K, M) = FL1(1:KIJL4CHNK(ICHNK), K, M, ICHNK) + FL1_EXT(IJSB:IJLB, K, M) = FL1(1:KIJL, K, M, ICHNK) ENDDO ENDDO ENDDO + !$acc end kernels +#ifndef _OPENACC !$OMP END PARALLEL DO +#endif /*_OPENACC*/ +! call nvtxEndRange ! SET THE DUMMY LAND POINT TO 0. + !$acc kernels FL1_EXT(NSUP+1,:,:) = 0.0_JWRB + !$acc end kernels IF (LLUNSTR) THEN @@ -206,26 +229,37 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) LUPDTWGHT=.FALSE. ENDIF +! call nvtxStartRange("PROPAG: First preloop") +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JKGLO, KIJS, KIJL) +#endif /*_OPENACC*/ DO JKGLO = IJSG, IJLG, NPROMA KIJS=JKGLO KIJL=MIN(KIJS+NPROMA-1, IJLG) CALL PROPAGS2(FL1_EXT, FL3_EXT, NINF, NSUP, KIJS, KIJL, NANG, 1, NFRE_RED) ENDDO +#ifndef _OPENACC !$OMP END PARALLEL DO - +#endif /*_OPENACC*/ +! call nvtxEndRange ! SUB TIME STEPPING FOR FAST WAVES (only if IFRELFMAX > 0) IF (IFRELFMAX > 0 ) THEN NSTEP_LF = NINT(REAL(IDELPRO, JWRB)/DELPRO_LF) ISUBST = 2 ! The first step was done as part of the previous call to PROPAGS2 +! call nvtxStartRange("PROPAG: While loop") DO WHILE (ISUBST <= NSTEP_LF) +! call nvtxStartRange("PROPAG: Loop 2") +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JKGLO, KIJS, KIJL, M, K, IJ) +#endif /*_OPENACC*/ +!$acc kernels loop private(KIJS, KIJL, FL1_EXT) DO JKGLO = IJSG, IJLG, NPROMA KIJS=JKGLO KIJL=MIN(KIJS+NPROMA-1, IJLG) + !$acc loop independent collapse(3) DO M = 1, IFRELFMAX DO K = 1, NANG DO IJ = KIJS, KIJL @@ -234,22 +268,38 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) ENDDO ENDDO ENDDO +!$acc end kernels +#ifndef _OPENACC !$OMP END PARALLEL DO +#endif /*_OPENACC*/ +! call nvtxEndRange CALL MPEXCHNG(FL1_EXT(:,:,1:IFRELFMAX), NANG, 1, IFRELFMAX) +!call nvtxStartRange("PROPAG: Inner propags2 loop") +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC,1) PRIVATE(JKGLO, KIJS, KIJL) +#endif /*_OPENACC*/ +! !$ACC DATA COPYIN(KLON, KLAT, KCOR, WKPMN, LLWKPMN, SUMWN, WLONN, WLATN, WCORN, JXO, JYO, KCR) DO JKGLO = IJSG, IJLG, NPROMA KIJS=JKGLO KIJL=MIN(KIJS+NPROMA-1, IJLG) + + CALL PROPAGS2(FL1_EXT(:,:,1:IFRELFMAX), FL3_EXT(:,:,1:IFRELFMAX), NINF, NSUP, KIJS, KIJL, NANG, 1, IFRELFMAX) ENDDO +! !$ACC END DATA +#ifndef _OPENACC !$OMP END PARALLEL DO +#endif /*_OPENACC*/ +! call nvtxEndRange ISUBST = ISUBST + 1 ENDDO - ENDIF ! end sub time steps (if needed) +! call nvtxEndRange + +ENDIF ! end sub time steps (if needed) CASE(1) IF (L1STCALL .OR. LLCHKCFLA) LLCHKCFL=.TRUE. @@ -305,36 +355,84 @@ SUBROUTINE PROPAG_WAM (BLK2GLO, WVENVI, WVPRPT, FL1) !!! the advection schemes are still written in block structure !!! So need to convert back to the nproma_wam chuncks +! call nvtxStartRange("PROPAG: Loop 3") +#ifndef _OPENACC !$OMP PARALLEL DO SCHEDULE(STATIC) PRIVATE(ICHNK, KIJS, IJSB, KIJL, IJLB, M, K) +#endif /*_OPENACC*/ + !!$acc kernels loop private(KIJS, IJSB, KIJL, IJLB, M, K) + !$acc kernels loop independent private(KIJS, IJSB, KIJL, IJLB) DO ICHNK = 1, NCHNK KIJS = 1 IJSB = IJFROMCHNK(KIJS, ICHNK) KIJL = KIJL4CHNK(ICHNK) IJLB = IJFROMCHNK(KIJL, ICHNK) +! !$acc loop vector independent collapse(3) + !$acc loop independent collapse(3) DO M = 1, NFRE_RED DO K = 1, NANG - FL1(KIJS:KIJL, K, M, ICHNK) = FL3_EXT(IJSB:IJLB, K, M) + DO J = KIJS, KIJL + II = IJSB + J - KIJS + FL1(J, K, M, ICHNK) = FL3_EXT(II, K, M) +! FL1(KIJS:KIJL, K, M, ICHNK) = FL3_EXT(IJSB:IJLB, K, M) + ENDDO ENDDO ENDDO IF (KIJL < NPROMA_WAM) THEN !!! make sure fictious points keep values of the first point in the chunk + !$acc loop independent collapse(3) DO M = 1, NFRE_RED DO K = 1, NANG - FL1(KIJL+1:NPROMA_WAM, K, M, ICHNK) = FL1(1, K, M, ICHNK) + DO J = KIJL+1,NPROMA_WAM + FL1(J, K, M, ICHNK) = FL1(1, K, M, ICHNK) + !FL1(KIJL+1:NPROMA_WAM, K, M, ICHNK) = FL1(1, K, M, ICHNK) + ENDDO ENDDO ENDDO ENDIF ENDDO + !$acc end kernels + +!F !$acc kernels loop independent private(KIJS, IJSB, KIJL, IJLB) +!F DO ICHNK = 1, NCHNK +!F KIJS = 1 +!F IJSB = IJFROMCHNK(KIJS, ICHNK) +!F KIJL = KIJL4CHNK(ICHNK) +!F IJLB = IJFROMCHNK(KIJL, ICHNK) +!F !$acc loop seq collapse(2) +!F DO M = 1, NFRE_RED +!F DO K = 1, NANG +!F FL1(KIJS:KIJL, K, M, ICHNK) = FL3_EXT(IJSB:IJLB, K, M) +!F ENDDO +!F ENDDO +!F +!F IF (KIJL < NPROMA_WAM) THEN +!F !!! make sure fictious points keep values of the first point in the chunk +!F !$acc loop independent collapse(2) +!F DO M = 1, NFRE_RED +!F DO K = 1, NANG +!F FL1(KIJL+1:NPROMA_WAM, K, M, ICHNK) = FL1(1, K, M, ICHNK) +!F ENDDO +!F ENDDO +!F ENDIF +!F +!F ENDDO +!F !$acc end kernels +#ifndef _OPENACC !$OMP END PARALLEL DO +#endif /*_OPENACC*/ +! call nvtxEndRange CALL GSTATS(1430,1) ENDIF ! end propagation +! !$acc end data ENDIF ! more than one grid point +!$ACC END DATA +!$ACC END DATA L1STCALL=.FALSE. LLCHKCFL=.FALSE. diff --git a/src/ecwam/propags2.F90 b/src/ecwam/propags2.F90 index 039987dc5..149d12ea1 100644 --- a/src/ecwam/propags2.F90 +++ b/src/ecwam/propags2.F90 @@ -9,6 +9,7 @@ SUBROUTINE PROPAGS2 (F1, F3, NINF, NSUP, KIJS, KIJL, NANG, ND3S, ND3E) +use nvtx ! ---------------------------------------------------------------------- !**** *PROPAGS2* - ADVECTION USING THE CORNER TRANSPORT SCHEME IN SPACE @@ -95,54 +96,112 @@ SUBROUTINE PROPAGS2 (F1, F3, NINF, NSUP, KIJS, KIJL, NANG, ND3S, ND3E) IF (IREFRA /= 2 .AND. IREFRA /= 3 ) THEN !* WITHOUT DEPTH OR/AND CURRENT REFRACTION. ! ---------------------------------------- - - DO K = 1, NANG - JJX=JXO(K,1) - JJY=JYO(K,1) - JJY=JYO(K,1) - JJK=KCR(K,1) + +!call nvtxStartRange("PROPAGS2: Begin loop NANG") + +!!$acc enter data create(FJ1, FJ2, FJ3, FJ4, FJ5) copyin(KLON, KLAT, KCOR, WKPMN, LLWKPMN, SUMWN, WLONN, WLATN, WCORN) +!!$acc enter data present(F1, F3) +!! create(FJ1, FJ2, FJ3, FJ4, FJ5) copyin(KLON, KLAT, KCOR, WKPMN, LLWKPMN, SUMWN, WLONN, WLATN, WCORN) + +!$acc kernels loop present(F1,F3) create(FJ1, FJ2, FJ3, FJ4, FJ5) PRESENT(KLON,KLAT,KCOR,WKPMN,LLWKPMN, SUMWN, WLONN, WLATN, WCORN) PRESENT(JXO,JYO,KCR) + DO K = 1, NANG +! JJX=JXO(K,1) +! JJY=JYO(K,1) +! JJY=JYO(K,1) +! JJK=KCR(K,1) + + !!$acc loop independent DO M = ND3S, ND3E - DO IJ = KIJS, KIJL - FJ1(IJ)= F1(KLON(IJ,JJX) ,K ,M) - FJ2(IJ)= F1(KLAT(IJ,JJY,1),K ,M) - FJ3(IJ)= F1(KLAT(IJ,JJY,2),K ,M) - FJ4(IJ)= F1(KCOR(IJ,JJK,1),K ,M) - FJ5(IJ)= F1(KCOR(IJ,JJK,2),K ,M) - ENDDO +! DO IJ = KIJS, KIJL +! FJ1(IJ)= F1(KLON(IJ,JJX) ,K ,M) +! FJ2(IJ)= F1(KLAT(IJ,JJY,1),K ,M) +! FJ3(IJ)= F1(KLAT(IJ,JJY,2),K ,M) +! FJ4(IJ)= F1(KCOR(IJ,JJK,1),K ,M) +! FJ5(IJ)= F1(KCOR(IJ,JJK,2),K ,M) +! ENDDO !JFH Loop split to enhance vectorisation + !DIR$ IVDEP !DIR$ PREFERVECTOR + !!$acc loop vector + IF (LLWKPMN(K,M,-1).AND.(.NOT.LLWKPMN(K,M,1))) THEN DO IJ = KIJS, KIJL F3(IJ,K,M) = & & (1.0_JWRB-SUMWN(IJ,K,M))* F1(IJ ,K ,M) & -! & + WLONN(IJ,K,M,JXO(K,1)) * F1(KLON(IJ,JXO(K,1)) ,K ,M) & -! & +WLATN(IJ,K,M,JYO(K,1),1)* F1(KLAT(IJ,JYO(K,1),1),K ,M) & -! & +WLATN(IJ,K,M,JYO(K,1),2)* F1(KLAT(IJ,JYO(K,1),2),K ,M) & -! & + WCORN(IJ,K,M,1,1)* F1(KCOR(IJ,KCR(K,1),1),K ,M) & -! & + WCORN(IJ,K,M,1,2)* F1(KCOR(IJ,KCR(K,1),2),K ,M) & + & + WLONN(IJ,K,M,JXO(K,1)) * F1(KLON(IJ,JXO(K,1)) ,K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),1)* F1(KLAT(IJ,JYO(K,1),1),K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),2)* F1(KLAT(IJ,JYO(K,1),2),K ,M) & + & + WCORN(IJ,K,M,1,1)* F1(KCOR(IJ,KCR(K,1),1),K ,M) & + & + WCORN(IJ,K,M,1,2)* F1(KCOR(IJ,KCR(K,1),2),K ,M) + F3(IJ,K,M) = F3(IJ,K,M) & + & + WKPMN(IJ,K,M,-1)* F1(IJ,KPM(K,-1),M) ! & + WLONN(IJ,K,M,JJX) * F1(KLON(IJ,JJX) ,K ,M) & ! & +WLATN(IJ,K,M,JJY,1)* F1(KLAT(IJ,JJY,1),K ,M) & ! & +WLATN(IJ,K,M,JJY,2)* F1(KLAT(IJ,JJY,2),K ,M) & ! & + WCORN(IJ,K,M,1,1)* F1(KCOR(IJ,JJK,1),K ,M) & ! & + WCORN(IJ,K,M,1,2)* F1(KCOR(IJ,JJK,2),K ,M) & - & + WLONN(IJ,K,M,JJX) * FJ1(IJ) & - & +WLATN(IJ,K,M,JJY,1)* FJ2(IJ) & - & +WLATN(IJ,K,M,JJY,2)* FJ3(IJ) & - & + WCORN(IJ,K,M,1,1)* FJ4(IJ) & - & + WCORN(IJ,K,M,1,2)* FJ5(IJ) +! & + WLONN(IJ,K,M,JJX) * FJ1(IJ) & +! & +WLATN(IJ,K,M,JJY,1)* FJ2(IJ) & +! & +WLATN(IJ,K,M,JJY,2)* FJ3(IJ) & +! & + WCORN(IJ,K,M,1,1)* FJ4(IJ) & +! & + WCORN(IJ,K,M,1,2)* FJ5(IJ) ENDDO - - DO IC=-1,1,2 - IF (LLWKPMN(K,M,IC)) THEN - DO IJ = KIJS, KIJL + ELSE IF (LLWKPMN(K,M,-1).AND.LLWKPMN(K,M,1)) THEN + DO IJ = KIJS, KIJL + F3(IJ,K,M) = & + & (1.0_JWRB-SUMWN(IJ,K,M))* F1(IJ ,K ,M) & + & + WLONN(IJ,K,M,JXO(K,1)) * F1(KLON(IJ,JXO(K,1)) ,K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),1)* F1(KLAT(IJ,JYO(K,1),1),K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),2)* F1(KLAT(IJ,JYO(K,1),2),K ,M) & + & + WCORN(IJ,K,M,1,1)* F1(KCOR(IJ,KCR(K,1),1),K ,M) & + & + WCORN(IJ,K,M,1,2)* F1(KCOR(IJ,KCR(K,1),2),K ,M) F3(IJ,K,M) = F3(IJ,K,M) & - & + WKPMN(IJ,K,M,IC)* F1(IJ,KPM(K,IC),M) - ENDDO - ENDIF + & + WKPMN(IJ,K,M,-1)* F1(IJ,KPM(K,-1),M) + F3(IJ,K,M) = F3(IJ,K,M) & + & + WKPMN(IJ,K,M,1)* F1(IJ,KPM(K,1),M) ENDDO - + ELSE IF (LLWKPMN(K,M,1).AND.(.NOT.LLWKPMN(K,M,-1))) THEN + DO IJ = KIJS, KIJL + F3(IJ,K,M) = & + & (1.0_JWRB-SUMWN(IJ,K,M))* F1(IJ ,K ,M) & + & + WLONN(IJ,K,M,JXO(K,1)) * F1(KLON(IJ,JXO(K,1)) ,K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),1)* F1(KLAT(IJ,JYO(K,1),1),K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),2)* F1(KLAT(IJ,JYO(K,1),2),K ,M) & + & + WCORN(IJ,K,M,1,1)* F1(KCOR(IJ,KCR(K,1),1),K ,M) & + & + WCORN(IJ,K,M,1,2)* F1(KCOR(IJ,KCR(K,1),2),K ,M) + F3(IJ,K,M) = F3(IJ,K,M) & + & + WKPMN(IJ,K,M,1)* F1(IJ,KPM(K,1),M) + ENDDO + ELSE IF ((.not.LLWKPMN(K,M,-1)).and.(.not.LLWKPMN(K,M,1))) THEN + DO IJ = KIJS, KIJL + F3(IJ,K,M) = & + & (1.0_JWRB-SUMWN(IJ,K,M))* F1(IJ ,K ,M) & + & + WLONN(IJ,K,M,JXO(K,1)) * F1(KLON(IJ,JXO(K,1)) ,K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),1)* F1(KLAT(IJ,JYO(K,1),1),K ,M) & + & +WLATN(IJ,K,M,JYO(K,1),2)* F1(KLAT(IJ,JYO(K,1),2),K ,M) & + & + WCORN(IJ,K,M,1,1)* F1(KCOR(IJ,KCR(K,1),1),K ,M) & + & + WCORN(IJ,K,M,1,2)* F1(KCOR(IJ,KCR(K,1),2),K ,M) + ENDDO + END IF + + !!$acc loop vector +! DO IC=-1,1,2 +! IF (LLWKPMN(K,M,IC)) THEN +! !!$acc loop vector +! DO IJ = KIJS, KIJL +! F3(IJ,K,M) = F3(IJ,K,M) & +! & + WKPMN(IJ,K,M,IC)* F1(IJ,KPM(K,IC),M) +! ENDDO +! ENDIF +! ENDDO ENDDO ENDDO + !$acc end kernels + +!!$acc exit data copyout(F3) +!!$acc exit data delete(FJ1, FJ2, FJ3, FJ4, FJ5) + +! call nvtxEndRange ELSE !* DEPTH AND CURRENT REFRACTION. diff --git a/src/ecwam/wamintgr_loki_gpu.F90 b/src/ecwam/wamintgr_loki_gpu.F90 index 132b2a57a..a1b66ca2e 100644 --- a/src/ecwam/wamintgr_loki_gpu.F90 +++ b/src/ecwam/wamintgr_loki_gpu.F90 @@ -147,6 +147,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & DATA LLNEWFILE / .FALSE. / + ! ---------------------------------------------------------------------- IF (LHOOK) CALL DR_HOOK('WAMINTGR',0,ZHOOK_HANDLE) @@ -154,13 +155,26 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & !* PROPAGATION TIME ! ---------------- +!!$acc enter data copyin(BLK2GLO, WVENVI, WVPRPT, FL1) copyout(FL1) + +CALL SRC_CONTRIBS%INIT(FL1=FL1) +CALL SRC_CONTRIBS%UPDATE_DEVICE(FL1=FL1_DPTR) +!$acc data present(FL1_DPTR) + +!!$acc data copyin(BLK2GLO, WVENVI, WVPRPT, FL1) copyout(FL1) IF (CDATE == CDTPRA) THEN TIME0=-WAM_USER_CLOCK() - CALL PROPAG_WAM(BLK2GLO, WVENVI, WVPRPT, FL1) + +!!$acc data present(BLK2GLO, WVENVI, WVPRPT, FL1) +!!$acc data present(FL1) + CALL PROPAG_WAM(BLK2GLO, WVENVI, WVPRPT, FL1_DPTR) +!!$acc end data TIME1(1) = TIME1(1) + (TIME0+WAM_USER_CLOCK())*1.E-06 CDATE = CDTPRO ENDIF +!$acc end data +!!$acc exit data delete(BLK2GLO, WVENVI, WVPRPT) !* RETRIEVING NEW FORCING FIELDS IF NEEDED. ! ---------------------------------------- @@ -189,7 +203,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & & STRNMS=INTFLDS%STRNMS, TAUXD=INTFLDS%TAUXD, TAUYD=INTFLDS%TAUYD, TAUOCXD=INTFLDS%TAUOCXD, & & TAUOCYD=INTFLDS%TAUOCYD, TAUOC=INTFLDS%TAUOC, PHIOCD=INTFLDS%PHIOCD, PHIEPS=INTFLDS%PHIEPS, & & PHIAW=INTFLDS%PHIAW) - CALL SRC_CONTRIBS%INIT(FL1=FL1, XLLWS=XLLWS, MIJ=MIJ) + CALL SRC_CONTRIBS%INIT(XLLWS=XLLWS, MIJ=MIJ) !$loki update_device @@ -205,7 +219,7 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & CALL INTFLDS_FIELD%UPDATE_DEVICE(WSEMEAN=WSEMEAN_DPTR, WSFMEAN=WSFMEAN_DPTR, USTOKES=USTOKES_DPTR, & & VSTOKES=VSTOKES_DPTR, STRNMS=STRNMS_DPTR, TAUXD=TAUXD_DPTR, TAUYD=TAUYD_DPTR, TAUOCXD=TAUOCXD_DPTR, & & TAUOCYD=TAUOCYD_DPTR, TAUOC=TAUOC_DPTR, PHIOCD=PHIOCD_DPTR, PHIEPS=PHIEPS_DPTR, PHIAW=PHIAW_DPTR) - CALL SRC_CONTRIBS%UPDATE_DEVICE(FL1=FL1_DPTR, XLLWS=XLLWS_DPTR, MIJ=MIJ_DPTR) + CALL SRC_CONTRIBS%UPDATE_DEVICE(XLLWS=XLLWS_DPTR, MIJ=MIJ_DPTR) !$acc data present(FL1_DPTR,XLLWS_DPTR,MIJ_DPTR,WAVNUM_DPTR,CGROUP_DPTR,CIWA_DPTR,CINV_DPTR,XK2CG_DPTR,STOKFAC_DPTR,& !$acc & EMAXDPT_DPTR,INDEP_DPTR,DEPTH_DPTR,IOBND_DPTR,IODP_DPTR,CICOVER_DPTR,WSWAVE_DPTR,WDWAVE_DPTR,AIRD_DPTR,& @@ -245,12 +259,6 @@ SUBROUTINE WAMINTGR_LOKI_GPU(CDTPRA, CDATE, CDATEWH, CDTIMP, CDTIMPNEXT, & CALL SRC_CONTRIBS%ENSURE_HOST() !$loki update_host - CALL WVPRPT_FIELD%FINAL() - CALL WVENVI_FIELD%FINAL() - CALL FF_NOW_FIELD%FINAL() - CALL WAM2NEMO_FIELD%FINAL() - CALL INTFLDS_FIELD%FINAL() - CALL SRC_CONTRIBS%FINAL() TIME1(3) = TIME1(3) + (TIME2+WAM_USER_CLOCK())*1.E-06 IF (LWNEMOCOU) NEMONTAU = NEMONTAU + 1 diff --git a/src/ecwam/yowgrid.F90 b/src/ecwam/yowgrid.F90 index 313c20214..b2b31688c 100644 --- a/src/ecwam/yowgrid.F90 +++ b/src/ecwam/yowgrid.F90 @@ -87,4 +87,5 @@ MODULE YOWGRID ! ---------------------------------------------------------------------- + !$acc declare create( COSPH ) END MODULE YOWGRID diff --git a/src/ecwam/yowmap.F90 b/src/ecwam/yowmap.F90 index 7099ecc28..b670deed4 100644 --- a/src/ecwam/yowmap.F90 +++ b/src/ecwam/yowmap.F90 @@ -78,4 +78,5 @@ MODULE YOWMAP ! (i.e. NO LAND AND DEEP WATER). ! ---------------------------------------------------------------------- - END MODULE YOWMAP + +END MODULE YOWMAP diff --git a/src/ecwam/yowparam.F90 b/src/ecwam/yowparam.F90 index 0f713bdf6..0bd0552f6 100644 --- a/src/ecwam/yowparam.F90 +++ b/src/ecwam/yowparam.F90 @@ -91,5 +91,8 @@ MODULE YOWPARAM ! DONE IN LATITUNAL BANDS ! (like it used to be done). ! ---------------------------------------------------------------------- - +!$acc declare create( nang ) +!$acc declare create( nfre_red ) +!$acc declare create( ngy ) +!$acc declare create( niblo ) END MODULE YOWPARAM diff --git a/src/ecwam/yowstat.F90 b/src/ecwam/yowstat.F90 index e3555b656..0714a9949 100644 --- a/src/ecwam/yowstat.F90 +++ b/src/ecwam/yowstat.F90 @@ -251,4 +251,5 @@ MODULE YOWSTAT ! *CMETER* CHARACTER SMS or ECFLOW meter command (ECMWF supervisor) ! *CEVENT* CHARACTER SMS or ECFLOW event command (ECMWF supervisor) ! ---------------------------------------------------------------------- + !$acc declare create( icase ) END MODULE YOWSTAT diff --git a/src/ecwam/yowubuf.F90 b/src/ecwam/yowubuf.F90 index 29b69d6ee..2fd621c7c 100644 --- a/src/ecwam/yowubuf.F90 +++ b/src/ecwam/yowubuf.F90 @@ -154,4 +154,13 @@ MODULE YOWUBUF ! *LLWMPMN* LOGICAL ARRAY, TRUE IF WMPMN > 0. AT ALL GRID POINTS. ! ---------------------------------------------------------------------- - END MODULE YOWUBUF + +!$acc declare create(WLAT) +!$acc declare create(KLAT) +!$acc declare create(WCOR) +!$acc declare create(KCOR) +!$acc declare create(WLATN) +!$acc declare create(WCORN) +!$acc declare create(WLONN) + + END MODULE YOWUBUF