diff --git a/modulefiles/gsi_wcoss2.intel.lua b/modulefiles/gsi_wcoss2.intel.lua
index c3bfd1156c..116baedb9f 100644
--- a/modulefiles/gsi_wcoss2.intel.lua
+++ b/modulefiles/gsi_wcoss2.intel.lua
@@ -9,7 +9,9 @@ local cmake_ver= os.getenv("cmake_ver") or "3.20.2"
 local python_ver=os.getenv("python_ver") or "3.8.6"
 local prod_util_ver=os.getenv("prod_util_ver") or "2.0.10"
-local netcdf_ver=os.getenv("netcdf_ver") or "4.7.4"
+local zlib_ver=os.getenv("zlib_ver") or "1.2.11"
+local hdf5_ver=os.getenv("hdf5_ver") or "1.14.0"
+local netcdf_ver=os.getenv("netcdf_ver") or "4.9.2"
 local bufr_ver=os.getenv("bufr_ver") or "11.7.0"
 local bacio_ver=os.getenv("bacio_ver") or "2.4.1"
 local w3emc_ver=os.getenv("w3emc_ver") or "2.9.2"
@@ -32,6 +34,8 @@ load(pathJoin("python", python_ver))
 load(pathJoin("prod_util", prod_util_ver))
+load(pathJoin("zlib", zlib_ver))
+load(pathJoin("hdf5", hdf5_ver))
 load(pathJoin("netcdf", netcdf_ver))
 load(pathJoin("bufr", bufr_ver))
 load(pathJoin("bacio", bacio_ver))
diff --git a/src/enkf/observer_fv3reg.f90 b/src/enkf/observer_fv3reg.f90
index 8f77bca417..9084ee58f6 100644
--- a/src/enkf/observer_fv3reg.f90
+++ b/src/enkf/observer_fv3reg.f90
@@ -45,7 +45,7 @@ subroutine setup_linhx(rlat, rlon, time, ix, delx, ixp, delxp, iy, dely, &
 use params, only: nstatefields, nlons, nlats, nlevs, nhr_state, fhr_assim
 use gridinfo, only: npts, latsgrd, lonsgrd
 use statevec, only: nsdim
-use constants, only: zero,one,pi
+use constants, only: izero,zero,one,pi
 use mpisetup
 implicit none
@@ -54,6 +54,18 @@ subroutine setup_linhx(rlat, rlon, time, ix, delx, ixp, delxp, iy, dely, &
 real(r_single) ,intent(in ) :: time ! observation time relative to middle of window
 integer(i_kind), intent(out) :: ix, iy, it, ixp, iyp, itp
 real(r_kind), intent(out) :: delx, dely, delxp, delyp, delt, deltp
+ix=izero
+iy=izero
+it=izero
+ixp=izero
+iyp=izero
+itp=izero
+delx=zero
+dely=zero
+delxp=zero
+delyp=zero
+delt=zero
+deltp=zero
 write(6,*)'this is a dummy subroutine, running this means something wrong ,stop'
 call stop2(555)
@@ -110,6 +122,7 @@ subroutine calc_linhx(hx, dens, dhx_dx, hxpert, hx_ens, &
 type(raggedarr) ,intent(inout) :: hxpert ! interpolated background
 real(r_single) ,intent( out) :: hx_ens ! H (x_ens)
 integer(i_kind) i,j
+hx_ens=zero
 write(6,*)'this is a dummy subroutine, running this means something wrong ,stop'
 call stop2(555)
@@ -145,6 +158,7 @@ subroutine calc_linhx_modens(hx, dhx_dx, hxpert, hx_ens, vscale)
 !$$$
 use kinds, only: r_kind,i_kind,r_single
 use sparsearr, only: sparr, raggedarr
+use constants, only: zero
 use mpisetup
 implicit none
@@ -155,6 +169,7 @@ subroutine calc_linhx_modens(hx, dhx_dx, hxpert, hx_ens, vscale)
 real(r_single) ,intent( out) :: hx_ens(neigv)! H (x_ens)
 real(r_double),dimension(neigv,nlevs+1) ,intent(in ) :: vscale ! vertical scaling (for modulated ens)
 integer(i_kind) i
+hx_ens=zero
 write(6,*)'this is a dummy subroutine, running this means something wrong ,stop'
 call stop2(555)
diff --git a/src/gsi/CMakeLists.txt b/src/gsi/CMakeLists.txt
index f894b0a8a8..d7a02ed99f 100644
--- a/src/gsi/CMakeLists.txt
+++ b/src/gsi/CMakeLists.txt
@@ -62,6 +62,7 @@ find_package(NetCDF REQUIRED Fortran)
 if(OPENMP)
   find_package(OpenMP REQUIRED)
 endif()
+find_package(ZLIB REQUIRED)

 # NCEPLibs dependencies
 find_package(bacio REQUIRED)
@@ -157,6 +158,7 @@ target_link_libraries(gsi_fortran_obj PUBLIC w3emc::w3emc_d)
 target_link_libraries(gsi_fortran_obj PUBLIC sp::sp_d)
 target_link_libraries(gsi_fortran_obj PUBLIC bufr::bufr_d)
 target_link_libraries(gsi_fortran_obj PUBLIC crtm::crtm)
+target_link_libraries(gsi_fortran_obj PUBLIC ZLIB::ZLIB)
 if(GSI_MODE MATCHES "Regional")
   target_link_libraries(gsi_fortran_obj PUBLIC wrf_io::wrf_io)
 endif()
diff --git a/src/gsi/cplr_get_fv3_regional_ensperts.f90 b/src/gsi/cplr_get_fv3_regional_ensperts.f90
index 9b841f012c..55aa410297 100644
--- a/src/gsi/cplr_get_fv3_regional_ensperts.f90
+++ b/src/gsi/cplr_get_fv3_regional_ensperts.f90
@@ -76,6 +76,7 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
       use netcdf_mod , only: nc_check
       use gsi_rfv3io_mod, only: fv3lam_io_phymetvars3d_nouv
       use obsmod, only: if_model_dbz,if_model_fed
+      use mpi, only : MPI_Wtime, MPI_REAL8, MPI_SUCCESS, MPI_MAX

       implicit none
@@ -119,6 +120,8 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
       integer(i_kind):: imem_start,n_fv3sar
       integer(i_kind):: i_caseflag
+      real(kind=8) :: time_beg,time_end,walltime, tb,te,wt
+      integer(i_kind) :: ierr

       if(n_ens/=(n_ens_gfs+n_ens_fv3sar)) then
          write(6,*)'wrong, the sum of n_ens_gfs and n_ens_fv3sar not equal n_ens, stop'
@@ -275,7 +278,7 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
          end if
       end if
-
+      tb=MPI_Wtime()
       do m=1,ntlevs_ens
@@ -452,7 +455,8 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
          if( .not. parallelization_over_ensmembers )then
            if (mype == 0) write(6,'(a,a)') &
               'CALL READ_FV3_REGIONAL_ENSPERTS FOR ENS DATA with the filename str : ',trim(ensfilenam_str)
-
+
+            time_beg=MPI_Wtime()
            select case (i_caseflag)
            case (0)
               call this%general_read_fv3_regional(fv3_filename,ps,u,v,tv,rh,oz)
@@ -469,9 +473,15 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
               call this%general_read_fv3_regional(fv3_filename,ps,u,v,tv,rh,oz, &
                    g_ql=ql,g_qi=qi,g_qr=qr,g_qs=qs,g_qg=qg,g_qnr=qnr,g_w=w,g_fed=fed)
            case (5)
+              !write(6,'("get_fv3_regional_ensperts_run: Before general_read_fv3_regional")')
               call this%general_read_fv3_regional(fv3_filename,ps,u,v,tv,rh,oz, &
                    g_ql=ql,g_qi=qi,g_qr=qr,g_qs=qs,g_qg=qg,g_qnr=qnr,g_w=w,g_dbz=dbz,g_fed=fed)
            end select
+            time_end=MPI_Wtime()
+            call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
+            if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
+            if(mype==0) write(6,'("Maximum Walltime for general_read_fv3_regional ",f15.4,I4)') walltime,i_caseflag
+
         end if
         if( parallelization_over_ensmembers )then
@@ -792,6 +802,11 @@ subroutine get_fv3_regional_ensperts_run(this,en_perts,nelen,ps_bar)
         end do
      enddo ! it 4d loop
+     te=MPI_Wtime()
+     call MPI_Reduce(te-tb, wt, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
+     if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
+     if(mype==0) write(6,'("Maximum Walltime to read ",I4," ensemble members ", f15.4)') n_ens_fv3sar,wt
+
 ! CALCULATE ENSEMBLE SPREAD
      if(write_ens_sprd ) then
        call this%ens_spread_dualres_regional(mype,en_perts,nelen)
@@ -851,7 +866,7 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
       use hybrid_ensemble_parameters, only: grd_ens,q_hyb_ens
       use hybrid_ensemble_parameters, only: fv3sar_ensemble_opt,dual_res
-      use mpimod, only: mpi_comm_world,mpi_rtype
+      use mpimod, only: mpi_comm_world,mpi_rtype,mype
       use gsi_rfv3io_mod,only: type_fv3regfilenameg
       use gsi_rfv3io_mod,only:n2d
       use constants, only: half,zero
@@ -870,6 +885,7 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
       use directDA_radaruse_mod, only: l_use_cvpqx, cvpqx_pval, cld_nt_updt
       use directDA_radaruse_mod, only: l_cvpnr, cvpnr_pval
       use obsmod, only:if_model_dbz,if_model_fed
+      use mpi, only : MPI_Wtime, MPI_REAL8, MPI_MAX, MPI_SUCCESS

       implicit none
@@ -913,6 +929,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
       character(len=:),allocatable :: sfcdata   !='fv3_sfcdata'
       character(len=:),allocatable :: couplerres!='coupler.res'
       integer (i_kind) ier,istatus
+      real(kind=8) :: time_beg,time_end,walltime
+      integer(i_kind) :: ierr

       associate( this => this ) ! eliminates warning for unused dummy argument needed for binding
@@ -930,6 +948,7 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
       couplerres=fv3_filenameginput%couplerres
+      !write(6,'("general_read_fv3_regional: fv3sar_ensemble_opt= ",I4)') fv3sar_ensemble_opt

       if (allocated(fv3lam_ens_io_dynmetvars2d_nouv) ) then
@@ -956,16 +975,28 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
       end if

-      if(fv3sar_ensemble_opt == 0 ) then
+      if(fv3sar_ensemble_opt == 0 ) then
+         time_beg=MPI_Wtime()
          call gsi_fv3ncdf_readuv(grd_fv3lam_ens_uv,g_u,g_v,fv3_filenameginput,dual_res)
+         time_end=MPI_Wtime()
+         call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
+         if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
+         if(mype==0) write(6,'("general_read_fv3_regional: Maximum Walltime for gsi_fv3ncdf_readuv ",f15.4)') walltime
+
       else
         call gsi_fv3ncdf_readuv_v1(grd_fv3lam_ens_uv,g_u,g_v,fv3_filenameginput,dual_res)
       endif
       if(fv3sar_ensemble_opt == 0) then
+        time_beg=MPI_Wtime()
        call gsi_fv3ncdf_read(grd_fv3lam_ens_dynvar_io_nouv,gsibundle_fv3lam_ens_dynvar_nouv,&
             fv3_filenameginput%dynvars,fv3_filenameginput,dual_res)
        call gsi_fv3ncdf_read(grd_fv3lam_ens_tracer_io_nouv,gsibundle_fv3lam_ens_tracer_nouv,&
             fv3_filenameginput%tracers,fv3_filenameginput,dual_res)
+        time_end=MPI_Wtime()
+        call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
+        if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
+        if(mype==0) write(6,'("general_read_fv3_regional: Maximum Walltime for gsi_fv3ncdf_read ",f15.4)') walltime
+
        if( if_model_dbz .or. if_model_fed ) then
           call gsi_fv3ncdf_read(grd_fv3lam_ens_phyvar_io_nouv,gsibundle_fv3lam_ens_phyvar_nouv,&
                fv3_filenameginput%phyvars,fv3_filenameginput,dual_res)
@@ -1013,6 +1044,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
       endif

!! tsen2tv  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+      !$omp parallel do default(none),private(k,i,j) &
+      !$omp shared(grd_ens,g_q,g_tsen,g_tv,fv)
       do k=1,grd_ens%nsig
          do j=1,grd_ens%lon2
             do i=1,grd_ens%lat2
@@ -1023,6 +1056,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
       if (.not.q_hyb_ens) then
          ice=.true.
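+         ! ice/iderivative configure the genqsat call below: ice=.true.
+         ! selects the ice-phase saturation formulation and iderivative=0
+         ! requests no derivative output.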
          iderivative=0
+         !$omp parallel do default(none),private(k,i,j,kp) &
+         !$omp shared(grd_ens,g_prsi,g_prsl)
          do k=1,grd_ens%nsig
             kp=k+1
             do j=1,grd_ens%lon2
@@ -1032,6 +1067,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g
             end do
          end do
          call genqsat(g_rh,g_tsen(1,1,1),g_prsl(1,1,1),grd_ens%lat2,grd_ens%lon2,grd_ens%nsig,ice,iderivative)
+         !$omp parallel do default(none),private(k,i,j) &
+         !$omp shared(grd_ens,g_rh,g_q)
          do k=1,grd_ens%nsig
             do j=1,grd_ens%lon2
                do i=1,grd_ens%lat2
@@ -1051,6 +1088,8 @@ subroutine general_read_fv3_regional(this,fv3_filenameginput,g_ps,g_u,g_v,g_tv,g

! CV transform
+      !$omp parallel do default(none),private(k,i,j) &
+      !$omp shared(grd_ens,l_use_cvpqx,g_qr,cvpqx_pval,g_qs,g_qg,g_qnr,cld_nt_updt,l_cvpnr,cvpnr_pval)
       do k=1,grd_ens%nsig
          do i=1,grd_ens%lon2
             do j=1,grd_ens%lat2
diff --git a/src/gsi/crc32.F90 b/src/gsi/crc32.F90
new file mode 100644
index 0000000000..dd22e4c801
--- /dev/null
+++ b/src/gsi/crc32.F90
@@ -0,0 +1,23 @@
+module crc32
+  use iso_c_binding
+  implicit none
+  public :: digest
+
+  interface
+    integer(c_long) function digest_c(message) bind(c)
+      use iso_c_binding, only: c_char, c_long
+      character(kind=c_char), intent(in) :: message(*)
+    end function digest_c
+  end interface
+
+  contains
+
+  integer function digest(m)
+    use iso_c_binding, only: c_long, c_null_char
+    implicit none
+    character(len=*), intent(in) :: m
+    !m='nid001019'
+    digest=int(iand(digest_c(trim(m)//c_null_char), int(huge(digest),c_long))) ! keep low 31 bits: non-negative and fits a default integer
+    !write(6,'("Digest ",I12)') digest
+  end function digest
+end module crc32
diff --git a/src/gsi/crc32_c.c b/src/gsi/crc32_c.c
new file mode 100644
index 0000000000..049a725baa
--- /dev/null
+++ b/src/gsi/crc32_c.c
@@ -0,0 +1,7 @@
+#include <stdio.h>
+#include <string.h>
+#include <zlib.h>
+
+uLong digest_c(char * message) {
+    return crc32(0, (const void*)message, strlen(message));
+}
diff --git a/src/gsi/get_gefs_for_regional.f90 b/src/gsi/get_gefs_for_regional.f90
index cc5e0a2c86..196ad2ec82 100644
--- a/src/gsi/get_gefs_for_regional.f90
+++ b/src/gsi/get_gefs_for_regional.f90
@@ -83,6 +83,7 @@ subroutine get_gefs_for_regional
   use get_wrf_mass_ensperts_mod, only: get_wrf_mass_ensperts_class
   use gsi_io, only: verbose
   use obsmod, only: l_wcp_cwm
+  use mpi, only : MPI_Wtime, MPI_REAL8, MPI_SUCCESS
   implicit none

   type(sub2grid_info) grd_gfs,grd_mix,grd_gfst
@@ -187,6 +188,8 @@ subroutine get_gefs_for_regional
   real(r_kind), pointer :: ges_q (:,:,:)=>NULL()
   logical :: print_verbose
   real(r_kind), allocatable :: ges_z_ens(:,:)
+  real(kind=8) :: time_beg,time_end,walltime, tb,te,wt
+  integer(i_kind) :: ierr

   print_verbose=.false.
   if(verbose)print_verbose=.true.
@@ -621,6 +624,7 @@ subroutine get_gefs_for_regional
   rewind(10)
   inithead=.true.
+  tb=MPI_Wtime()
   do n=1,n_ens_gfs
      read(10,'(a)',err=20,end=20)filename
      filename=trim(ensemble_path) // trim(filename)
@@ -641,8 +645,13 @@ subroutine get_gefs_for_regional
         call general_read_gfsatm_nems(grd_gfst,sp_gfs,filename,uv_hyb_ens,.false.,.true., &
              atm_bundle,.true.,iret)
      else if (use_gfs_ncio) then
+        time_beg=MPI_Wtime()
        call general_read_gfsatm_nc(grd_gfst,sp_gfs,filename,uv_hyb_ens,.false.,.true., &
             atm_bundle,.true.,iret)
+        time_end=MPI_Wtime()
+        call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
+        if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
+        if(mype==0) write(6,'("Maximum Walltime for general_read_gfsatm_nc ",f15.4)') walltime
      else
        call general_read_gfsatm(grd_gfst,sp_gfs,sp_gfs,filename,uv_hyb_ens,.false.,.true., &
             atm_bundle,inithead,iret)
@@ -955,6 +964,10 @@ subroutine get_gefs_for_regional
!    if(mype==0) write(6,*)' with halo, n,min,max ges_ps - matt ps =',n,pdiffmin0,pdiffmax0
  end do ! end loop over ensemble members.
+  te=MPI_Wtime()
+  call MPI_Reduce(te-tb, wt, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
+  if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
+  if(mype==0) write(6,'("Maximum Walltime to read ",I4," ensemble members ", f15.4)') n_ens_gfs,wt

  deallocate(ges_z_ens)
diff --git a/src/gsi/gsi_files.cmake b/src/gsi/gsi_files.cmake
index 95d885e2ee..b7fb9e623b 100644
--- a/src/gsi/gsi_files.cmake
+++ b/src/gsi/gsi_files.cmake
@@ -1,5 +1,6 @@
 list(APPEND GSI_SRC_C
 blockIO.c
+crc32_c.c
 )

 # Class files for WRF interfaces
@@ -264,6 +265,7 @@ guess_grids.F90
 half_nmm_grid2.f90
 hdraobmod.f90
 hilbert_curve.f90
+crc32.F90
 hybrid_ensemble_isotropic.F90
 hybrid_ensemble_parameters.f90
 inc2guess.f90
diff --git a/src/gsi/mod_fv3_lola.f90 b/src/gsi/mod_fv3_lola.f90
index 11bb3b6e37..b01be9fbbb 100644
--- a/src/gsi/mod_fv3_lola.f90
+++ b/src/gsi/mod_fv3_lola.f90
@@ -128,8 +128,12 @@ subroutine generate_anl_grid(nx,ny,grid_lon,grid_lont,grid_lat,grid_latt)
   use gridmod, only:grid_ratio_fv3_regional, region_lat,region_lon,nlat,nlon
   use gridmod, only: region_dy,region_dx,region_dyi,region_dxi,coeffy,coeffx
   use gridmod, only:init_general_transform,region_dy,region_dx
-  use mpimod, only: mype
+  use mpimod, only: mype,mpi_rtype
   use egrid2agrid_mod, only: egrid2agrid_parm
+  use mpi
+  use crc32
+  use ifcore
+  use ifport
   implicit none

   real(r_kind),allocatable,dimension(:)::xbh_a,xa_a,xa_b
@@ -164,431 +168,641 @@ subroutine generate_anl_grid(nx,ny,grid_lon,grid_lont,grid_lat,grid_latt)
   real(r_kind) d(4),ds
   integer(i_kind) kk,k
+  integer(i_kind) name_len,nodeID,nodeComm,nodeRank,RanksPerNode,ierr,npes,inunit
+  character(len=72) :: input
+  character(len=5) :: np,nlatc,nlonc
+  logical :: res,exists
+  character(len=MPI_MAX_PROCESSOR_NAME) :: nodeName
+  !real(kind=8) :: time_beg,time_end,walltime
+
   nord_e2a=4
   bilinear=.false.
+  call MPI_Comm_size(mpi_comm_world,npes,ierr)
+  call MPI_Get_processor_name(nodeName,name_len,ierr)
+  nodeID=digest(trim(nodeName))
+  !read(nodeName(4:9),*) nodeID ! Danger! This approach will not work on platforms other than WCOSS2
+  call MPI_Comm_split(mpi_comm_world, nodeID, mype, nodeComm, ierr)
+  call MPI_Comm_rank(nodeComm,nodeRank,ierr)
+  call MPI_Comm_size(nodeComm,RanksPerNode,ierr)
+
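+  ! The grid setup below is cached on disk: the arrays depend only on
+  ! (npes, nx, ny), so when the cache file exists, rank 0 of each node
+  ! reads it back; otherwise the arrays are recomputed (and global rank 0
+  ! writes the cache for later runs). Results are then broadcast to the
+  ! remaining ranks of each node over nodeComm.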
+  ! Check for existing file
+  write(nlatc, '(i0)') nx; nlatc = adjustl(nlatc)
+  write(nlonc, '(i0)') ny; nlonc = adjustl(nlonc)
+  write(np, '(i0)') npes; np = adjustl(np)
+  input= 'anl_grid.'//trim(np)//'.'//trim(nlatc)//'.'//trim(nlonc)
+  inquire (file=trim(input), EXIST=exists)
+
+  if(nodeRank==0) then
+    if (exists) then
+      !write(6,'("generate_anl_grid: Reading from anl_grid ",I4,2A)') mype,' ',trim(input)
+      inunit=2000+mype
+      open(inunit,file=trim(input),form='unformatted',action='read')
+      read(inunit) nlat,nlon,nxa,nya
+      if (allocated(region_dx )) deallocate(region_dx )
+      if (allocated(region_dy )) deallocate(region_dy )
+      allocate( region_dx(nlat,nlon), region_dy(nlat,nlon))
+      allocate(region_dxi(nlat,nlon),region_dyi(nlat,nlon))
+      allocate(coeffx(nlat,nlon),coeffy(nlat,nlon))
+      if (allocated(region_lat)) deallocate(region_lat)
+      if (allocated(region_lon)) deallocate(region_lon)
+      allocate(region_lat(nlat,nlon),region_lon(nlat,nlon))
+      allocate(fv3dx(nxa,nya),fv3dx1(nxa,nya),fv3dy(nxa,nya),fv3dy1(nxa,nya) )
+      allocate(fv3ix(nxa,nya),fv3ixp(nxa,nya),fv3jy(nxa,nya),fv3jyp(nxa,nya) )
+      allocate( a3dx( ny, nx),a3dx1(ny,nx),a3dy(ny,nx),a3dy1(ny,nx) )
+      allocate( a3ix( ny, nx),a3ixp(ny,nx),a3jy(ny,nx),a3jyp(ny,nx) )
+      allocate(cangu( nx,ny+1),sangu(nx,ny+1),cangv(nx+1,ny),sangv(nx+1,ny) )
+
+      rewind(inunit)
+      !time_beg=MPI_Wtime()
+      read(inunit) nlat,nlon,nxa,nya, &
+                   region_dx,region_dy,region_dxi,region_dyi, &
+                   coeffx,coeffy,region_lat,region_lon, &
+                   fv3dx,fv3dx1,fv3dy,fv3dy1,fv3ix,fv3ixp,fv3jy,fv3jyp, &
+                   a3dx,a3dx1,a3dy,a3dy1,a3ix,a3ixp,a3jy,a3jyp, &
+                   cangu,sangu,cangv,sangv
+      !time_end=MPI_Wtime()
+      !call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, nodeComm, ierr)
+      !if(mype==0) write(6,'("Maximum Walltime to read anl_grid ",f15.4)') walltime
+
+      close(inunit)
+    else
! create xc,yc,zc for the cell centers.
-  allocate(xc(nx,ny))
-  allocate(yc(nx,ny))
-  allocate(zc(nx,ny))
-  allocate(gclat(nx,ny))
-  allocate(gclon(nx,ny))
-  allocate(gcrlat(nx,ny))
-  allocate(gcrlon(nx,ny))
-  do j=1,ny
-     do i=1,nx
-        xc(i,j)=cos(grid_latt(i,j)*deg2rad)*cos(grid_lont(i,j)*deg2rad)
-        yc(i,j)=cos(grid_latt(i,j)*deg2rad)*sin(grid_lont(i,j)*deg2rad)
-        zc(i,j)=sin(grid_latt(i,j)*deg2rad)
-     enddo
-  enddo
+      allocate(xc(nx,ny))
+      allocate(yc(nx,ny))
+      allocate(zc(nx,ny))
+      allocate(gclat(nx,ny))
+      allocate(gclon(nx,ny))
+      allocate(gcrlat(nx,ny))
+      allocate(gcrlon(nx,ny))
+      !$omp parallel do default(none),private(j,i) &
+      !$omp shared(ny,nx,xc,grid_latt,grid_lont,deg2rad,yc,zc)
+      do j=1,ny
+         do i=1,nx
+            xc(i,j)=cos(grid_latt(i,j)*deg2rad)*cos(grid_lont(i,j)*deg2rad)
+            yc(i,j)=cos(grid_latt(i,j)*deg2rad)*sin(grid_lont(i,j)*deg2rad)
+            zc(i,j)=sin(grid_latt(i,j)*deg2rad)
+         enddo
+      enddo
+      !$omp end parallel do

! compute center as average x,y,z coordinates of corners of domain --
-  xcent=quarter*(xc(1,1)+xc(1,ny)+xc(nx,1)+xc(nx,ny))
-  ycent=quarter*(yc(1,1)+yc(1,ny)+yc(nx,1)+yc(nx,ny))
-  zcent=quarter*(zc(1,1)+zc(1,ny)+zc(nx,1)+zc(nx,ny))
+      xcent=quarter*(xc(1,1)+xc(1,ny)+xc(nx,1)+xc(nx,ny))
+      ycent=quarter*(yc(1,1)+yc(1,ny)+yc(nx,1)+yc(nx,ny))
+      zcent=quarter*(zc(1,1)+zc(1,ny)+zc(nx,1)+zc(nx,ny))

-  rnorm=one/sqrt(xcent**2+ycent**2+zcent**2)
-  xcent=rnorm*xcent
-  ycent=rnorm*ycent
-  zcent=rnorm*zcent
-  centlat=asin(zcent)*rad2deg
-  centlon=atan2(ycent,xcent)*rad2deg
+      rnorm=one/sqrt(xcent**2+ycent**2+zcent**2)
+      xcent=rnorm*xcent
+      ycent=rnorm*ycent
+      zcent=rnorm*zcent
+      centlat=asin(zcent)*rad2deg
+      centlon=atan2(ycent,xcent)*rad2deg
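+      ! rotate2deg below rotates the earth lat/lon of the fv3 cell centers
+      ! into a coordinate system centered on (centlat,centlon), so the
+      ! rotated coordinates vary nearly uniformly across the regional domain.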
!! compute new lats, lons
-  call rotate2deg(grid_lont,grid_latt,gcrlon,gcrlat, &
-                  centlon,centlat,nx,ny)
+      call rotate2deg(grid_lont,grid_latt,gcrlon,gcrlat, &
+                      centlon,centlat,nx,ny)

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!  compute analysis A-grid  lats, lons
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!--------------------------obtain analysis grid dimensions nxa,nya
-  nxa=1+nint((nx-one)/grid_ratio_fv3_regional)
-  nya=1+nint((ny-one)/grid_ratio_fv3_regional)
-  nlat=nya
-  nlon=nxa
-  if(mype==0) print *,'nlat,nlon=nya,nxa= ',nlat,nlon
+      nxa=1+nint((nx-one)/grid_ratio_fv3_regional)
+      nya=1+nint((ny-one)/grid_ratio_fv3_regional)
+      nlat=nya
+      nlon=nxa
+      if(mype==0) print *,'nlat,nlon=nya,nxa= ',nlat,nlon

!--------------------------obtain analysis grid spacing
-  dlat=(maxval(gcrlat)-minval(gcrlat))/(ny-1)
-  dlon=(maxval(gcrlon)-minval(gcrlon))/(nx-1)
-  adlat=dlat*grid_ratio_fv3_regional
-  adlon=dlon*grid_ratio_fv3_regional
+      dlat=(maxval(gcrlat)-minval(gcrlat))/(ny-1)
+      dlon=(maxval(gcrlon)-minval(gcrlon))/(nx-1)
+      adlat=dlat*grid_ratio_fv3_regional
+      adlon=dlon*grid_ratio_fv3_regional

!-------setup analysis A-grid; find center of the domain
-  nlonh=nlon/2
-  nlath=nlat/2
-
-  if(nlonh*2==nlon)then
-     clon=adlon/two
-     cx=half
-  else
-     clon=adlon
-     cx=one
-  endif
-
-  if(nlath*2==nlat)then
-     clat=adlat/two
-     cy=half
-  else
-     clat=adlat
-     cy=one
-  endif
+      nlonh=nlon/2
+      nlath=nlat/2
+
+      if(nlonh*2==nlon)then
+         clon=adlon/two
+         cx=half
+      else
+         clon=adlon
+         cx=one
+      endif
+
+      if(nlath*2==nlat)then
+         clat=adlat/two
+         cy=half
+      else
+         clat=adlat
+         cy=one
+      endif

!
!-----setup analysis A-grid from center of the domain
!
-  allocate(rlat_in(nlat,nlon),rlon_in(nlat,nlon))
-  do j=1,nlon
-     alon=(j-nlonh)*adlon-clon
-     do i=1,nlat
-        rlon_in(i,j)=alon
-     enddo
-  enddo
-
-
-  do j=1,nlon
-     do i=1,nlat
-        rlat_in(i,j)=(i-nlath)*adlat-clat
-     enddo
-  enddo
-
-  if (allocated(region_dx )) deallocate(region_dx )
-  if (allocated(region_dy )) deallocate(region_dy )
-  allocate(region_dx(nlat,nlon),region_dy(nlat,nlon))
-  allocate(region_dxi(nlat,nlon),region_dyi(nlat,nlon))
-  allocate(coeffx(nlat,nlon),coeffy(nlat,nlon))
-  dyy=rearth*adlat*deg2rad
-  dyyi=one/dyy
-  dyyh=half/dyy
-  do j=1,nlon
-     do i=1,nlat
-        region_dy(i,j)=dyy
-        region_dyi(i,j)=dyyi
-        coeffy(i,j)=dyyh
-     enddo
-  enddo
-
-  do i=1,nlat
-     dxx=rearth*cos(rlat_in(i,1)*deg2rad)*adlon*deg2rad
-     dxxi=one/dxx
-     dxxh=half/dxx
-     do j=1,nlon
-        region_dx(i,j)=dxx
-        region_dxi(i,j)=dxxi
-        coeffx(i,j)=dxxh
-     enddo
-  enddo
+      allocate(rlat_in(nlat,nlon),rlon_in(nlat,nlon))
+      !$omp parallel do default(none),private(j,alon,i) &
+      !$omp shared(nlon,nlat,rlon_in,nlonh,adlon,clon)
+      do j=1,nlon
+         alon=(j-nlonh)*adlon-clon
+         do i=1,nlat
+            rlon_in(i,j)=alon
+         enddo
+      enddo
+      !$omp end parallel do
+
+      !$omp parallel do default(none),private(j,i) &
+      !$omp shared(nlon,nlat,rlat_in,nlath,adlat,clat)
+      do j=1,nlon
+         do i=1,nlat
+            rlat_in(i,j)=(i-nlath)*adlat-clat
+         enddo
+      enddo
+      !$omp end parallel do
+
+      if (allocated(region_dx )) deallocate(region_dx )
+      if (allocated(region_dy )) deallocate(region_dy )
+      allocate(region_dx(nlat,nlon),region_dy(nlat,nlon))
+      allocate(region_dxi(nlat,nlon),region_dyi(nlat,nlon))
+      allocate(coeffx(nlat,nlon),coeffy(nlat,nlon))
+      dyy=rearth*adlat*deg2rad
+      dyyi=one/dyy
+      dyyh=half/dyy
+      !$omp parallel do default(none),private(j,i) &
+      !$omp shared(nlon,nlat,dyy,dyyi,dyyh,region_dy,region_dyi,coeffy)
+      do j=1,nlon
+         do i=1,nlat
+            region_dy(i,j)=dyy
+            region_dyi(i,j)=dyyi
+            coeffy(i,j)=dyyh
+         enddo
+      enddo
+      !$omp end parallel do
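+      ! region_dy is uniform because the rotated latitude spacing adlat is
+      ! constant, while region_dx shrinks with the cosine of latitude, hence
+      ! the separate i-loop below computing dxx row by row.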
+
+      !$omp parallel do default(none),private(j,i,dxx,dxxi,dxxh) &
+      !$omp shared(nlon,nlat,rearth,rlat_in,deg2rad,adlon,region_dx,region_dxi,coeffx)
+      do i=1,nlat
+         dxx=rearth*cos(rlat_in(i,1)*deg2rad)*adlon*deg2rad
+         dxxi=one/dxx
+         dxxh=half/dxx
+         do j=1,nlon
+            region_dx(i,j)=dxx
+            region_dxi(i,j)=dxxi
+            coeffx(i,j)=dxxh
+         enddo
+      enddo
+      !$omp end parallel do

!
!----------  setup region_lat,region_lon in earth coord
!
-  if (allocated(region_lat)) deallocate(region_lat)
-  if (allocated(region_lon)) deallocate(region_lon)
-  allocate(region_lat(nlat,nlon),region_lon(nlat,nlon))
-  allocate(glat_an(nlon,nlat),glon_an(nlon,nlat))
+      if (allocated(region_lat)) deallocate(region_lat)
+      if (allocated(region_lon)) deallocate(region_lon)
+      allocate(region_lat(nlat,nlon),region_lon(nlat,nlon))
+      allocate(glat_an(nlon,nlat),glon_an(nlon,nlat))

-  call unrotate2deg(region_lon,region_lat,rlon_in,rlat_in, &
+      call unrotate2deg(region_lon,region_lat,rlon_in,rlat_in, &
                     centlon,centlat,nlat,nlon)

-  region_lat=region_lat*deg2rad
-  region_lon=region_lon*deg2rad
-
-  do j=1,nlat
-     do i=1,nlon
-        glat_an(i,j)=region_lat(j,i)
-        glon_an(i,j)=region_lon(j,i)
-     enddo
-  enddo
-
-  call init_general_transform(glat_an,glon_an)
+      region_lat=region_lat*deg2rad
+      region_lon=region_lon*deg2rad
+      !$omp parallel do default(none),private(j,i) &
+      !$omp shared(nlon,nlat,region_lat,region_lon,glat_an,glon_an)
+      do j=1,nlat
+         do i=1,nlon
+            glat_an(i,j)=region_lat(j,i)
+            glon_an(i,j)=region_lon(j,i)
+         enddo
+      enddo
+      !$omp end parallel do
+
+      call init_general_transform(glat_an,glon_an)

-  deallocate(glat_an,glon_an)
+      deallocate(glat_an,glon_an)

!--------------------compute all combinations of relative coordinates
-  allocate(xbh_a(nx),xbh_b(nx,ny),xa_a(nxa),xa_b(nxa))
-  allocate(ybh_a(ny),ybh_b(nx,ny),ya_a(nya),ya_b(nya))
+      allocate(xbh_a(nx),xbh_b(nx,ny),xa_a(nxa),xa_b(nxa))
+      allocate(ybh_a(ny),ybh_b(nx,ny),ya_a(nya),ya_b(nya))

-  nxh=nx/2
-  nyh=ny/2
+      nxh=nx/2
+      nyh=ny/2

!!!!!! fv3 rotated grid; not equal spacing, non_orthogonal !!!!!!
-  do j=1,ny
-     jr=ny+1-j
-     do i=1,nx
-        ir=nx+1-i
-        xbh_b(ir,jr)=gcrlon(i,j)/dlon
-     end do
-  end do
-  do j=1,ny
-     jr=ny+1-j
-     do i=1,nx
-        ir=nx+1-i
-        ybh_b(ir,jr)=gcrlat(i,j)/dlat
-     end do
-  end do
+      !$omp parallel do default(none),private(j,jr,i,ir) &
+      !$omp shared(ny,nx,xbh_b,gcrlon,dlon)
+      do j=1,ny
+         jr=ny+1-j
+         do i=1,nx
+            ir=nx+1-i
+            xbh_b(ir,jr)=gcrlon(i,j)/dlon
+         end do
+      end do
+      !$omp end parallel do
+
+      !$omp parallel do default(none),private(j,jr,i,ir) &
+      !$omp shared(ny,nx,ybh_b,gcrlat,dlat)
+      do j=1,ny
+         jr=ny+1-j
+         do i=1,nx
+            ir=nx+1-i
+            ybh_b(ir,jr)=gcrlat(i,j)/dlat
+         end do
+      end do
+      !$omp end parallel do

!!!! define analysis A grid  !!!!!!!!!!!!!
-  do j=1,nxa
-     xa_a(j)=(real(j-nlonh,r_kind)-cx)*grid_ratio_fv3_regional
-  end do
-  do i=1,nya
-     ya_a(i)=(real(i-nlath,r_kind)-cy)*grid_ratio_fv3_regional
-  end do
+      do j=1,nxa
+         xa_a(j)=(real(j-nlonh,r_kind)-cx)*grid_ratio_fv3_regional
+      end do
+      do i=1,nya
+         ya_a(i)=(real(i-nlath,r_kind)-cy)*grid_ratio_fv3_regional
+      end do

!!!!!compute fv3 to A grid interpolation parameters !!!!!!!!!
-  allocate ( fv3dx(nxa,nya),fv3dx1(nxa,nya),fv3dy(nxa,nya),fv3dy1(nxa,nya) )
-  allocate ( fv3ix(nxa,nya),fv3ixp(nxa,nya),fv3jy(nxa,nya),fv3jyp(nxa,nya) )
-  allocate(yy(ny))
+      allocate ( fv3dx(nxa,nya),fv3dx1(nxa,nya),fv3dy(nxa,nya),fv3dy1(nxa,nya) )
+      allocate ( fv3ix(nxa,nya),fv3ixp(nxa,nya),fv3jy(nxa,nya),fv3jyp(nxa,nya) )
+      allocate(yy(ny))

! iteration to find the fv3 grid cell
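+      ! For every analysis A-grid point the loop below locates the enclosing
+      ! fv3 cell: up to three grdcrd1 searches refine (ib1,jb1), exiting
+      ! early once the indices repeat; if still unconverged at n=3 the
+      ! nearest cell corner is used instead.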
-  jb1=1
-  ib1=1
-  do j=1,nya
-     do i=1,nxa
-        do n=1,3
-           gxa=xa_a(i)
-           if(gxa < xbh_b(1,jb1))then
-              gxa= 1
-           else if(gxa > xbh_b(nx,jb1))then
-              gxa= nx
-           else
-              call grdcrd1(gxa,xbh_b(1,jb1),nx,1)
-           endif
-           ib2=ib1
-           ib1=gxa
-           do jj=1,ny
-              yy(jj)=ybh_b(ib1,jj)
-           enddo
-           gya=ya_a(j)
-           if(gya < yy(1))then
-              gya= 1
-           else if(gya > yy(ny))then
-              gya= ny
-           else
-              call grdcrd1(gya,yy,ny,1)
-           endif
-           jb2=jb1
-           jb1=gya
-           if(ib1+1 > nx)then !this block( 6 lines) is copied from GSL gsi repository
-              ib1=ib1-1
-           endif
-           if(jb1+1 > ny)then
-              jb1=jb1-1
-           endif
-
-
-           if((ib1 == ib2) .and. (jb1 == jb2)) exit
-           if(n==3 ) then
+      jb1=1
+      ib1=1
+      !$omp parallel do default(none),private(j,i,n,gxa,ib2,jj,yy,gya,jb2,d,kk,k,ds) &
+      !$omp shared(nya,nxa,xa_a,xbh_b,nx,ybh_b,ya_a,ny,fv3ix,fv3ixp,fv3jy,fv3jyp,fv3dy,fv3dy1,fv3dx,fv3dx1,bilinear) &
+      !$omp firstprivate(jb1,ib1)
+      do j=1,nya
+         do i=1,nxa
+            do n=1,3
+               gxa=xa_a(i)
+               if(gxa < xbh_b(1,jb1))then
+                  gxa= 1
+               else if(gxa > xbh_b(nx,jb1))then
+                  gxa= nx
+               else
+                  call grdcrd1(gxa,xbh_b(1,jb1),nx,1)
+               endif
+               ib2=ib1
+               ib1=gxa
+               do jj=1,ny
+                  yy(jj)=ybh_b(ib1,jj)
+               enddo
+               gya=ya_a(j)
+               if(gya < yy(1))then
+                  gya= 1
+               else if(gya > yy(ny))then
+                  gya= ny
+               else
+                  call grdcrd1(gya,yy,ny,1)
+               endif
+               jb2=jb1
+               jb1=gya
+               if(ib1+1 > nx)then !this block( 6 lines) is copied from GSL gsi repository
+                  ib1=ib1-1
+               endif
+               if(jb1+1 > ny)then
+                  jb1=jb1-1
+               endif
+
+
+               if((ib1 == ib2) .and. (jb1 == jb2)) exit
+               if(n==3 ) then
!!!!!!! if not converge, find the nearest corner point
-              d(1)=(xa_a(i)-xbh_b(ib1,jb1))**2+(ya_a(j)-ybh_b(ib1,jb1))**2
-              d(2)=(xa_a(i)-xbh_b(ib1+1,jb1))**2+(ya_a(j)-ybh_b(ib1+1,jb1))**2
-              d(3)=(xa_a(i)-xbh_b(ib1,jb1+1))**2+(ya_a(j)-ybh_b(ib1,jb1+1))**2
-              d(4)=(xa_a(i)-xbh_b(ib1+1,jb1+1))**2+(ya_a(j)-ybh_b(ib1+1,jb1+1))**2
-              kk=1
-              do k=2,4
-                 if(d(k) < d(kk))kk=k
-           else if(gxa > xa_a(nxa))then
-              gxa= nxa
-           else
-              call grdcrd1(gxa,xa_a,nxa,1)
-           endif
-           a3ix(j,i)=int(gxa)
-           a3ix(j,i)=min(max(1,a3ix(j,i)),nxa)
-           a3dx(j,i)=max(zero,min(one,gxa-a3ix(j,i)))
-           a3dx1(j,i)=one-a3dx(j,i)
-           a3ixp(j,i)=min(nxa,a3ix(j,i)+1)
-     end do
-  end do
-
-  do i=1,nx
-     do j=1,ny
-        gya=ybh_b(i,j)
-        if(gya < ya_a(1))then
-           gya= 1
-        else if(gya > ya_a(nya))then
-           gya= nya
-        else
-           call grdcrd1(gya,ya_a,nya,1)
-        endif
-        a3jy(j,i)=int(gya)
-        a3jy(j,i)=min(max(1,a3jy(j,i)),nya)
-        a3dy(j,i)=max(zero,min(one,gya-a3jy(j,i)))
-        a3dy1(j,i)=one-a3dy(j,i)
-        a3jyp(j,i)=min(nya,a3jy(j,i)+1)
-     end do
-  end do
+      allocate ( a3dx(ny,nx),a3dx1(ny,nx),a3dy(ny,nx),a3dy1(ny,nx) )
+      allocate ( a3ix(ny,nx),a3ixp(ny,nx),a3jy(ny,nx),a3jyp(ny,nx) )
+      !$omp parallel do default(none),private(j,i,gxa) &
+      !$omp shared(ny,nx,xbh_b,xa_a,nxa,a3ix,a3dx,a3dx1,a3ixp)
+      do i=1,nx
+         do j=1,ny
+            gxa=xbh_b(i,j)
+            if(gxa < xa_a(1))then
+               gxa= 1
+            else if(gxa > xa_a(nxa))then
+               gxa= nxa
+            else
+               call grdcrd1(gxa,xa_a,nxa,1)
+            endif
+            a3ix(j,i)=int(gxa)
+            a3ix(j,i)=min(max(1,a3ix(j,i)),nxa)
+            a3dx(j,i)=max(zero,min(one,gxa-a3ix(j,i)))
+            a3dx1(j,i)=one-a3dx(j,i)
+            a3ixp(j,i)=min(nxa,a3ix(j,i)+1)
+         end do
+      end do
+      !$omp end parallel do
+
+      !$omp parallel do default(none),private(j,i,gya) &
+      !$omp shared(ny,nx,ybh_b,ya_a,nya,a3jy,a3dy,a3dy1,a3jyp)
+      do i=1,nx
+         do j=1,ny
+            gya=ybh_b(i,j)
+            if(gya < ya_a(1))then
+               gya= 1
+            else if(gya > ya_a(nya))then
+               gya= nya
+            else
+               call grdcrd1(gya,ya_a,nya,1)
+            endif
+            a3jy(j,i)=int(gya)
+            a3jy(j,i)=min(max(1,a3jy(j,i)),nya)
+            a3dy(j,i)=max(zero,min(one,gya-a3jy(j,i)))
+            a3dy1(j,i)=one-a3dy(j,i)
+            a3jyp(j,i)=min(nya,a3jy(j,i)+1)
+         end do
+      end do
+      !$omp end parallel do
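+      ! fv3ix/fv3jy and a3ix/a3jy hold interpolation indices, and fv3dx/fv3dy,
+      ! a3dx/a3dy the matching linear weights; they are consumed later by
+      ! fv3_h_to_ll and fv3_ll_to_h. The search seeds jb1/ib1 are firstprivate
+      ! so each OpenMP thread starts from the pre-region initial guess.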
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!! find coefficients for wind conversion btw FV3 & earth
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-  allocate ( cangu(nx,ny+1),sangu(nx,ny+1),cangv(nx+1,ny),sangv(nx+1,ny) )
+      allocate ( cangu(nx,ny+1),sangu(nx,ny+1),cangv(nx+1,ny),sangv(nx+1,ny) )

!   1.  compute x,y,z at cell cornor from grid_lon, grid_lat
-
-  do j=1,ny+1
-     do i=1,nx+1
-        x(i,j)=cos(grid_lat(i,j)*deg2rad)*cos(grid_lon(i,j)*deg2rad)
-        y(i,j)=cos(grid_lat(i,j)*deg2rad)*sin(grid_lon(i,j)*deg2rad)
-        z(i,j)=sin(grid_lat(i,j)*deg2rad)
-     enddo
-  enddo
+      !$omp parallel do default(none),private(j,i) &
+      !$omp shared(ny,nx,grid_lat,grid_lon,deg2rad,x,y,z)
+      do j=1,ny+1
+         do i=1,nx+1
+            x(i,j)=cos(grid_lat(i,j)*deg2rad)*cos(grid_lon(i,j)*deg2rad)
+            y(i,j)=cos(grid_lat(i,j)*deg2rad)*sin(grid_lon(i,j)*deg2rad)
+            z(i,j)=sin(grid_lat(i,j)*deg2rad)
+         enddo
+      enddo
+      !$omp end parallel do

!  2   find angles to E-W and N-S for U edges
-  sq180=180._r_kind**2
-  do j=1,ny+1
-     do i=1,nx
-! center lat/lon of the edge
-        rlat=half*(grid_lat(i,j)+grid_lat(i+1,j))
-        diff=(grid_lon(i,j)-grid_lon(i+1,j))**2
-        if(diff < sq180)then
-           rlon=half*(grid_lon(i,j)+grid_lon(i+1,j))
-        else
-           rlon=half*(grid_lon(i,j)+grid_lon(i+1,j)-360._r_kind)
-        endif
+      sq180=180._r_kind**2
+      !$omp parallel do default(none),private(j,i,rlat,diff,rlon,xr,yr,zr,xu,yu,zu,uval,ewval,nsval) &
+      !$omp shared(ny,nx,grid_lat,grid_lon,sq180,deg2rad,x,y,z,cangu,sangu)
+      do j=1,ny+1
+         do i=1,nx
+! center lat/lon of the edge
+            rlat=half*(grid_lat(i,j)+grid_lat(i+1,j))
+            diff=(grid_lon(i,j)-grid_lon(i+1,j))**2
+            if(diff < sq180)then
+               rlon=half*(grid_lon(i,j)+grid_lon(i+1,j))
+            else
+               rlon=half*(grid_lon(i,j)+grid_lon(i+1,j)-360._r_kind)
+            endif
! vector to center of the edge
-        xr=cos(rlat*deg2rad)*cos(rlon*deg2rad)
-        yr=cos(rlat*deg2rad)*sin(rlon*deg2rad)
-        zr=sin(rlat*deg2rad)
+            xr=cos(rlat*deg2rad)*cos(rlon*deg2rad)
+            yr=cos(rlat*deg2rad)*sin(rlon*deg2rad)
+            zr=sin(rlat*deg2rad)
! vector of the edge
-        xu= x(i+1,j)-x(i,j)
-        yu= y(i+1,j)-y(i,j)
-        zu= z(i+1,j)-z(i,j)
+            xu= x(i+1,j)-x(i,j)
+            yu= y(i+1,j)-y(i,j)
+            zu= z(i+1,j)-z(i,j)
! find angle with cross product
-        uval=sqrt((xu**2+yu**2+zu**2))
-        ewval=sqrt((xr**2+yr**2))
-        nsval=sqrt((xr*zr)**2+(zr*yr)**2+(xr*xr+yr*yr)**2)
-        cangu(i,j)=(-yr*xu+xr*yu)/ewval/uval
-        sangu(i,j)=(-xr*zr*xu-zr*yr*yu+(xr*xr+yr*yr)*zu) / nsval/uval
-     enddo
-  enddo
+            uval=sqrt((xu**2+yu**2+zu**2))
+            ewval=sqrt((xr**2+yr**2))
+            nsval=sqrt((xr*zr)**2+(zr*yr)**2+(xr*xr+yr*yr)**2)
+            cangu(i,j)=(-yr*xu+xr*yu)/ewval/uval
+            sangu(i,j)=(-xr*zr*xu-zr*yr*yu+(xr*xr+yr*yr)*zu) / nsval/uval
+         enddo
+      enddo
+      !$omp end parallel do
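+      ! cangu/sangu (and cangv/sangv below) store the projections of each
+      ! cell edge onto the local east-west and north-south directions;
+      ! fv3uv2earth later inverts this 2x2 relation to rotate the cell-edge
+      ! winds to earth-relative winds.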
!  3   find angles to E-W and N-S for V edges
+      !$omp parallel do default(none),private(j,i,rlat,diff,rlon,xr,yr,zr,xv,yv,zv,vval,ewval,nsval) &
+      !$omp shared(ny,nx,grid_lat,grid_lon,sq180,deg2rad,x,y,z,cangv,sangv)
-  do j=1,ny
-     do i=1,nx+1
-        rlat=half*(grid_lat(i,j)+grid_lat(i,j+1))
-        diff=(grid_lon(i,j)-grid_lon(i,j+1))**2
-        if(diff < sq180)then
-           rlon=half*(grid_lon(i,j)+grid_lon(i,j+1))
-        else
-           rlon=half*(grid_lon(i,j)+grid_lon(i,j+1)-360._r_kind)
-        endif
-        xr=cos(rlat*deg2rad)*cos(rlon*deg2rad)
-        yr=cos(rlat*deg2rad)*sin(rlon*deg2rad)
-        zr=sin(rlat*deg2rad)
-        xv= x(i,j+1)-x(i,j)
-        yv= y(i,j+1)-y(i,j)
-        zv= z(i,j+1)-z(i,j)
-        vval=sqrt((xv**2+yv**2+zv**2))
-        ewval=sqrt((xr**2+yr**2))
-        nsval=sqrt((xr*zr)**2+(zr*yr)**2+(xr*xr+yr*yr)**2)
-        cangv(i,j)=(-yr*xv+xr*yv)/ewval/vval
-        sangv(i,j)=(-xr*zr*xv-zr*yr*yv+(xr*xr+yr*yr)*zv) / nsval/vval
-     enddo
-  enddo
-  deallocate( xc,yc,zc,gclat,gclon,gcrlat,gcrlon)
-  deallocate(rlat_in,rlon_in)
+      do j=1,ny
+         do i=1,nx+1
+            rlat=half*(grid_lat(i,j)+grid_lat(i,j+1))
+            diff=(grid_lon(i,j)-grid_lon(i,j+1))**2
+            if(diff < sq180)then
+               rlon=half*(grid_lon(i,j)+grid_lon(i,j+1))
+            else
+               rlon=half*(grid_lon(i,j)+grid_lon(i,j+1)-360._r_kind)
+            endif
+            xr=cos(rlat*deg2rad)*cos(rlon*deg2rad)
+            yr=cos(rlat*deg2rad)*sin(rlon*deg2rad)
+            zr=sin(rlat*deg2rad)
+            xv= x(i,j+1)-x(i,j)
+            yv= y(i,j+1)-y(i,j)
+            zv= z(i,j+1)-z(i,j)
+            vval=sqrt((xv**2+yv**2+zv**2))
+            ewval=sqrt((xr**2+yr**2))
+            nsval=sqrt((xr*zr)**2+(zr*yr)**2+(xr*xr+yr*yr)**2)
+            cangv(i,j)=(-yr*xv+xr*yv)/ewval/vval
+            sangv(i,j)=(-xr*zr*xv-zr*yr*yv+(xr*xr+yr*yr)*zv) / nsval/vval
+         enddo
+      enddo
+      !$omp end parallel do
+      deallocate( xc,yc,zc,gclat,gclon,gcrlat,gcrlon)
+      deallocate(rlat_in,rlon_in)
+      ! File didn't exist so we computed the data. Now save it for subsequent runs.
+      if(mype==0) then
+        inunit=2000+mype
+        open(inunit,file=trim(input),form='unformatted',action='write')
+        write(inunit) nlat,nlon,nxa,nya, &
+                      region_dx,region_dy,region_dxi,region_dyi, &
+                      coeffx,coeffy,region_lat,region_lon, &
+                      fv3dx,fv3dx1,fv3dy,fv3dy1,fv3ix,fv3ixp,fv3jy,fv3jyp, &
+                      a3dx,a3dx1,a3dy,a3dy1,a3ix,a3ixp,a3jy,a3jyp, &
+                      cangu,sangu,cangv,sangv
+        !call flush(inunit)
+        !res = COMMITQQ(inunit)
+        close(inunit)
+        !write(6,'("generate_anl_grid: Wrote anl_grid ",I4,A)') mype, trim(input)
+      endif
+    endif ! file exists
+  endif ! nodeRank==0
+
+  call MPI_Bcast( nxa,1,mpi_integer,0,nodeComm,ierr)
+  call MPI_Bcast( nya,1,mpi_integer,0,nodeComm,ierr)
+  call MPI_Bcast(nlat,1,mpi_integer,0,nodeComm,ierr)
+  call MPI_Bcast(nlon,1,mpi_integer,0,nodeComm,ierr)
+  if(nodeRank/=0) then
+    if (allocated(region_dx )) deallocate(region_dx )
+    if (allocated(region_dy )) deallocate(region_dy )
+    allocate( region_dx(nlat,nlon), region_dy(nlat,nlon))
+    allocate(region_dxi(nlat,nlon),region_dyi(nlat,nlon))
+    allocate(coeffx(nlat,nlon),coeffy(nlat,nlon))
+    if (allocated(region_lat)) deallocate(region_lat)
+    if (allocated(region_lon)) deallocate(region_lon)
+    allocate(region_lat(nlat,nlon),region_lon(nlat,nlon))
+    allocate(fv3dx(nxa,nya),fv3dx1(nxa,nya),fv3dy(nxa,nya),fv3dy1(nxa,nya) )
+    allocate(fv3ix(nxa,nya),fv3ixp(nxa,nya),fv3jy(nxa,nya),fv3jyp(nxa,nya) )
+    allocate( a3dx( ny, nx),a3dx1(ny,nx),a3dy(ny,nx),a3dy1(ny,nx) )
+    allocate( a3ix( ny, nx),a3ixp(ny,nx),a3jy(ny,nx),a3jyp(ny,nx) )
+    allocate(cangu( nx,ny+1),sangu(nx,ny+1),cangv(nx+1,ny),sangv(nx+1,ny) )
+  endif
+
+
+! Broadcast arrays to the other ranks on the same node
+  !time_beg=MPI_Wtime()
+  call MPI_Bcast( region_dx,nlat*nlon,mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast( region_dy,nlat*nlon,mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast(region_dxi,nlat*nlon,mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast(region_dyi,nlat*nlon,mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast( coeffx,nlat*nlon,mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast( coeffy,nlat*nlon,mpi_rtype,0,nodeComm,ierr)
+
+  call MPI_Bcast(region_lat,nlat*nlon,mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast(region_lon,nlat*nlon,mpi_rtype,0,nodeComm,ierr)
+  !walltime=MPI_Wtime()-time_beg
+
+  if (exists) then
+    ! All ranks need to call init_general_transform eventually
+    allocate(glat_an(nlon,nlat),glon_an(nlon,nlat))
+    !$omp parallel do default(none),private(j,i) &
+    !$omp shared(nlon,nlat,region_lat,region_lon,glat_an,glon_an)
+    do j=1,nlat
+       do i=1,nlon
+          glat_an(i,j)=region_lat(j,i)
+          glon_an(i,j)=region_lon(j,i)
+       enddo
+    enddo
+    call init_general_transform(glat_an,glon_an)
+    deallocate(glat_an,glon_an)
+  else
+    if(nodeRank/=0) then
+      ! nodeRank==0 ranks already called init_general_transform
+      ! above so just call from remaining ranks
+      allocate(glat_an(nlon,nlat),glon_an(nlon,nlat))
+      !$omp parallel do default(none),private(j,i) &
+      !$omp shared(nlon,nlat,region_lat,region_lon,glat_an,glon_an)
+      do j=1,nlat
+         do i=1,nlon
+            glat_an(i,j)=region_lat(j,i)
+            glon_an(i,j)=region_lon(j,i)
+         enddo
+      enddo
+      call init_general_transform(glat_an,glon_an)
+      deallocate(glat_an,glon_an)
+    endif
+  endif
+
+  !time_beg=MPI_Wtime()
+  call MPI_Bcast(fv3dx ,nxa*nya,mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast(fv3dx1,nxa*nya,mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast(fv3dy ,nxa*nya,mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast(fv3dy1,nxa*nya,mpi_rtype,0,nodeComm,ierr)
+
+  call MPI_Bcast(fv3ix ,nxa*nya,mpi_integer,0,nodeComm,ierr)
+  call MPI_Bcast(fv3ixp,nxa*nya,mpi_integer,0,nodeComm,ierr)
+  call MPI_Bcast(fv3jy ,nxa*nya,mpi_integer,0,nodeComm,ierr)
+  call MPI_Bcast(fv3jyp,nxa*nya,mpi_integer,0,nodeComm,ierr)
+
+  call MPI_Bcast(a3dx ,nx*ny ,mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast(a3dx1 ,nx*ny ,mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast(a3dy ,nx*ny ,mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast(a3dy1 ,nx*ny ,mpi_rtype,0,nodeComm,ierr)
+
+  call MPI_Bcast(a3ix ,nx*ny ,mpi_integer,0,nodeComm,ierr)
+  call MPI_Bcast(a3ixp ,nx*ny ,mpi_integer,0,nodeComm,ierr)
+  call MPI_Bcast(a3jy ,nx*ny ,mpi_integer,0,nodeComm,ierr)
+  call MPI_Bcast(a3jyp ,nx*ny ,mpi_integer,0,nodeComm,ierr)
+
+  call MPI_Bcast(cangu ,nx*(ny+1),mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast(sangu ,nx*(ny+1),mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast(cangv ,(nx+1)*ny,mpi_rtype,0,nodeComm,ierr)
+  call MPI_Bcast(sangv ,(nx+1)*ny,mpi_rtype,0,nodeComm,ierr)
+  !time_end=MPI_Wtime()
+  !call MPI_Reduce(walltime+(time_end-time_beg), walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
+  !if(mype==0) write(6,'("Maximum Walltime to Bcast anl_grid ",f15.4)') walltime
+
+  call MPI_Comm_free(nodeComm,ierr)

 end subroutine generate_anl_grid

 subroutine definecoef_regular_grids(nxen,nyen,grid_lon,grid_lont,grid_lat,grid_latt)
@@ -667,6 +881,8 @@ subroutine definecoef_regular_grids(nxen,nyen,grid_lon,grid_lont,grid_lat,grid_l
   allocate(gclon(nxen,nyen))
   allocate(gcrlat(nxen,nyen))
   allocate(gcrlon(nxen,nyen))
+  !$omp parallel do default(none),private(j,i) &
+  !$omp shared(nyen,nxen,xc,grid_latt,grid_lont,deg2rad,yc,zc)
   do j=1,nyen
      do i=1,nxen
         xc(i,j)=cos(grid_latt(i,j)*deg2rad)*cos(grid_lont(i,j)*deg2rad)
@@ -724,6 +940,8 @@ subroutine definecoef_regular_grids(nxen,nyen,grid_lon,grid_lont,grid_lat,grid_l

!!!!!! fv3 rotated grid; not equal spacing, non_orthogonal !!!!!!
+  !$omp parallel do default(none),private(j,jr,i,ir) &
+  !$omp shared(nyen,nxen,xbh_b,gcrlon,dlon)
   do j=1,nyen
      jr=nyen+1-j
      do i=1,nxen
@@ -731,6 +949,8 @@ subroutine definecoef_regular_grids(nxen,nyen,grid_lon,grid_lont,grid_lat,grid_l
         xbh_b(ir,jr)=gcrlon(i,j)/dlon
      end do
   end do
+  !$omp parallel do default(none),private(j,jr,i,ir) &
+  !$omp shared(nyen,nxen,ybh_b,gcrlat,dlat)
   do j=1,nyen
      jr=nyen+1-j
      do i=1,nxen
@@ -757,6 +977,9 @@ subroutine definecoef_regular_grids(nxen,nyen,grid_lon,grid_lont,grid_lat,grid_l
! iteration to find the fv3 grid cell
   jb1=1
   ib1=1
+  !$omp parallel do default(none),private(j,i,n,gxa,ib2,jj,yy,gya,jb2,d,kk,k,ds) &
+  !$omp shared(nye,nxe,xa_a,xbh_b,nxen,ybh_b,ya_a,nyen,fv3ixens,fv3ixpens,fv3jyens,fv3jypens,fv3dyens,fv3dy1ens,fv3dxens,fv3dx1ens,bilinear) &
+  !$omp firstprivate(jb1,ib1)
   do j=1,nye
      do i=1,nxe
         do n=1,3
@@ -892,6 +1115,8 @@ subroutine definecoef_regular_grids(nxen,nyen,grid_lon,grid_lont,grid_lat,grid_l
   allocate (a3dxens(nyen,nxen),a3dx1ens(nyen,nxen),a3dyens(nyen,nxen),a3dy1ens(nyen,nxen))
   allocate (a3ixens(nyen,nxen),a3ixpens(nyen,nxen),a3jyens(nyen,nxen),a3jypens(nyen,nxen))
+  !$omp parallel do default(none),private(j,i,gxa) &
+  !$omp shared(nyen,nxen,xbh_b,xa_a,nxe,a3ixens,a3dxens,a3dx1ens,a3ixpens)
   do i=1,nxen
      do j=1,nyen
         gxa=xbh_b(i,j)
@@ -910,6 +1135,8 @@ subroutine definecoef_regular_grids(nxen,nyen,grid_lon,grid_lont,grid_lat,grid_l
      end do
   end do
+  !$omp parallel do default(none),private(j,i,gya) &
+  !$omp shared(nyen,nxen,ybh_b,ya_a,nye,a3jyens,a3dyens,a3dy1ens,a3jypens)
   do i=1,nxen
      do j=1,nyen
        gya=ybh_b(i,j)
@@ -935,7 +1162,8 @@ subroutine definecoef_regular_grids(nxen,nyen,grid_lon,grid_lont,grid_lat,grid_l
   allocate (canguens(nxen,nyen+1),sanguens(nxen,nyen+1),cangvens(nxen+1,nyen),sangvens(nxen+1,nyen))

!   1.  compute x,y,z at cell cornor from grid_lon, grid_lat
-
+  !$omp parallel do default(none),private(j,i) &
+  !$omp shared(nyen,nxen,grid_lat,grid_lon,deg2rad,x,y,z)
   do j=1,nyen+1
      do i=1,nxen+1
         x(i,j)=cos(grid_lat(i,j)*deg2rad)*cos(grid_lon(i,j)*deg2rad)
@@ -947,6 +1175,8 @@ subroutine definecoef_regular_grids(nxen,nyen,grid_lon,grid_lont,grid_lat,grid_l
!  2   find angles to E-W and N-S for U edges
   sq180=180._r_kind**2
+  !$omp parallel do default(none),private(j,i,rlat,diff,rlon,xr,yr,zr,xu,yu,zu,uval,ewval,nsval) &
+  !$omp shared(nyen,nxen,grid_lat,grid_lon,sq180,deg2rad,x,y,z,canguens,sanguens)
   do j=1,nyen+1
      do i=1,nxen
! center lat/lon of the edge
        rlat=half*(grid_lat(i,j)+grid_lat(i+1,j))
        diff=(grid_lon(i,j)-grid_lon(i+1,j))**2
        if(diff < sq180)then
           rlon=half*(grid_lon(i,j)+grid_lon(i+1,j))
        else
           rlon=half*(grid_lon(i,j)+grid_lon(i+1,j)-360._r_kind)
        endif
@@ -975,6 +1205,8 @@ subroutine definecoef_regular_grids(nxen,nyen,grid_lon,grid_lont,grid_lat,grid_l
   enddo
!  3   find angles to E-W and N-S for V edges
+  !$omp parallel do default(none),private(j,i,rlat,diff,rlon,xr,yr,zr,xv,yv,zv,vval,ewval,nsval) &
+  !$omp shared(nyen,nxen,grid_lat,grid_lon,sq180,deg2rad,x,y,z,cangvens,sangvens)
   do j=1,nyen
      do i=1,nxen+1
        rlat=half*(grid_lat(i,j)+grid_lat(i,j+1))
@@ -1093,6 +1325,8 @@ subroutine fv3uv2earth(u,v,nx,ny,u_out,v_out)
   real(r_kind),intent(  out) :: u_out(nx,ny),v_out(nx,ny)
   integer(i_kind) i,j

+  !$omp parallel do default(none), private(j,i), &
+  !$omp shared(ny,nx,u,sangv,v,sangu,cangu,cangv,u_out,v_out)
   do j=1,ny
      do i=1,nx
        u_out(i,j)=half *( (u(i,j)*sangv(i,j)-v(i,j)*sangu(i,j))/(cangu(i,j)*sangv(i,j)-sangu(i,j)*cangv(i,j)) &
@@ -1194,6 +1428,8 @@ subroutine fv3_h_to_ll(b_in,a,nb,mb,na,ma,rev_flg)
   nbp=nb+1
   if(rev_flg) then
!!!!!!!!! reverse E-W and N-S
+     !$omp parallel do default(none), private(j,jr,i,ir), &
+     !$omp shared(mb,mbp,nb,nbp,b_in,b)
      do j=1,mb
         jr=mbp-j
         do i=1,nb
@@ -1206,13 +1442,17 @@ subroutine fv3_h_to_ll(b_in,a,nb,mb,na,ma,rev_flg)
   endif
!!!!!!!!! interpolate to A grid & reverse ij for array a(lat,lon)
   if(bilinear)then ! bilinear interpolation
+     !$omp parallel do default(none), private(j,i), &
+     !$omp shared(ma,na,b,a,fv3dx1,fv3dy1,fv3ix,fv3jy,fv3dy,fv3jyp,fv3dx,fv3ixp)
      do j=1,ma
         do i=1,na
            a(j,i)=fv3dx1(i,j)*(fv3dy1(i,j)*b(fv3ix (i,j),fv3jy(i,j))+fv3dy(i,j)*b(fv3ix (i,j),fv3jyp(i,j))) &
                  +fv3dx (i,j)*(fv3dy1(i,j)*b(fv3ixp(i,j),fv3jy(i,j))+fv3dy(i,j)*b(fv3ixp(i,j),fv3jyp(i,j)))
         end do
      end do
-  else ! inverse-distance weighting average
+  else ! inverse-distance weighting average
+     !$omp parallel do default(none), private(j,i), &
+     !$omp shared(ma,na,b,a,fv3dx,fv3ix,fv3jy,fv3dy,fv3jyp,fv3dx1,fv3ixp,fv3dy1)
      do j=1,ma
         do i=1,na
            a(j,i)=fv3dx(i,j)*b(fv3ix (i,j),fv3jy(i,j))+fv3dy(i,j)*b(fv3ix (i,j),fv3jyp(i,j)) &
@@ -1351,7 +1591,7 @@ subroutine fv3_ll_to_h(a,b,nxa,nya,nxb,nyb,rev_flg)
         do j=1,nxb
            jr=nxbp-j
            b(jr+ijr)=a3dy1(i,j)*(a3dx1(i,j)*a(a3jy (i,j),a3ix(i,j))+a3dx(i,j)*a(a3jy (i,j),a3ixp(i,j))) &
-                    +a3dy (i,j)*(a3dx1(i,j)*a(a3jyp(i,j),a3ix(i,j))+a3dx(i,j)*a(a3jyp(i,j),a3ixp(i,j)))
+                    +a3dy (i,j)*(a3dx1(i,j)*a(a3jyp(i,j),a3ix(i,j))+a3dx(i,j)*a(a3jyp(i,j),a3ixp(i,j)))
         end do
      end do
   else
@@ -1360,7 +1600,7 @@ subroutine fv3_ll_to_h(a,b,nxa,nya,nxb,nyb,rev_flg)
        ijr=(i-1)*nxb
        do j=1,nxb
           b(j+ijr)=a3dy1(i,j)*(a3dx1(i,j)*a(a3jy (i,j),a3ix(i,j))+a3dx(i,j)*a(a3jy (i,j),a3ixp(i,j))) &
-                  +a3dy (i,j)*(a3dx1(i,j)*a(a3jyp(i,j),a3ix(i,j))+a3dx(i,j)*a(a3jyp(i,j),a3ixp(i,j)))
+                  +a3dy (i,j)*(a3dx1(i,j)*a(a3jyp(i,j),a3ix(i,j))+a3dx(i,j)*a(a3jyp(i,j),a3ixp(i,j)))
        end do
     end do
   endif
@@ -1418,7 +1658,8 @@ subroutine rotate2deg(rlon_in,rlat_in,rlon_out,rlat_out,rlon0,rlat0,nx,ny)
   real(r_kind) x,y,z, xt,yt,zt, xtt,ytt,ztt
   integer(i_kind) i,j
-
+  !$omp parallel do default(none),private(j,i,x,y,z,xt,yt,zt,xtt,ytt,ztt) &
+  !$omp shared(ny,nx,rlat_in,rlon_in,deg2rad,rlon0,rlat0,rlat_out,rlon_out,rad2deg)
   do j=1,ny
      do i=1,nx
!  1.  compute x,y,z from rlon_in, rlat_in
@@ -1475,6 +1716,8 @@ subroutine unrotate2deg(rlon_in,rlat_in,rlon_out,rlat_out,rlon0,rlat0,nx,ny)
   real(r_kind) x,y,z, xt,yt,zt, xtt,ytt,ztt
   integer(i_kind) i,j

+  !$omp parallel do default(none),private(j,i,x,y,z,xt,yt,zt,xtt,ytt,ztt) &
+  !$omp shared(ny,nx,rlat_in,rlon_in,deg2rad,rlat0,rlon0,rlat_out,rlon_out,rad2deg)
   do j=1,ny
      do i=1,nx
        xtt=cos(rlat_out(i,j)*deg2rad)*cos(rlon_out(i,j)*deg2rad)
diff --git a/src/gsi/pcgsoi.f90 b/src/gsi/pcgsoi.f90
index 0b808c5c55..266c43b770 100644
--- a/src/gsi/pcgsoi.f90
+++ b/src/gsi/pcgsoi.f90
@@ -159,6 +159,7 @@ subroutine pcgsoi()
   use berror, only: vprecond
   use stpjomod, only: stpjo_setup
   use intradmod, only: setrad
+  use mpi, only : MPI_Wtime, MPI_Comm_World, MPI_REAL8, MPI_MAX, MPI_SUCCESS

   implicit none
@@ -191,6 +192,8 @@ subroutine pcgsoi()
   integer(i_kind) :: iortho
   logical :: print_verbose,ortho,diag_print
   logical :: lanlerr,read_success
+  real(kind=8) :: time_beg,time_end,walltime
+  integer(i_kind) :: ierr

! Step size diagnostic strings
   data step /'good', 'SMALL'/
@@ -638,7 +641,14 @@ subroutine pcgsoi()
! Write output analysis files
   if(.not.l4dvar) call prt_guess('analysis')
   call prt_state_norms(sval(1),'increment')
-  if (twodvar_regional .or. jiter == miter) call write_all(-1)
+  if (twodvar_regional .or. jiter == miter) then
+     time_beg=MPI_Wtime()
+     call write_all(-1)
+     time_end=MPI_Wtime()
+     call MPI_Reduce(time_end-time_beg, walltime, 1, MPI_REAL8, MPI_MAX, 0, MPI_COMM_WORLD, ierr)
+     if(ierr /= MPI_SUCCESS) print*,'MPI_Reduce ',ierr
+     if(mype==0) write(6,'("Maximum Walltime for write_all ",f15.4)') walltime
+  endif

! Overwrite guess with increment (4d-var only, for now)
   if (iwrtinc>0) then
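Note on the node-communicator construction in mod_fv3_lola.f90 above: the patch derives a per-node color by CRC32-hashing the result of MPI_Get_processor_name and feeding it to MPI_Comm_split. On MPI-3 implementations the same node-scoped communicator can be obtained without the hash (and without the new zlib dependency) via MPI_Comm_split_type. A minimal sketch, assuming only the standard MPI-3 API (the helper name make_node_comm is illustrative, not part of this patch):

    subroutine make_node_comm(nodeComm, nodeRank, RanksPerNode, ierr)
       use mpi
       implicit none
       integer, intent(out) :: nodeComm, nodeRank, RanksPerNode, ierr
       ! Group the ranks that share a memory domain (i.e. a node); this
       ! plays the role of the CRC32 hostname color used with MPI_Comm_split.
       call MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, &
                                MPI_INFO_NULL, nodeComm, ierr)
       call MPI_Comm_rank(nodeComm, nodeRank, ierr)
       call MPI_Comm_size(nodeComm, RanksPerNode, ierr)
    end subroutine make_node_comm

The CRC32 route kept in the patch also works on pre-MPI-3 stacks, at the cost of the zlib dependency declared in src/gsi/CMakeLists.txt.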