From ce1d621918f2190a082dbfde69e85d9d1416c86f Mon Sep 17 00:00:00 2001 From: Alex Richert <82525672+AlexanderRichert-NOAA@users.noreply.github.com> Date: Thu, 29 Feb 2024 10:39:46 -0800 Subject: [PATCH] Replace Numerial Recipes LU decomp routines with LAPACK ones (#227) * Replace Numerial Recipes LU-decomp routines with LAPACK ones * no verbose Spack install * add lapack dep to cmake/PackageConfig.cmake.in * Update LAPACK logic * update spack package for lapack * Add test of output cmake config * Update Linux.yml * Update README.md for LAPACK/BLAS dep * update doco for lapack dep --- .github/workflows/Intel.yml | 2 +- .github/workflows/Linux.yml | 9 ++- .github/workflows/Spack.yml | 5 +- .github/workflows/developer.yml | 2 +- CMakeLists.txt | 2 + README.md | 5 +- cmake/PackageConfig.cmake.in | 3 + docs/user_guide.md | 5 ++ spack/package.py | 11 +++ src/CMakeLists.txt | 3 +- src/lapack_gen.F | 116 -------------------------------- src/splat.F | 22 ++++-- 12 files changed, 57 insertions(+), 128 deletions(-) delete mode 100644 src/lapack_gen.F diff --git a/.github/workflows/Intel.yml b/.github/workflows/Intel.yml index 22cec949..2c08edeb 100644 --- a/.github/workflows/Intel.yml +++ b/.github/workflows/Intel.yml @@ -37,7 +37,7 @@ jobs: rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list sudo apt-get update - sudo apt-get install intel-oneapi-openmp intel-oneapi-compiler-fortran-2023.2.1 intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-2023.2.1 + sudo apt-get install intel-oneapi-openmp intel-oneapi-compiler-fortran-2023.2.1 intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-2023.2.1 intel-oneapi-mkl-devel-2023.2.0 echo "source /opt/intel/oneapi/setvars.sh" >> ~/.bash_profile - name: checkout diff --git a/.github/workflows/Linux.yml b/.github/workflows/Linux.yml index 54a38060..95baece0 100644 --- a/.github/workflows/Linux.yml +++ b/.github/workflows/Linux.yml @@ -33,11 +33,18 @@ jobs: - name: build run: | + sudo apt install libopenblas-serial-dev cd ip mkdir build cd build - cmake -DCMAKE_PREFIX_PATH="~/" -DOPENMP=${{ matrix.openmp }} ${{ matrix.options }} .. + cmake -DCMAKE_PREFIX_PATH="~/" -DOPENMP=${{ matrix.openmp }} ${{ matrix.options }} -DCMAKE_INSTALL_PREFIX=~/install -DBLA_VENDOR=OpenBLAS .. make -j2 VERBOSE=1 + make install + # Ensure that manual setting of '-DBLA_VENDOR=...' is reflected in output CMake config + if [ $(grep -c "BLA_VENDOR OpenBLAS" ~/install/lib/cmake/ip/ip-config.cmake) -eq 0 ]; then + echo "OpenBLAS not set as BLA_VENDOR in ip-config.cmake!" + exit 1 + fi - name: test run: | diff --git a/.github/workflows/Spack.yml b/.github/workflows/Spack.yml index 6f3558d5..7b9577d8 100644 --- a/.github/workflows/Spack.yml +++ b/.github/workflows/Spack.yml @@ -33,6 +33,7 @@ jobs: - name: spack-build-and-test run: | + sudo apt install libopenblas-serial-dev git clone -c feature.manyFiles=true https://github.com/spack/spack . spack/share/spack/setup-env.sh spack env create ip-env @@ -43,9 +44,11 @@ jobs: precision=$(echo ${{ matrix.variants }} | grep -oP " precision=\K[4d8]") if [ "$precision" == "d" ]; then spack add grib-util@develop ; fi spack external find cmake gmake + spack external find --path /usr/lib/x86_64-linux-gnu/openblas-serial openblas + spack config add "packages:lapack:buildable:false" spack concretize # Run installation and run CTest suite - spack install --verbose --fail-fast --test root + spack install --fail-fast --test root # Run 'spack load' and check that key build options were respected spack load ip if [[ "${{ matrix.variants }}" =~ "+shared" ]]; then suffix="so" ; else suffix="a"; fi diff --git a/.github/workflows/developer.yml b/.github/workflows/developer.yml index 2c1ff20e..cd802787 100644 --- a/.github/workflows/developer.yml +++ b/.github/workflows/developer.yml @@ -25,7 +25,7 @@ jobs: - name: Install Dependencies run: | sudo apt-get update - sudo apt-get install doxygen + sudo apt-get install doxygen libopenblas-dev python3 -m pip install gcovr - name: checkout diff --git a/CMakeLists.txt b/CMakeLists.txt index b7803f43..1b1888d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,8 @@ if(OPENMP) find_package(OpenMP REQUIRED COMPONENTS Fortran) endif() +find_package(LAPACK REQUIRED) + # Set compiler flags. if(CMAKE_Fortran_COMPILER_ID MATCHES "^(Intel|IntelLLVM)$") set(CMAKE_Fortran_FLAGS "-g -traceback -assume byterecl -fp-model strict -fpp -auto ${CMAKE_Fortran_FLAGS}") diff --git a/README.md b/README.md index a6c86ef3..3ac8f3a5 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,9 @@ Code Manager: [Alex Richert](mailto:alexander.richert@noaa.gov) ### Prerequisites -This package does not link to any other libraries, but requires CMake (version -3.15+) to build. +This package requires a BLAS/LAPACK library to provide several LU decomposition-related +routines, and requires CMake (version 3.15+) to build. In spack-stack, OpenBLAS will +be used as the provider of BLAS/LAPACK routines for all compilers. ### Installing diff --git a/cmake/PackageConfig.cmake.in b/cmake/PackageConfig.cmake.in index e4e05327..6288ba83 100644 --- a/cmake/PackageConfig.cmake.in +++ b/cmake/PackageConfig.cmake.in @@ -13,6 +13,9 @@ if(@OPENMP@) find_dependency(OpenMP COMPONENTS Fortran) endif() +set(BLA_VENDOR @BLA_VENDOR@) +find_dependency(LAPACK) + # The target name needs to be one that's built, even if the dependent # build does not use that version. if(@BUILD_4@) diff --git a/docs/user_guide.md b/docs/user_guide.md index 2a188177..4040de59 100644 --- a/docs/user_guide.md +++ b/docs/user_guide.md @@ -19,6 +19,11 @@ where for certain regions of certain grids, floating point differences between field values. Some applications may therefore benefit from the use of 8-byte reals (libip_d or libip_8). +NCEPLIBS-ip uses several BLAS/LAPACK routines in the splat() subroutine, and +therefore requires an external BLAS/LAPACK provider. In practice, this should +generally be OpenBLAS, which is the [spack-stack](https://github.com/JCSDA/spack-stack) +BLAS/LAPACK provider. + ## Interpolation ### Interpolation Methods diff --git a/spack/package.py b/spack/package.py index b0b9c844..f050edcb 100644 --- a/spack/package.py +++ b/spack/package.py @@ -65,6 +65,7 @@ class Ip(CMakePackage): depends_on("sp precision=4", when="@4.1:4 precision=4") depends_on("sp precision=d", when="@4.1:4 precision=d") depends_on("sp precision=8", when="@4.1:4 precision=8") + depends_on("lapack", when="@develop") def cmake_args(self): args = [ @@ -88,6 +89,16 @@ def cmake_args(self): if self.spec.satisfies("@5:"): args.append(self.define_from_variant("BUILD_DEPRECATED", "deprecated")) + if self.spec.satisfies("@develop"): + # Use the LAPACK provider set by Spack even if the compiler supports native BLAS + bla_vendors = {"openblas": "OpenBLAS"} + lapack_provider = self.spec["lapack"].name + if lapack_provider in bla_vendors.keys(): + bla_vendor = bla_vendors[lapack_provider] + else: + bla_vendor = "All" + args.append(self.define("BLA_VENDOR", bla_vendor)) + return args def setup_run_environment(self, env): diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b979c148..a3b0fff9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -14,7 +14,7 @@ ip_mercator_grid_mod.F90 ip_polar_stereo_grid_mod.F90 ip_rot_equid_cylind_egrid_mod.F90 ip_rot_equid_cylind_grid_mod.F90 ip_constants_mod.F90 ip_grids_mod.F90 ip_grid_factory_mod.F90 ip_interpolators_mod.F90 earth_radius_mod.F90 polfix_mod.F90 -fftpack.F lapack_gen.F ncpus.F spanaly.f spdz2uv.f speps.f spfft1.f spffte.f +fftpack.F ncpus.F spanaly.f spdz2uv.f speps.f spfft1.f spffte.f spfftpt.f splaplac.f splat.F splegend.f sppad.f spsynth.f sptezd.f sptez.f sptezmd.f sptezm.f sptezmv.f sptezv.f sptgpm.f sptgpmv.f sptgps.f sptgpsv.f sptgpt.f sptgptv.f sptrand.f sptran.f sptranf0.f sptranf1.f sptranf.f sptranfv.f @@ -77,6 +77,7 @@ foreach(kind ${kinds}) if(OpenMP_Fortran_FOUND) target_link_libraries(${lib_name} PUBLIC OpenMP::OpenMP_Fortran) endif() + target_link_libraries(${lib_name} PUBLIC LAPACK::LAPACK) list(APPEND LIB_TARGETS ${lib_name}) diff --git a/src/lapack_gen.F b/src/lapack_gen.F deleted file mode 100644 index 36db1e46..00000000 --- a/src/lapack_gen.F +++ /dev/null @@ -1,116 +0,0 @@ -C> @file -C> @brief Two Numerical Recipes routines for matrix inversion From Numerical Recipes. -C> -C> ### Program History Log -C> Date | Programmer | Comments -C> -----|------------|--------- -C> 2012-11-05 | E.Mirvis | separated this generic LU from the splat.F - -C> Solves a system of linear equations, follows call to ludcmp(). -C> -C> @param A -C> @param N -C> @param NP -C> @param INDX -C> @param B - SUBROUTINE LUBKSB(A,N,NP,INDX,B) - REAL A(NP,NP),B(N) - INTEGER INDX(N) - II=0 - DO 12 I=1,N - LL=INDX(I) - SUM=B(LL) - B(LL)=B(I) - IF (II.NE.0)THEN - DO 11 J=II,I-1 - SUM=SUM-A(I,J)*B(J) - 11 CONTINUE - ELSE IF (SUM.NE.0.) THEN - II=I - ENDIF - B(I)=SUM - 12 CONTINUE - DO 14 I=N,1,-1 - SUM=B(I) - IF(I.LT.N)THEN - DO 13 J=I+1,N - SUM=SUM-A(I,J)*B(J) - 13 CONTINUE - ENDIF - B(I)=SUM/A(I,I) - 14 CONTINUE - RETURN - END - -C> Replaces an NxN matrix a with the LU decomposition. -C> -C> @param A -C> @param N -C> @param NP -C> @param INDX - SUBROUTINE LUDCMP(A,N,NP,INDX) -C PARAMETER (NMAX=400,TINY=1.0E-20) - PARAMETER (TINY=1.0E-20) -C==EM==^^^ -C - REAL A(NP,NP),VV(N),D -C REAL A(NP,NP),VV(NMAX),D -C==EM==^^^ - INTEGER INDX(N) - D=1. - DO 12 I=1,N - AAMAX=0. - DO 11 J=1,N - IF (ABS(A(I,J)).GT.AAMAX) AAMAX=ABS(A(I,J)) - 11 CONTINUE - IF (AAMAX.EQ.0.) print *, 'SINGULAR MATRIX.' - VV(I)=1./AAMAX - 12 CONTINUE - DO 19 J=1,N - IF (J.GT.1) THEN - DO 14 I=1,J-1 - SUM=A(I,J) - IF (I.GT.1)THEN - DO 13 K=1,I-1 - SUM=SUM-A(I,K)*A(K,J) - 13 CONTINUE - A(I,J)=SUM - ENDIF - 14 CONTINUE - ENDIF - AAMAX=0. - DO 16 I=J,N - SUM=A(I,J) - IF (J.GT.1)THEN - DO 15 K=1,J-1 - SUM=SUM-A(I,K)*A(K,J) - 15 CONTINUE - A(I,J)=SUM - ENDIF - DUM=VV(I)*ABS(SUM) - IF (DUM.GE.AAMAX) THEN - IMAX=I - AAMAX=DUM - ENDIF - 16 CONTINUE - IF (J.NE.IMAX)THEN - DO 17 K=1,N - DUM=A(IMAX,K) - A(IMAX,K)=A(J,K) - A(J,K)=DUM - 17 CONTINUE - D=-D - VV(IMAX)=VV(J) - ENDIF - INDX(J)=IMAX - IF(J.NE.N)THEN - IF(A(J,J).EQ.0.)A(J,J)=TINY - DUM=1./A(J,J) - DO 18 I=J+1,N - A(I,J)=A(I,J)*DUM - 18 CONTINUE - ENDIF - 19 CONTINUE - IF(A(N,N).EQ.0.)A(N,N)=TINY - RETURN - END diff --git a/src/splat.F b/src/splat.F index 3b27073e..e6269640 100644 --- a/src/splat.F +++ b/src/splat.F @@ -64,7 +64,7 @@ SUBROUTINE SPLAT(IDRT,JMAX,SLAT,WLAT) $ 146.870307625, 150.011882457, 153.153458019, 156.295034268 / REAL:: DLT,D1=1. REAL AWORK((JMAX+1)/2,((JMAX+1)/2)),BWORK(((JMAX+1)/2)) - INTEGER:: JHE,JHO,J0=0 + INTEGER:: JHE,JHO,J0=0, INFO INTEGER IPVT((JMAX+1)/2) PARAMETER(PI=3.14159265358979,C=(1.-(2./PI)**2)*0.25) C - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -133,8 +133,14 @@ SUBROUTINE SPLAT(IDRT,JMAX,SLAT,WLAT) BWORK(JS)=-D1/(4*(JS-1)**2-1) ENDDO - call ludcmp(awork,jho,jhe,ipvt) - call lubksb(awork,jho,jhe,ipvt,bwork) + ! Call LAPACK routines +#if (LSIZE==4) + CALL SGETRF(JHO, JHO, AWORK, JHE, IPVT, INFO) + CALL SGETRS('N', JHO, 1, AWORK, JHE, IPVT, BWORK, JHO, INFO) +#else + CALL DGETRF(JHO, JHO, AWORK, JHE, IPVT, INFO) + CALL DGETRS('N', JHO, 1, AWORK, JHE, IPVT, BWORK, JHO, INFO) +#endif WLAT(1)=0. DO J=1,JHO @@ -170,8 +176,14 @@ SUBROUTINE SPLAT(IDRT,JMAX,SLAT,WLAT) BWORK(JS)=-D1/(4*(JS-1)**2-1) ENDDO - call ludcmp(awork,jho,jhe,ipvt,d) - call lubksb(awork,jho,jhe,ipvt,bwork) + ! Call LAPACK routines +#if (LSIZE==4) + CALL SGETRF(JHO, JHO, AWORK, JHE, IPVT, INFO) + CALL SGETRS('N', JHO, 1, AWORK, JHE, IPVT, BWORK, JHO, INFO) +#else + CALL DGETRF(JHO, JHO, AWORK, JHE, IPVT, INFO) + CALL DGETRS('N', JHO, 1, AWORK, JHE, IPVT, BWORK, JHO, INFO) +#endif WLAT(1)=0. DO J=1,JHO