Merge branch 'master' into feature/acc-wait-directive

stfc · Jun 29, 2023 · 18fe9a6 · 18fe9a6
2 parents 37f2d73 + a4ec537
commit 18fe9a6
Show file tree

Hide file tree

Showing 17 changed files with 553 additions and 127 deletions.
diff --git a/.github/workflows/nemo_tests.yml b/.github/workflows/nemo_tests.yml
@@ -0,0 +1,132 @@
+# -----------------------------------------------------------------------------
+# BSD 3-Clause License
+#
+# Copyright (c) 2023, Science and Technology Facilities Council.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+#
+# * Neither the name of the copyright holder nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+# -----------------------------------------------------------------------------
+# Author S. Siso, STFC Daresbury Lab
+
+# This workflow will use a self-hosted runner to perform the more expensive
+# integration tests that are not run on GHA systems.
+
+name: NEMO Integration Tests
+
+# Triggered by every push; the job below is restricted to the mirror
+# repository by its 'if' condition.
+on: push
+
+jobs:
+  run_if_on_mirror:
+    # Skip the job unless the workflow is running in the mirror repository.
+    if: ${{ github.repository == 'stfc/PSyclone-mirror' }}
+    runs-on: self-hosted
+    # Perl environment for the per-user installation under /home/aporter/perl5
+    env:
+      PERL5LIB: /home/aporter/perl5/lib/perl5
+      PERL_LOCAL_LIB_ROOT: /home/aporter/perl5
+      PERL_MB_OPT: "--install_base \"/home/aporter/perl5\""
+      PERL_MM_OPT: "INSTALL_BASE=/home/aporter/perl5"
+
+    steps:
+    - uses: actions/checkout@v3
+      with:
+        submodules: recursive
+        # This is required to get the commit history for merge commits for
+        # the ci-skip check below.
+        fetch-depth: '0'
+    - name: Check for [skip ci] in commit message
+      uses: mstachniuk/ci-skip@v1
+      with:
+        # This setting causes the tests to 'fail' if [skip ci] is specified
+        fail-fast: true
+        commit-filter: '[skip ci]'
+    - name: Install dependencies
+      run: |
+        python -m venv .runner_venv
+        . .runner_venv/bin/activate
+        python -m pip install --upgrade pip
+        # Install the version of fparser pointed to by the submodule. To use
+        # the released version (from PyPI) instead, comment out the
+        # following line:
+        pip install external/fparser
+        # Quoted so that the shell never treats '[test]' as a glob pattern.
+        pip install ".[test]"
+        # Add Perl to the PATH
+        echo "/home/aporter/perl5/bin" >> "$GITHUB_PATH"
+        # Compile nvidia profiling tools
+        module load nvidia-hpcsdk
+        cd lib/profiling/nvidia/
+        F90=nvfortran make
+
+    # PSyclone, compile and run MetOffice NEMO with OpenMP for GPUs
+    - name: NEMO MetOffice OpenMP for GPU
+      run: |
+        # When no 'shell' is specified GitHub Actions runs 'bash -e' without
+        # pipefail, so enable it here: otherwise 'tee' masks a failure of
+        # 'make run-openmp_gpu'.
+        set -o pipefail
+        . .runner_venv/bin/activate
+        export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
+        export NEMO_DIR=${HOME}/NEMO
+        cd examples/nemo/scripts
+        make -j 4 openmp_gpu
+        module load nvidia-hpcsdk netcdf_fortran
+        make -j 4 compile-openmp_gpu
+        export NV_ACC_POOL_THRESHOLD=75
+        make run-openmp_gpu | tee output.txt
+        # Check the output is as expected for the first 6 digits. The
+        # patterns are fixed strings (-F) so that '.' only matches a
+        # literal decimal point.
+        tail -n 1 output.txt | grep -qF " it :      10"
+        tail -n 1 output.txt | grep -qF "|ssh|_max:  0.259483"
+        tail -n 1 output.txt | grep -qF "|U|_max:  0.458515"
+        tail -n 1 output.txt | grep -qF "S_min:  0.482686"
+        tail -n 1 output.txt | grep -qF "S_max:  0.407622"
+        # Show the timing in the log and append it, tagged with the branch
+        # name and commit, to the performance history kept in NEMO_DIR.
+        grep -A 1 "Elapsed Time" output.txt
+        echo  $GITHUB_REF_NAME $GITHUB_SHA $(grep -A 1 "Elapsed Time" output.txt | head -n 2 | tail -n 1) >> ${NEMO_DIR}/performance_history
+
+    # PSyclone, compile and run ECMWF NEMO with OpenMP for CPUs
+    - name: NEMO ECMWF OpenMP for CPU
+      run: |
+        # When no 'shell' is specified GitHub Actions runs 'bash -e' without
+        # pipefail, so enable it here: otherwise 'tee' masks a failure of
+        # 'make run-openmp_cpu'.
+        set -o pipefail
+        . .runner_venv/bin/activate
+        export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
+        export NEMO_DIR=${HOME}/NEMOGCM_V40
+        export COMPILER_ARCH=linux_intel
+        export ADD_KEYS="IEEE_IS_NAN=ieee_is_nan key_nosignedzero"
+        export DEL_KEYS="key_iomput"
+        export MODEL_DIR=/archive/ssiso/ecmwf_eORCA1_GO8/
+        export NAMELISTS_DIR=${NEMO_DIR}/testscripts_V40/output/openmp_outer_V40_eORCA1_GO8_Z75_20170906_cray_dp_1x1/
+        cd examples/nemo/scripts
+        make -j 4 openmp_cpu
+        module load intel/oneapi compiler mpi
+        # Make the locally-built NetCDF and HDF5 libraries visible at run time
+        export LD_LIBRARY_PATH=/home/ssiso/ecmwf_nemo/ESiWACE2/scripts/dev/netcdf-c-4.9.0/lib/:$LD_LIBRARY_PATH
+        export LD_LIBRARY_PATH=/home/ssiso/ecmwf_nemo/ESiWACE2/scripts/dev/netcdf-fortran-4.5.4/lib:$LD_LIBRARY_PATH
+        export LD_LIBRARY_PATH=/home/ssiso/ecmwf_nemo/ESiWACE2/scripts/dev/hdf5-1.12.2/lib/:$LD_LIBRARY_PATH
+        make -j 4 compile-openmp_cpu
+        export OMP_NUM_THREADS=4
+        make run-openmp_cpu | tee output.txt
+        # Check the output is as expected for the first 6 digits. The
+        # patterns are fixed strings (-F) so that '.' only matches a
+        # literal decimal point.
+        tail -n 1 output.txt | grep -qF " it :      10"
+        tail -n 1 output.txt | grep -qF "|ssh|_max:  0.199714"
+        tail -n 1 output.txt | grep -qF "|U|_max:  0.148409"
+        tail -n 1 output.txt | grep -qF "S_min:  0.108530"
+        tail -n 1 output.txt | grep -qF "S_max:  0.404045"
+        # Show the timing in the log and append it, tagged with the branch
+        # name and commit, to the performance history kept in NEMO_DIR.
+        grep -A 1 "Elapsed Time" output.txt
+        echo  $GITHUB_REF_NAME $GITHUB_SHA $(grep -A 1 "Elapsed Time" output.txt | head -n 2 | tail -n 1) >> ${NEMO_DIR}/performance_history
diff --git a/changelog b/changelog
@@ -470,6 +470,12 @@
 	159) PR #2181 for #2175. Bug fixes for processing of Fortran
 	declarations and integer literals.
 
+	160) PR #2107 for #2106. Improvements to NEMO processing scripts and
+	inclusion in Integration Testing.
+
+	161) PR #2198 for #2194. Fixes handling of a function where the
+	precision of the result is only defined within the function.
+
 release 2.3.1 17th of June 2022
 
 	1) PR #1747 for #1720. Adds support for If blocks to PSyAD.

diff --git a/doc/developer_guide/working_practises.rst b/doc/developer_guide/working_practises.rst
@@ -545,26 +545,30 @@ As mentioned above, running the test suite and/or examples with compilation
 enabled significantly increases the required compute time. However, there
 is a need to test PSyclone with full builds of the LFRic and NEMO
 applications. Therefore, in addition to the principal action described
-above, there are two others which are configured in ``repo-sync.yml``
-and ``compilation.yml``.
+above, there are the following workflow files that manage multiple
+Integration tests:
 
-The ``repo-sync`` action must be triggered
+The ``repo-sync`` action, which must be triggered
 manually (on GitHub) and pushes a copy of the current branch to a private
 repository. (This action uses the ``integration`` environment and can
 therefore only be triggered by GitHub users who have ``review`` permissions
 in that environment.) That private repository has a GitHub self-hosted runner
 setup which then enables tests to be run on a machine at the Hartree
 Centre. Access to the private repository is handled using ssh with a key
 saved as a 'secret' in the GitHub PSyclone repository.
-
-It is the work performed by the self-hosted runner that is configured in the
-``compilation.yml`` file. Currently this is limited to simply running the test
-suite with compilation enabled (using ``gfortran``) but we plan to extend this
-to perform integration tests with whole LFRic and NEMO applications, including
-GPU execution. Since the self-hosted runner is only available in the private
-repository, this action is configured such that it only runs if the name
+The work performed by the self-hosted runner is configured in the ``yml`` files
+below. Since the self-hosted runner is only available in the private
+repository, these actions are configured such that they only run if the name
 of the repository is that of the private one.
 
+The ``compilation.yml`` action runs the test suite and examples with
+compilation enabled (using ``gfortran``).
+
+The ``nemo_tests.yml`` action processes the NEMO source code (available on
+the self-hosted runner) with the PSyclone scripts in examples/nemo/scripts.
+It then compiles the generated code, runs it, and validates that the
+output produced matches the expected results.
+
 
 Performance
 ===========

diff --git a/examples/nemo/scripts/.gitignore b/examples/nemo/scripts/.gitignore
@@ -0,0 +1 @@
+psycloned-*
diff --git a/examples/nemo/scripts/Makefile b/examples/nemo/scripts/Makefile
@@ -0,0 +1,108 @@
+# Makefile to Generate PSyclone versions of NEMO, Compile, and Run them.
+# It supports the targets: serial (only compile), openmp_cpu, openmp_gpu, openacc_kernels
+# - To generate the psycloned versions use: `make <target>`
+# - To compile that version use: `make compile-<target>`
+# - To run that version use: `make run-<target>`
+
+# ---- Start of the configurable part of the Makefile ----
+
+# - Specify NEMO directory
+# NEMO_DIR ?=
+# - Specify location of PSyclone transformation scripts
+PSYCLONE_NEMO_DIR ?= ${WORKSPACE}/PSyclone/examples/nemo/scripts
+# - Specify location of preprocessed files (you need to call 'make compile-serial' once first)
+# ROOT_SRC ?=
+# - Specify NEMO test case
+TEST_CASE ?= SPITZ12
+# - Specify compilation options
+# COMPILER_ARCH ?= 
+# ADD_KEYS ?= 
+# DEL_KEYS ?=
+# - Specify input files
+# MODEL_DIR ?=
+# NAMELISTS_DIR ?=
+
+# Example Config for MetOffice - eORCA1_GO8 - nvidia
+NEMO_DIR ?= ${WORKSPACE}/NEMO
+ROOT_SRC ?= ${NEMO_DIR}/cfgs/SPITZ12_serial/BLD/ppsrc/nemo/
+COMPILER_ARCH ?= linux_nvidia_omp_gpu
+ADD_KEYS ?= "IEEE_IS_NAN=ieee_is_nan key_nosignedzero"
+DEL_KEYS ?= "key_iomput key_mpp_mpi key_si3"
+MODEL_DIR ?= /home/aporter/NEMO/orca1_inputs/
+NAMELISTS_DIR ?= ${NEMO_DIR}/
+
+# Example Config for ECMWF - eORCA1_GO8_Z75 (with MPI and SI3) - intel compiler
+# NEMO_DIR ?= ${WORKSPACE}/ecmwf_nemo/nemo/NEMOGCM_V40/
+# ROOT_SRC ?= ${NEMO_DIR}/cfgs/SPITZ12_serial/BLD/ppsrc/nemo/
+# COMPILER_ARCH ?= linux_intel
+# ADD_KEYS ?= "IEEE_IS_NAN=ieee_is_nan key_nosignedzero"
+# DEL_KEYS ?= "key_iomput"
+# MODEL_DIR ?= /archive/ssiso/ecmwf_eORCA1_GO8/
+# NAMELISTS_DIR ?= ${WORKSPACE}/ecmwf_nemo/nemo/testscripts_V40/output/openmp_outer_V40_eORCA1_GO8_Z75_20170906_cray_dp_1x1/
+
+# ---- End of configuration section - do not edit below this point ----
+
+# NOTE(review): 'openacc_loops' is listed here but no psycloned-openacc_loops/%.f90
+# rule is defined below, so 'make openacc_loops' cannot currently succeed - confirm
+# whether a rule is missing or the name should be dropped.
+TARGETS := openmp_cpu openmp_gpu openacc_kernels openacc_loops
+SRC_FILES := $(wildcard ${ROOT_SRC}/*.f90)
+OUTPUT_FOLDERS := $(addprefix psycloned-, ${TARGETS})
+
+.PHONY: clean
+
+# Generate PSycloned folders
+$(OUTPUT_FOLDERS):
+	@echo "Creating folder $@"
+	mkdir -p $@
+
+# PSyclone targets (process all the f90 files in ${ROOT_SRC})
+$(TARGETS): % : $(addprefix psycloned-%/, $(notdir $(SRC_FILES)))
+	@echo "Finished generating $@"
+
+# PSyclone instructions for each target type.
+# The output folder is an order-only prerequisite (after the '|'): it must
+# exist before a file is generated, but its timestamp - which changes every
+# time a file is written into it - must not mark the generated files as
+# out of date.
+psycloned-openmp_cpu/%.f90: ${ROOT_SRC}%.f90 | psycloned-openmp_cpu
+	${PSYCLONE_NEMO_DIR}/process_nemo.py -s ${PSYCLONE_NEMO_DIR}/omp_cpu_trans.py -I ${ROOT_SRC} -o psycloned-openmp_cpu $<
+
+psycloned-openmp_gpu/%.f90: ${ROOT_SRC}%.f90 | psycloned-openmp_gpu
+	${PSYCLONE_NEMO_DIR}/process_nemo.py -s ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py -I ${ROOT_SRC} -o psycloned-openmp_gpu $<
+
+psycloned-openacc_kernels/%.f90: ${ROOT_SRC}%.f90 | psycloned-openacc_kernels
+	${PSYCLONE_NEMO_DIR}/process_nemo.py -s ${PSYCLONE_NEMO_DIR}/kernels_trans.py -I ${ROOT_SRC} -o psycloned-openacc_kernels $<
+
+
+# Get the number of Makefile parallel jobs to pass it to the makenemo
+# MAKE_PID is the parent PID seen by the $(shell ...) sub-shell, i.e. the
+# process that is evaluating this Makefile.
+MAKE_PID := $(shell echo $$PPID)
+# Search the 'ps T' listing for a line that contains that PID and the make
+# command followed by '-j N' or '--jobs=N', and extract N. JOBS is left
+# empty when no such line is found.
+JOBS := $(shell ps T | sed -n 's/.*$(MAKE_PID).*$(MAKE).* \(-j\|--jobs=\) *\([0-9][0-9]*\).*/\2/p')
+JOBS := $(if ${JOBS}, ${JOBS}, 4)  # If none were given, default to 4
+
+# Compile NEMO using the psycloned sources of the given target as the
+# external source directory (-e) of makenemo.
+# $(lastword $(subst -, ,$@)) is the part of the target name that follows
+# the last '-', e.g. 'openmp_gpu' for 'compile-openmp_gpu'.
+compile-%:
+	@test -d psycloned-$(lastword $(subst -, ,$@)) || { \
+		echo "The psycloned-$(lastword $(subst -, ,$@)) folder does not exist!"; \
+		echo "You may need to execute 'make $(lastword $(subst -, ,$@))' first."; \
+		echo "Exiting..."; exit 1; }
+	cd $(NEMO_DIR) ; \
+	./makenemo -n ${TEST_CASE}_$(lastword $(subst -, ,$@)) -r ${TEST_CASE} \
+		-e $(CURDIR)/psycloned-$(lastword $(subst -, ,$@)) -m ${COMPILER_ARCH} -j ${JOBS} \
+		add_key ${ADD_KEYS} del_key ${DEL_KEYS}
+
+# The compile-serial is a special case: it builds the unmodified NEMO
+# sources, so no psycloned folder is passed to makenemo.
+compile-serial:
+	cd $(NEMO_DIR) ; \
+	./makenemo -n ${TEST_CASE}_serial -r ${TEST_CASE} \
+		-m ${COMPILER_ARCH} -j ${JOBS} add_key ${ADD_KEYS} del_key ${DEL_KEYS}
+
+# Run NEMO: link the input files and copy the namelists into the EXP00
+# folder of the chosen configuration, execute the model there and then
+# print its timing.output and run.stat files.
+run-%:
+	ln -sf ${MODEL_DIR}/*.nc ${NEMO_DIR}/cfgs/${TEST_CASE}_$(lastword $(subst -, ,$@))/EXP00/.
+	cp ${NAMELISTS_DIR}namelist_* ${NEMO_DIR}/cfgs/${TEST_CASE}_$(lastword $(subst -, ,$@))/EXP00/.
+	cd ${NEMO_DIR}/cfgs/${TEST_CASE}_$(lastword $(subst -, ,$@))/EXP00 && ./nemo
+	cd ${NEMO_DIR}/cfgs/${TEST_CASE}_$(lastword $(subst -, ,$@))/EXP00 && { cat timing.output; cat run.stat; }
+
+# Run NEMO under the NVIDIA Nsight Systems profiler (nsys)
+nvprof-%:
+	ln -sf ${MODEL_DIR}/*.nc ${NEMO_DIR}/cfgs/${TEST_CASE}_$(lastword $(subst -, ,$@))/EXP00/.
+	cp ${NAMELISTS_DIR}namelist_* ${NEMO_DIR}/cfgs/${TEST_CASE}_$(lastword $(subst -, ,$@))/EXP00/.
+	cd ${NEMO_DIR}/cfgs/${TEST_CASE}_$(lastword $(subst -, ,$@))/EXP00 && nsys profile ./nemo
+
+# Clean (only psycloned- folders)
+clean:
+	rm -rf $(OUTPUT_FOLDERS)
diff --git a/examples/nemo/scripts/omp_cpu_trans.py b/examples/nemo/scripts/omp_cpu_trans.py
@@ -37,10 +37,9 @@
 ''' PSyclone transformation script to insert OpenMP for CPU
 directives into Nemo code. Tested with ECMWF Nemo 4.0 code. '''
 
+from psyclone.transformations import OMPLoopTrans
 from utils import insert_explicit_loop_parallelism, normalise_loops, \
     enhance_tree_information, add_profiling
-from psyclone.psyGen import TransInfo
-from psyclone.transformations import OMPParallelTrans
 
 PROFILING_ENABLED = False
 
@@ -56,8 +55,9 @@ def trans(psy):
     :rtype: :py:class:`psyclone.psyGen.PSy`
 
     '''
-    omp_parallel_trans = OMPParallelTrans()
-    omp_loop_trans = TransInfo().get_trans_name('OMPLoopTrans')
+    omp_parallel_trans = None
+    omp_loop_trans = OMPLoopTrans(omp_schedule="static")
+    omp_loop_trans.omp_directive = "paralleldo"
 
     print(f"Invokes found in {psy.name}:")
     for invoke in psy.invokes.invoke_list:
@@ -66,45 +66,37 @@ def trans(psy):
         if PROFILING_ENABLED:
             add_profiling(invoke.schedule.children)
 
-        # TODO #1841: These subroutines have a bug in the array-range-to-loop
-        # transformation.
-        if invoke.name in (
-                "blk_oce",  # NVFORTRAN-S-0083-Vector expression used where
-                            # scalar expression
+        enhance_tree_information(invoke.schedule)
+
+        if invoke.name in ("eos_rprof"):
+            # TODO #1959: This subroutines make the ECMWF compilation fail
+            # because it moves a statement function outside of the
+            # specification part.
+            print("Skipping normalisation for ", invoke.name)
+
+        elif invoke.name in (
                 "trc_oce_rgb",  # Produces incorrect results
                 "removepoints"  # Compiler error: The shapes of the array
                                 # expressions do not conform
                 ):
-            print("Skipping", invoke.name)
-            continue
-
-        # TODO #1959: This subroutines make the ECMWF compilation fail because
-        # it moves a statement function outside of the specification part.
-        if invoke.name in ("eos_rprof"):
-            print("Skipping", invoke.name)
-            continue
-
-        enhance_tree_information(invoke.schedule)
-
-        normalise_loops(
-                invoke.schedule,
-                hoist_local_arrays=True,
-                convert_array_notation=True,
-                convert_range_loops=True,
-                hoist_expressions=False
-        )
+            # TODO #1841: These subroutines have a bug in the
+            # array-range-to-loop transformation.
+            print("Skipping normalisation for ", invoke.name)
+        else:
+            normalise_loops(
+                    invoke.schedule,
+                    hoist_local_arrays=False,
+                    convert_array_notation=True,
+                    convert_range_loops=True,
+                    hoist_expressions=False
+            )
 
         insert_explicit_loop_parallelism(
                 invoke.schedule,
                 region_directive_trans=omp_parallel_trans,
                 loop_directive_trans=omp_loop_trans,
                 # Collapse may be useful in some architecture/compiler
                 collapse=False,
-                # Currently if there is a call we don't parallelise because we
-                # expect the subroutine to already be parallelised. lib_fortran
-                # is the only exception because we know it only has calls to
-                # functions without internal loops.
-                exclude_calls=psy.name != "psy_lib_fortran_psy",
         )
 
     return psy