Skip to content

Commit

Permalink
Leverage Intel Containers for NOAA RDHPC Cross-Platform Reproducibili…
Browse files Browse the repository at this point in the history
…ty (#109)

* Added scripts for running the containerized MOM6SIS2 on Gaea.

* update run_container.sh

* rename stdout for container test

* add ref and rename stdouts

* activate container ci

* fix typo

* mount ncrc in container

* fix mount name

* run build process using slurm

* forget link dataset
  • Loading branch information
yichengt900 authored Nov 7, 2024
1 parent a82a961 commit 950a043
Show file tree
Hide file tree
Showing 9 changed files with 515 additions and 72 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: CEFI-MOM6-ci

on:
# Triggers this workflow on pull request event with "CEFI_MOM6_RT_gaea_c5" label
# Triggers this workflow on pull request event with "CEFI_MOM6_RT_container" label
pull_request:
branches: [ "main" ]
types: [ labeled ]
Expand All @@ -14,8 +14,8 @@ env:
#
jobs:
checkout-build:
if: ${{ github.event.label.name == 'CEFI_MOM6_RT_gaea_c5' }}
runs-on: self-hosted
if: ${{ github.event.label.name == 'CEFI_MOM6_RT_container' }}
runs-on: [self-hosted, container]
timeout-minutes: 600
strategy:
max-parallel: 1
Expand All @@ -28,9 +28,12 @@ jobs:
fetch-depth: 1
submodules: recursive

- name: Build MOM6SIS2
- name: Build MOM6SIS2 using container
run: |
cd ${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/builds
pwd
#img=/gpfs/f6/ira-cefi/world-shared/container/ubuntu22.04-intel-ufs-env-v1.8.0-llvm.img
#apptainer exec -B /gpfs -B /ncrc/home2/Yi-cheng.Teng:/ncrc/home2/Yi-cheng.Teng $img bash linux-build.bash -m docker -p linux-intel -t repro -f mom6sis2
jobid=$(sbatch --parsable ci_build_driver.sh | awk -F';' '{print $1}' | cut -f1)
#
sleep 1
Expand All @@ -45,8 +48,8 @@ jobs:
fi
sleep 60 # Adjust the sleep duration as needed
done
#
check_file="${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/builds/build/gaea-ncrc5.intel23/ocean_ice/repro/MOM6SIS2"
check_file="${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/builds/build/docker-linux-intel/ocean_ice/repro/MOM6SIS2"
if [ -f "$check_file" ]; then
echo "PASSED: $check_file"
else
Expand All @@ -56,16 +59,16 @@ jobs:
run-CEFI_MOM6-ci:
needs: checkout-build
runs-on: self-hosted
runs-on: [self-hosted, container]
strategy:
max-parallel: 2
max-parallel: 1
matrix:
case: ["NWA12.COBALT", "NEP10.COBALT"]
case: ["NEP10.COBALT"]
steps:
- name: Run Experiment ${{ matrix.case }}
run: |
cd ${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/exps/${{ matrix.case }}
jobid=$(sbatch --parsable driver.sh | awk -F';' '{print $1}' | cut -f1)
jobid=$(sbatch --parsable run_container.sh | awk -F';' '{print $1}' | cut -f1)
#
sleep 1
while :; do
Expand All @@ -80,7 +83,7 @@ jobs:
sleep 60 # Adjust the sleep duration as needed
done
#
expected_string="All restart files are identical, PASS"
expected_string="ocean.stats is identical to ref, PASS"
check_file="${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/exps/${{ matrix.case }}/${{ matrix.case }}_o.$jobid"
if [ -f "$check_file" ]; then
if grep -qF "$expected_string" $check_file; then
Expand All @@ -96,22 +99,22 @@ jobs:
add-pass-label:
needs: run-CEFI_MOM6-ci
runs-on: self-hosted
runs-on: [self-hosted, container]
if: ${{ needs.run-CEFI_MOM6-ci.result == 'success' }}
steps:
- name: Add "pass_CEFI_MOM6_RT" label on success
- name: Add "pass_CEFI_MOM6_RT_container" label on success
run: |
TOKEN=${{ secrets.GITHUB_TOKEN }}
RT_TEST_LABEL="CEFI_MOM6_RT_gaea_c5"
PASS_LABEL="pass_CEFI_MOM6_RT"
RT_TEST_LABEL="CEFI_MOM6_RT_container"
PASS_LABEL="pass_CEFI_MOM6_RT_container"
# Remove the "CEFI_MOM6_RT_gaea_c5" label
# Remove the "CEFI_MOM6_RT_container" label
curl -X DELETE \
-H "Authorization: Bearer $TOKEN" \
-H "Accept: application/vnd.github.v3+json" \
"https://api.github.com/repos/$GITHUB_REPOSITORY/issues/${{ env.PR_NUMBER }}/labels/$RT_TEST_LABEL"
# Add the "pass_CEFI_MOM6_RT" label
# Add the "pass_CEFI_MOM6_RT_container" label
curl -X POST \
-H "Authorization: Bearer $TOKEN" \
-H "Accept: application/vnd.github.v3+json" \
Expand All @@ -120,7 +123,7 @@ jobs:
clean-up:
needs: add-pass-label
runs-on: self-hosted
runs-on: [self-hosted, container]
strategy:
max-parallel: 1
steps:
Expand Down
64 changes: 11 additions & 53 deletions builds/ci_build_driver.sh
Original file line number Diff line number Diff line change
@@ -1,65 +1,23 @@
#!/bin/bash
#SBATCH --nodes=1
#SBATCH --time=60
#SBATCH --job-name="MOM6SIS2_ci_build"
#SBATCH --output=MOM6SIS2_ci_build_o.%j
#SBATCH --error=MOM6SIS2_ci_build_e.%j
#SBATCH --qos=debug
#SBATCH --time=360
#SBATCH --job-name="MOM6SIS2_container_build"
#SBATCH --output=MOM6SIS2_container_build_o.%j
#SBATCH --error=MOM6SIS2_container_build_e.%j
#SBATCH --qos=normal
#SBATCH --partition=batch
#SBATCH --clusters=c5
#SBATCH --account=cefi
#SBATCH --clusters=c6
#SBATCH --account=ira-cefi

#
[ -d "build" ] && rm -rf build

#
echo "Build MOM6SIS2-COBALT for CI testing started: " `date`
echo "Build MOM6SIS2-COBALT using container started: " `date`

#
machine_name="gaea"
platform="ncrc5.intel23"
target="repro"
flavor="fms1_mom6sis2"

FMSlib_PATH="/gpfs/f5/cefi/scratch/Yi-cheng.Teng/github/FMS/2024.02_FMS1"
rootdir=$(pwd)
abs_rootdir=$rootdir

echo $abs_rootdir

#load modules
source $MODULESHOME/init/bash
source $rootdir/$machine_name/$platform.env
. $rootdir/$machine_name/$platform.env

makeflags="NETCDF=3"

if [[ $target =~ "repro" ]] ; then
makeflags="$makeflags REPRO=1"
fi

srcdir=$abs_rootdir/../src

#
sed -i 's/static pid_t gettid(void)/pid_t gettid(void)/g' $srcdir/FMS/affinity/affinity.c

#
if [[ $flavor == "fms1_mom6sis2" ]] ; then
echo "build mom6sis2 with FMS1 cap"

mkdir -p build/$machine_name-$platform/ocean_ice/$target
pushd build/$machine_name-$platform/ocean_ice/$target
rm -f path_names
$srcdir/mkmf/bin/list_paths $srcdir/MOM6/{config_src/infra/FMS1,config_src/memory/dynamic_symmetric,config_src/drivers/FMS_cap,config_src/external/ODA_hooks,config_src/external/database_comms,config_src/external/drifters,config_src/external/stochastic_physics,pkg/GSW-Fortran/{modules,toolbox}/,src/{*,*/*}/} $srcdir/SIS2/{config_src/dynamic_symmetric,config_src/external/Icepack_interfaces,src} $srcdir/icebergs/src $srcdir/FMS/{coupler,include}/ $srcdir/{ocean_BGC/generic_tracers,ocean_BGC/mocsy/src}/ $srcdir/{atmos_null,ice_param,land_null,coupler/shared/,coupler/full/}/

compiler_options='-DINTERNAL_FILE_NML -DMAX_FIELDS_=600 -DNOT_SET_AFFINITY -Duse_deprecated_io -D_USE_MOM6_DIAG -D_USE_GENERIC_TRACER -DUSE_PRECISION=2 -D_USE_LEGACY_LAND_ -Duse_AM3_physics'

$srcdir/mkmf/bin/mkmf -t $abs_rootdir/$machine_name/$platform.mk -o "-I${FMSlib_PATH}/shared/$target" -p MOM6SIS2 -l "-L${FMSlib_PATH}/shared/$target -lfms" -c "$compiler_options" path_names

make $makeflags MOM6SIS2

fi

export img=/gpfs/f6/ira-cefi/world-shared/container/ubuntu22.04-intel-ufs-env-v1.8.0-llvm.img
apptainer exec -B /gpfs -B /ncrc/home2/Yi-cheng.Teng:/ncrc/home2/Yi-cheng.Teng $img bash linux-build.bash -m docker -p linux-intel -t repro -f mom6sis2

#
echo "Build MOM6SIS2-COBALT for CI testing ended: " `date`
echo "Build MOM6SIS2-COBALT using container ended: " `date`
110 changes: 110 additions & 0 deletions builds/container-scripts/externalize.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/bin/bash

################################################################################
# Help #
################################################################################
# Help: print a usage summary for this script to stdout.
# Takes no arguments and does not exit; the caller decides what happens next.
Help()
{
    echo "Create wrapper scripts for container executables in an external directory."
    echo
    echo "Syntax: externalize.sh [-h] -e exec_dir -p env_file [file ...]"
    echo "options:"
    echo "-h Print this Help."
    echo "-e Create the external executable in the user-specified folder"
    echo "-p env file that contains the necessary modules within the container"
    echo
}

################################################################################
################################################################################
# Main program #
################################################################################
################################################################################
################################################################################
# Process the input options. Add options as needed. #
################################################################################
# Get the options
#while getopts ":hep" option; do
# case $option in
# h) # display Help
# Help
# exit;;
# e) # external directory to hold externalized executables
# exec_dir=$2
# echo "Will create external executable in $exec_dir"
# p) # env file that contains the necessary modules inside the container
# env_file=$3
# echo "Will load modules in $env_file"
#
# esac
#done
#shift $(($OPTIND ))

# Parse the command-line options.
#   -h        print the help text and exit
#   -e DIR    directory that receives the externalized executables
#   -p FILE   env file that loads the modules needed inside the container
# The leading ':' in the optstring puts getopts in silent mode, so unknown
# options and missing option arguments have to be reported here explicitly;
# otherwise they would be ignored without any message.
while getopts ":he:p:" option; do
    case $option in
        h) # display Help
            Help
            exit 0
            ;;
        e) # external directory to hold externalized executables
            exec_dir=$OPTARG
            echo "Will create external executable in $exec_dir"
            ;;
        p) # env file that contains the necessary modules inside the container
            env_file=$OPTARG
            echo "Will load modules in $env_file"
            ;;
        :) # an option that needs an argument was given without one
            echo "Error: option -$OPTARG requires an argument" >&2
            exit 1
            ;;
        \?) # option letter not present in the optstring
            echo "Error: invalid option -$OPTARG" >&2
            exit 1
            ;;
    esac
done

# Shift past the processed options so only the file arguments remain
shift $((OPTIND - 1))


# The remaining positional arguments are the executables to externalize.
# They are stored as one space-separated string, which is later word-split.
fileList="$*"

# Both values are needed below; without them the copies and sed edits would
# run against empty paths.
if [ -z "$exec_dir" ] || [ -z "$env_file" ]; then
    echo "Error: both -e exec_dir and -p env_file are required" >&2
    exit 1
fi

# Load the environment first so the variables substituted below are set.
source "$env_file"
mkdir -p "$exec_dir"
cp /opt/container-scripts/run_container_executable.sh "$exec_dir"
cp /opt/container-scripts/build_container_executable.sh "$exec_dir"

# Replace the placeholder tokens in the copied templates.
sed -i "s|IMAGE|$SINGULARITY_CONTAINER|g" "$exec_dir"/*_executable.sh

# Turn the comma-separated SINGULARITY_BIND list into repeated "-B <dir>"
# arguments. Empty entries are skipped so no "-B" is left without a value.
# NOTE(review): assumes BINDDIRS ends up on an apptainer/singularity command
# line inside the templates - confirm against the template scripts.
bindstring=" "
IFS=',' read -ra bind_dirs <<< "$SINGULARITY_BIND"
for binddir in "${bind_dirs[@]}"; do
    if [ -n "$binddir" ]; then
        bindstring="${bindstring} -B ${binddir}"
    fi
done
echo "$bindstring"
sed -i "s|BINDDIRS|$bindstring|g" "$exec_dir"/*_executable.sh

# LDLIB_PATH must be substituted before LIB_PATH: "LIB_PATH" is a substring of
# "LDLIB_PATH", so the reverse order would corrupt the longer token.
sed -i "s|LDLIB_PATH|$LD_LIBRARY_PATH|g" "$exec_dir"/*_executable.sh
sed -i "s|LIB_PATH|$LIBRARY_PATH|g" "$exec_dir"/*_executable.sh
sed -i "s|FI_PATH|$FI_PROVIDER_PATH|g" "$exec_dir"/*_executable.sh

# For every requested executable, create a wrapper in $exec_dir that carries
# the executable's base name: copy the run template, then fill in the
# EXEC_PATH and ESMF_MK placeholders.
# $fileList is intentionally unquoted: it is a whitespace-separated list.
for file in $fileList
do
    # Resolve to an absolute, symlink-free path (-m tolerates missing parts).
    fullfile=$(readlink -m "$file")
    basefile=$(basename "$fullfile")
    cp "$exec_dir/run_container_executable.sh" "$exec_dir/$basefile"
    pathdir=$(dirname "$fullfile")
    echo "fullfile is $fullfile"
    echo "$pathdir"

    # Put the executable's own directory ahead of the current PATH.
    EXEC_PATH="$pathdir:$PATH"
    sed -i "s|EXEC_PATH|$EXEC_PATH|g" "$exec_dir/$basefile"
    sed -i "s|ESMF_MK|$ESMFMKFILE|g" "$exec_dir/$basefile"
done
# Wrappers for build tools, generated from the build template.
# The tool list is currently empty, so this loop does not execute; the former
# list is kept in the comment below for reference.
#fileList="make cmake ecbuild python python3"
fileList=""
for file in $fileList; do
    # Locate the tool on PATH and split it into directory and base name.
    fullfile=$(which $file)
    pathdir=$(dirname $fullfile)
    basefile=$(basename "$fullfile")

    cp $exec_dir/build_container_executable.sh $exec_dir/$basefile

    # Fill in the placeholders; the expressions run in the same order as
    # separate sed invocations would.
    EXEC_PATH="$pathdir:$PATH"
    sed -i \
        -e "s|EXEC_PATH|$EXEC_PATH|g" \
        -e "s|CMAKE_PREPATH|$CMAKE_PREFIX_PATH|g" \
        -e "s|ESMF_MK|$ESMFMKFILE|g" \
        $exec_dir/$basefile
done

# Mark everything in the output directory as executable. The directory is
# quoted so a path containing whitespace is not split; the glob stays outside
# the quotes so it still expands.
chmod +x "$exec_dir"/*
7 changes: 7 additions & 0 deletions builds/docker/linux-intel.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Module environment for the docker/linux-intel platform; meant to be sourced.
# Initialize Lmod for bash so the `module` command is available.
source /usr/lmod/lmod/init/bash
# Start from a clean module environment.
module purge
# Add the spack-stack 1.8.0 unified-env module tree to the module search path.
module use /opt/spack-stack/spack-stack-1.8.0/envs/unified-env/install/modulefiles/Core
# Load the oneAPI compiler stack, the Intel oneAPI MPI stack, and the
# jedi-ufs-env module collection.
module load stack-oneapi
module load stack-intel-oneapi-mpi
module load jedi-ufs-env
# NOTE(review): presumably unloaded so the build does not pick up the stack's
# prebuilt FMS 2024.02 - confirm against the build script.
module unload fms/2024.02
Loading

0 comments on commit 950a043

Please sign in to comment.