Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Leverage Intel Containers for NOAA RDHPC Cross-Platform Reproducibility #109

Merged
merged 10 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: CEFI-MOM6-ci

on:
# Triggers this workflow on pull request event with "CEFI_MOM6_RT_gaea_c5" label
# Triggers this workflow on pull request event with "CEFI_MOM6_RT_container" label
pull_request:
branches: [ "main" ]
types: [ labeled ]
Expand All @@ -14,8 +14,8 @@ env:
#
jobs:
checkout-build:
if: ${{ github.event.label.name == 'CEFI_MOM6_RT_gaea_c5' }}
runs-on: self-hosted
if: ${{ github.event.label.name == 'CEFI_MOM6_RT_container' }}
runs-on: [self-hosted, container]
timeout-minutes: 600
strategy:
max-parallel: 1
Expand All @@ -28,9 +28,12 @@ jobs:
fetch-depth: 1
submodules: recursive

- name: Build MOM6SIS2
- name: Build MOM6SIS2 using container
run: |
cd ${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/builds
pwd
#img=/gpfs/f6/ira-cefi/world-shared/container/ubuntu22.04-intel-ufs-env-v1.8.0-llvm.img
#apptainer exec -B /gpfs -B /ncrc/home2/Yi-cheng.Teng:/ncrc/home2/Yi-cheng.Teng $img bash linux-build.bash -m docker -p linux-intel -t repro -f mom6sis2
jobid=$(sbatch --parsable ci_build_driver.sh | awk -F';' '{print $1}' | cut -f1)
#
sleep 1
Expand All @@ -45,8 +48,8 @@ jobs:
fi
sleep 60 # Adjust the sleep duration as needed
done
#
check_file="${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/builds/build/gaea-ncrc5.intel23/ocean_ice/repro/MOM6SIS2"

check_file="${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/builds/build/docker-linux-intel/ocean_ice/repro/MOM6SIS2"
if [ -f "$check_file" ]; then
echo "PASSED: $check_file"
else
Expand All @@ -56,16 +59,16 @@ jobs:

run-CEFI_MOM6-ci:
needs: checkout-build
runs-on: self-hosted
runs-on: [self-hosted, container]
strategy:
max-parallel: 2
max-parallel: 1
matrix:
case: ["NWA12.COBALT", "NEP10.COBALT"]
case: ["NEP10.COBALT"]
steps:
- name: Run Experiment ${{ matrix.case }}
run: |
cd ${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/exps/${{ matrix.case }}
jobid=$(sbatch --parsable driver.sh | awk -F';' '{print $1}' | cut -f1)
jobid=$(sbatch --parsable run_container.sh | awk -F';' '{print $1}' | cut -f1)
#
sleep 1
while :; do
Expand All @@ -80,7 +83,7 @@ jobs:
sleep 60 # Adjust the sleep duration as needed
done
#
expected_string="All restart files are identical, PASS"
expected_string="ocean.stats is identical to ref, PASS"
check_file="${{ env.TEST_DIR }}/CEFI_MOM6_CHECK/exps/${{ matrix.case }}/${{ matrix.case }}_o.$jobid"
if [ -f "$check_file" ]; then
if grep -qF "$expected_string" $check_file; then
Expand All @@ -96,22 +99,22 @@ jobs:

add-pass-label:
needs: run-CEFI_MOM6-ci
runs-on: self-hosted
runs-on: [self-hosted, container]
if: ${{ needs.run-CEFI_MOM6-ci.result == 'success' }}
steps:
- name: Add "pass_CEFI_MOM6_RT" label on success
- name: Add "pass_CEFI_MOM6_RT_container" label on success
run: |
TOKEN=${{ secrets.GITHUB_TOKEN }}
RT_TEST_LABEL="CEFI_MOM6_RT_gaea_c5"
PASS_LABEL="pass_CEFI_MOM6_RT"
RT_TEST_LABEL="CEFI_MOM6_RT_container"
PASS_LABEL="pass_CEFI_MOM6_RT_container"

# Remove the "CEFI_MOM6_RT_gaea_c5" label
# Remove the "CEFI_MOM6_RT_container" label
curl -X DELETE \
-H "Authorization: Bearer $TOKEN" \
-H "Accept: application/vnd.github.v3+json" \
"https://api.github.com/repos/$GITHUB_REPOSITORY/issues/${{ env.PR_NUMBER }}/labels/$RT_TEST_LABEL"

# Add the "pass_CEFI_MOM6_RT" label
# Add the "pass_CEFI_MOM6_RT_container" label
curl -X POST \
-H "Authorization: Bearer $TOKEN" \
-H "Accept: application/vnd.github.v3+json" \
Expand All @@ -120,7 +123,7 @@ jobs:

clean-up:
needs: add-pass-label
runs-on: self-hosted
runs-on: [self-hosted, container]
strategy:
max-parallel: 1
steps:
Expand Down
64 changes: 11 additions & 53 deletions builds/ci_build_driver.sh
Original file line number Diff line number Diff line change
@@ -1,65 +1,23 @@
#!/bin/bash
#SBATCH --nodes=1
#SBATCH --time=60
#SBATCH --job-name="MOM6SIS2_ci_build"
#SBATCH --output=MOM6SIS2_ci_build_o.%j
#SBATCH --error=MOM6SIS2_ci_build_e.%j
#SBATCH --qos=debug
#SBATCH --time=360
#SBATCH --job-name="MOM6SIS2_container_build"
#SBATCH --output=MOM6SIS2_container_build_o.%j
#SBATCH --error=MOM6SIS2_container_build_e.%j
#SBATCH --qos=normal
#SBATCH --partition=batch
#SBATCH --clusters=c5
#SBATCH --account=cefi
#SBATCH --clusters=c6
#SBATCH --account=ira-cefi

#
[ -d "build" ] && rm -rf build

#
echo "Build MOM6SIS2-COBALT for CI testing started: " `date`
echo "Build MOM6SIS2-COBALT using container started: " `date`

#
machine_name="gaea"
platform="ncrc5.intel23"
target="repro"
flavor="fms1_mom6sis2"

FMSlib_PATH="/gpfs/f5/cefi/scratch/Yi-cheng.Teng/github/FMS/2024.02_FMS1"
rootdir=$(pwd)
abs_rootdir=$rootdir

echo $abs_rootdir

#load modules
source $MODULESHOME/init/bash
source $rootdir/$machine_name/$platform.env
. $rootdir/$machine_name/$platform.env

makeflags="NETCDF=3"

if [[ $target =~ "repro" ]] ; then
makeflags="$makeflags REPRO=1"
fi

srcdir=$abs_rootdir/../src

#
sed -i 's/static pid_t gettid(void)/pid_t gettid(void)/g' $srcdir/FMS/affinity/affinity.c

#
if [[ $flavor == "fms1_mom6sis2" ]] ; then
echo "build mom6sis2 with FMS1 cap"

mkdir -p build/$machine_name-$platform/ocean_ice/$target
pushd build/$machine_name-$platform/ocean_ice/$target
rm -f path_names
$srcdir/mkmf/bin/list_paths $srcdir/MOM6/{config_src/infra/FMS1,config_src/memory/dynamic_symmetric,config_src/drivers/FMS_cap,config_src/external/ODA_hooks,config_src/external/database_comms,config_src/external/drifters,config_src/external/stochastic_physics,pkg/GSW-Fortran/{modules,toolbox}/,src/{*,*/*}/} $srcdir/SIS2/{config_src/dynamic_symmetric,config_src/external/Icepack_interfaces,src} $srcdir/icebergs/src $srcdir/FMS/{coupler,include}/ $srcdir/{ocean_BGC/generic_tracers,ocean_BGC/mocsy/src}/ $srcdir/{atmos_null,ice_param,land_null,coupler/shared/,coupler/full/}/

compiler_options='-DINTERNAL_FILE_NML -DMAX_FIELDS_=600 -DNOT_SET_AFFINITY -Duse_deprecated_io -D_USE_MOM6_DIAG -D_USE_GENERIC_TRACER -DUSE_PRECISION=2 -D_USE_LEGACY_LAND_ -Duse_AM3_physics'

$srcdir/mkmf/bin/mkmf -t $abs_rootdir/$machine_name/$platform.mk -o "-I${FMSlib_PATH}/shared/$target" -p MOM6SIS2 -l "-L${FMSlib_PATH}/shared/$target -lfms" -c "$compiler_options" path_names

make $makeflags MOM6SIS2

fi

export img=/gpfs/f6/ira-cefi/world-shared/container/ubuntu22.04-intel-ufs-env-v1.8.0-llvm.img
apptainer exec -B /gpfs -B /ncrc/home2/Yi-cheng.Teng:/ncrc/home2/Yi-cheng.Teng $img bash linux-build.bash -m docker -p linux-intel -t repro -f mom6sis2

#
echo "Build MOM6SIS2-COBALT for CI testing ended: " `date`
echo "Build MOM6SIS2-COBALT using container ended: " `date`
110 changes: 110 additions & 0 deletions builds/container-scripts/externalize.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/bin/bash

################################################################################
# Help #
################################################################################
# Help: print a usage summary for this script on stdout.
# Takes no arguments; it only echoes text.
Help()
{
    # Display Help
    echo "Create external wrapper scripts in EXEC_DIR for the listed executables."
    echo
    echo "Syntax: externalize.sh [-h] -e EXEC_DIR -p ENV_FILE FILE..."
    echo "options:"
    echo "-h            Print this Help."
    echo "-e EXEC_DIR   Create the external executable in the user-specified folder"
    echo "-p ENV_FILE   env file that contains the necessary modules within the container"
    echo "FILE...       Executables to create a wrapper for in EXEC_DIR"
    echo
}

################################################################################
################################################################################
# Main program #
################################################################################
################################################################################
################################################################################
# Process the input options. Add options as needed. #
################################################################################
# Get the options
#while getopts ":hep" option; do
# case $option in
# h) # display Help
# Help
# exit;;
# e) # external directory to hold externalized executables
# exec_dir=$2
# echo "Will create external executable in $exec_dir"
# p) # env file that contains the necessary modules inside the container
# env_file=$3
# echo "Will load modules in $env_file"
#
# esac
#done
#shift $(($OPTIND ))

# Parse the command line.
#   -h        print help and exit
#   -e DIR    directory that receives the generated wrapper scripts
#   -p FILE   env file to source before generating the wrappers
# The leading ':' in the optstring puts getopts in silent mode, so bad usage
# is reported by the ':' and '?' cases below instead of being ignored.
while getopts ":he:p:" option; do
    case $option in
        h) # display Help
            Help
            exit
            ;;
        e) # external directory to hold externalized executables
            exec_dir=$OPTARG
            echo "Will create external executable in $exec_dir"
            ;;
        p) # env file that contains the necessary modules inside the container
            env_file=$OPTARG
            echo "Will load modules in $env_file"
            ;;
        :) # -e or -p given without its argument
            echo "Error: option -$OPTARG requires an argument" >&2
            exit 1
            ;;
        \?) # option not listed in the optstring
            echo "Error: invalid option -$OPTARG" >&2
            exit 1
            ;;
    esac
done

# Shift past the processed options
shift $((OPTIND - 1))

# Both options are mandatory. With an empty exec_dir the globs further down
# ("$exec_dir"/*_executable.sh and "$exec_dir"/*) would expand against "/",
# and `source` would be called without a file.
if [ -z "${exec_dir:-}" ] || [ -z "${env_file:-}" ]; then
    echo "Error: both -e EXEC_DIR and -p ENV_FILE are required (use -h for help)" >&2
    exit 1
fi

# Remaining operands are the executables to wrap. An array keeps each operand
# intact even if it contains whitespace or glob characters.
fileList=("$@")

source "$env_file"
mkdir -p "$exec_dir"
cp /opt/container-scripts/run_container_executable.sh "$exec_dir"
cp /opt/container-scripts/build_container_executable.sh "$exec_dir"

# Replace the placeholder tokens in the copied templates with values taken
# from the current environment.
sed -i "s|IMAGE|$SINGULARITY_CONTAINER|g" "$exec_dir"/*_executable.sh

# Turn the comma-separated SINGULARITY_BIND list into repeated "-B <dir>"
# arguments. Empty entries are skipped so no "-B" is left without a value.
bindstring=" "
IFS=',' read -ra binddirs <<< "${SINGULARITY_BIND:-}"
for binddir in "${binddirs[@]}"; do
    if [ -n "$binddir" ]; then
        bindstring="${bindstring} -B ${binddir}"
    fi
done
echo $bindstring
sed -i "s|BINDDIRS|$bindstring|g" "$exec_dir"/*_executable.sh
# LDLIB_PATH must be substituted before LIB_PATH: the latter is a substring of
# the former and would otherwise corrupt the LDLIB_PATH token.
sed -i "s|LDLIB_PATH|$LD_LIBRARY_PATH|g" "$exec_dir"/*_executable.sh
sed -i "s|LIB_PATH|$LIBRARY_PATH|g" "$exec_dir"/*_executable.sh
sed -i "s|FI_PATH|$FI_PROVIDER_PATH|g" "$exec_dir"/*_executable.sh

# For every requested executable, create a wrapper named after it from the
# run template and point the template's EXEC_PATH at the executable's directory.
for file in "${fileList[@]}"; do
    # readlink -m resolves to an absolute path without requiring it to exist
    fullfile=$(readlink -m "$file")
    basefile=$(basename "$fullfile")
    cp "$exec_dir/run_container_executable.sh" "$exec_dir/$basefile"
    pathdir=$(dirname "$fullfile")
    echo "fullfile is $fullfile"
    echo "$pathdir"

    EXEC_PATH="$pathdir:$PATH"
    sed -i "s|EXEC_PATH|$EXEC_PATH|g" "$exec_dir/$basefile"
    sed -i "s|ESMF_MK|$ESMFMKFILE|g" "$exec_dir/$basefile"
done

# Build-tool wrappers use the build template. The list is currently empty, so
# this loop does nothing; the commented line shows the previously used set.
#fileList="make cmake ecbuild python python3"
fileList=()
for file in "${fileList[@]}"; do
    fullfile=$(which "$file")
    basefile=$(basename "$fullfile")
    cp "$exec_dir/build_container_executable.sh" "$exec_dir/$basefile"
    pathdir=$(dirname "$fullfile")

    EXEC_PATH="$pathdir:$PATH"
    sed -i "s|EXEC_PATH|$EXEC_PATH|g" "$exec_dir/$basefile"
    sed -i "s|CMAKE_PREPATH|$CMAKE_PREFIX_PATH|g" "$exec_dir/$basefile"
    sed -i "s|ESMF_MK|$ESMFMKFILE|g" "$exec_dir/$basefile"
done

# Make every generated script in the output directory executable.
chmod +x "$exec_dir"/*
7 changes: 7 additions & 0 deletions builds/docker/linux-intel.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Module environment for the docker / linux-intel target.
# NOTE(review): presumably sourced by linux-build.bash when it is invoked with
# "-m docker -p linux-intel" — confirm against that script.

# Run Lmod's bash init script; the `module` commands below depend on it.
source /usr/lmod/lmod/init/bash
# Start from a clean state: unload any modules that are already loaded.
module purge
# Add the spack-stack 1.8.0 unified-env Core modulefiles to the module search path.
module use /opt/spack-stack/spack-stack-1.8.0/envs/unified-env/install/modulefiles/Core
# Load the required modules. Order is kept as-is; later modules may depend on
# earlier ones — TODO confirm.
module load stack-oneapi
module load stack-intel-oneapi-mpi
module load jedi-ufs-env
# Unload fms/2024.02 after jedi-ufs-env has been loaded.
# NOTE(review): presumably so the build does not pick up the stack's FMS — confirm.
module unload fms/2024.02
Loading