From 1231c9aa84d2378a1235fe73bb1488fcc565cf05 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 26 Aug 2024 12:12:24 -0600 Subject: [PATCH] Add GEFS C48 support on AWS (#2818) Changes to make the GEFS C48 case run on AWS. Now that the C48 ATM forecast-only case runs on AWS, the next step is to make GEFS C48 run on AWS. Changes to AWS env, and yaml files. Resolves #2817 Refs #2711 --- env/AWSPW.env | 18 +++++++++++++++++- parm/config/gefs/config.base | 6 ++++++ parm/config/gefs/config.resources | 2 +- parm/config/gefs/config.resources.AWSPW | 11 +++++++++++ parm/config/gfs/config.resources.AWSPW | 2 +- workflow/rocoto/workflow_xml.py | 3 +-- 6 files changed, 37 insertions(+), 5 deletions(-) create mode 100644 parm/config/gefs/config.resources.AWSPW diff --git a/env/AWSPW.env b/env/AWSPW.env index 992281a1d7..7fe17d2492 100755 --- a/env/AWSPW.env +++ b/env/AWSPW.env @@ -27,7 +27,7 @@ if [[ -n "${ntasks:-}" && -n "${max_tasks_per_node:-}" && -n "${tasks_per_node:- NTHREADS1=${threads_per_task:-1} [[ ${NTHREADSmax} -gt ${max_threads_per_task} ]] && NTHREADSmax=${max_threads_per_task} [[ ${NTHREADS1} -gt ${max_threads_per_task} ]] && NTHREADS1=${max_threads_per_task} - APRUN="${launcher} -n ${ntasks}" + export APRUN="${launcher} -n ${ntasks}" else echo "ERROR config.resources must be sourced before sourcing AWSPW.env" exit 2 @@ -43,6 +43,13 @@ if [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then export APRUN_UFS="${launcher} -n ${ufs_ntasks}" unset nnodes ufs_ntasks +elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]]; then + + export CFP_MP="YES" + if [[ "${step}" = "waveprep" ]]; then export MP_PULSE=0 ; fi + export wavempexec=${launcher} + export wave_mpmd=${mpmd_opt} + elif [[ "${step}" = "post" ]]; then export NTHREADS_NP=${NTHREADS1} @@ -52,6 +59,15 @@ elif [[ "${step}" = "post" ]]; then [[ ${NTHREADS_DWN} 
-gt ${max_threads_per_task} ]] && export NTHREADS_DWN=${max_threads_per_task} export APRUN_DWN="${launcher} -n ${ntasks_dwn}" +elif [[ "${step}" = "atmos_products" ]]; then + + export USE_CFP="YES" # Use MPMD for downstream product generation + +elif [[ "${step}" = "oceanice_products" ]]; then + + export NTHREADS_OCNICEPOST=${NTHREADS1} + export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}" + elif [[ "${step}" = "ecen" ]]; then export NTHREADS_ECEN=${NTHREADSmax} diff --git a/parm/config/gefs/config.base b/parm/config/gefs/config.base index a92349facd..189b7ba446 100644 --- a/parm/config/gefs/config.base +++ b/parm/config/gefs/config.base @@ -345,4 +345,10 @@ export DELETE_COM_IN_ARCHIVE_JOB="YES" # NO=retain ROTDIR. YES default in arc # Number of regional collectives to create soundings for export NUM_SND_COLLECTIVES=${NUM_SND_COLLECTIVES:-9} +# Wave jobs are turned off on AWS for now (this block only sets DO_WAVE) +# TODO: move this into workflow/hosts/awspw.yaml as part of the AWS setup rather than keeping it in the general config. +if [[ "${machine}" == "AWSPW" ]]; then + export DO_WAVE="NO" +fi + echo "END: config.base" diff --git a/parm/config/gefs/config.resources b/parm/config/gefs/config.resources index 297bc08c05..5667e5efa4 100644 --- a/parm/config/gefs/config.resources +++ b/parm/config/gefs/config.resources @@ -41,7 +41,7 @@ case ${machine} in ;; "AWSPW") export PARTITION_BATCH="compute" - max_tasks_per_node=40 + max_tasks_per_node=36 ;; *) echo "FATAL ERROR: Unknown machine encountered by ${BASH_SOURCE[0]}" diff --git a/parm/config/gefs/config.resources.AWSPW b/parm/config/gefs/config.resources.AWSPW new file mode 100644 index 0000000000..a735c7622d --- /dev/null +++ b/parm/config/gefs/config.resources.AWSPW @@ -0,0 +1,11 @@ +#! 
/usr/bin/env bash + +# AWS-specific job resources + +export is_exclusive="True" +unset memory + +# shellcheck disable=SC2312 +for mem_var in $(env | grep '^memory_' | cut -d= -f1); do + unset "${mem_var}" +done diff --git a/parm/config/gfs/config.resources.AWSPW b/parm/config/gfs/config.resources.AWSPW index 2bb5f35e76..a735c7622d 100644 --- a/parm/config/gfs/config.resources.AWSPW +++ b/parm/config/gfs/config.resources.AWSPW @@ -3,7 +3,7 @@ # AWS-specific job resources export is_exclusive="True" -export memory=None +unset memory # shellcheck disable=SC2312 for mem_var in $(env | grep '^memory_' | cut -d= -f1); do diff --git a/workflow/rocoto/workflow_xml.py b/workflow/rocoto/workflow_xml.py index ca54f3a5bb..d9ca4fb961 100644 --- a/workflow/rocoto/workflow_xml.py +++ b/workflow/rocoto/workflow_xml.py @@ -162,8 +162,7 @@ def _write_crontab(self, crontab_file: str = None, cronint: int = 5) -> None: # AWS need 'SHELL', and 'BASH_ENV' defined, or, the crontab job won't start. if os.environ.get('PW_CSP', None) in ['aws', 'azure', 'google']: strings.extend([f'SHELL="/bin/bash"', - f'BASH_ENV="/etc/bashrc"' - ]) + f'BASH_ENV="/etc/bashrc"']) strings.extend([f'{cronintstr} {rocotorunstr}', '#################################################################', ''])