NOTE: this patch was reconstructed from a copy whose line breaks and
indentation had been collapsed. Indentation and the position of blank
context lines are inferred from the code's syntax and from the hunk line
counts, so check the context against the target tree before applying.
Some added lines were also corrected, so the post-image blob ids on the
"index" lines no longer describe the resulting files.

diff --git a/tests/auto/rt_auto.py b/tests/auto/rt_auto.py
index a5d23ff33e..2ded8b6fcb 100644
--- a/tests/auto/rt_auto.py
+++ b/tests/auto/rt_auto.py
@@ -42,8 +42,8 @@ def parse_args_in():
     parser = argparse.ArgumentParser()
 
     # Setup Input Arguments
-    choices = ['hera.intel', 'orion.intel', 'gaea.intel', 'jet.intel', 'wcoss_dell_p3']
-    parser.add_argument('-m', '--machine', help='Machine and Compiler combination', required=True, choices=choices, type=str)
+    choices = ['cheyenne', 'hera', 'orion', 'gaea', 'jet', 'wcoss_dell_p3']
+    parser.add_argument('-m', '--machine', help='Machine name', required=True, choices=choices, type=str)
     parser.add_argument('-w', '--workdir', help='Working directory', required=True, type=str)
 
     # Get Arguments
@@ -73,15 +73,21 @@ def input_data(args):
 
 def match_label_with_action(machine, actions, label):
     ''' Match the label that initiates a job with an action in the dict'''
+    # Label format: machine-compiler-test, i.e. hera-gnu-RT
+    # RT = full regression test suite
     logger = logging.getLogger('MATCH_LABEL_WITH_ACTIONS')
     split_label = label.name.split('-')
-
-    if len(split_label) != 3: return False
-    if not re.match(split_label[0], 'Auto'): return False
-    if not re.match(split_label[2], machine['name'].split('.')[0]): return False
-    action_match = next((action for action in actions if re.match(action['name'], split_label[1])), False)
-
-    return action_match
+    if len(split_label) != 3: return False, False #Make sure it has three parts
+    if split_label[0] != machine['name']: return False, False #First check machine name matches
+    compiler = split_label[1]
+    if not str(compiler) in ["intel", "gnu"]: return False, False
+    # Copy the matched action so the shared actions list is never modified
+    action_match = next((dict(action) for action in actions if re.match(action['name'], split_label[2])), False)
+    if not action_match: return False, False #No action is registered for this label
+    action_match["command"] = f'export RT_COMPILER="{compiler}" && {action_match["command"]}'
+    if split_label[2] == "RT" and compiler == "gnu":
+        action_match["command"] = f'{action_match["command"]} -l rt_gnu.conf'
+    return compiler, action_match
 
 
 def get_preqs_with_actions(repos, machine, ghinterface_obj, actions):
@@ -92,9 +98,10 @@ def get_preqs_with_actions(repos, machine, ghinterface_obj, actions):
     preq_labels = [{'preq': pr, 'label': label} for pr in each_pr for label in pr.get_labels()]
 
     for i, pr_label in enumerate(preq_labels):
-        match = match_label_with_action(machine, actions, pr_label['label'])
+        compiler, match = match_label_with_action(machine, actions, pr_label['label'])
         if match:
             preq_labels[i]['action'] = match
+            preq_labels[i]['compiler'] = compiler
         else:
             preq_labels[i] = False
 
@@ -130,8 +137,20 @@ def remove_pr_label(self):
         self.logger.info(f'Removing Label: {self.preq_dict["label"]}')
         self.preq_dict['preq'].remove_from_labels(self.preq_dict['label'])
 
-    def send_log_name_as_comment(self):
+    def check_label_before_job_start(self):
+        # LETS Check the label still exists before the start of the job in the
+        # case of multiple jobs
+        label_to_check = f'{self.machine["name"]}-{self.preq_dict["compiler"]}-{self.preq_dict["action"]["name"]}'
+        labels = self.preq_dict['preq'].get_labels()
+        label_match = next((label for label in labels if label.name == label_to_check), False)
+
+        return label_match
+
+
+    def send_log_name_as_comment(self, log_filename):
         logger = logging.getLogger('JOB/SEND_LOG_NAME_AS_COMMENT')
+
+        #Remove LAST MONTHS LOGS
         logger.info('Removing last months logs (if any)')
         last_month = datetime.date.today().replace(day=1) - datetime.timedelta(days=1)
         rm_command = [[f'rm rt_auto_*_{last_month.strftime("%Y%m")}*.log', os.getcwd()]]
@@ -141,24 +160,16 @@ def send_log_name_as_comment(self):
         except Exception as e:
             logger.warning(f'"{rm_command}" failed with error:{e}')
 
-        new_log_name = f'rt_auto_{self.machine["name"]}_'\
-                       f'{datetime.datetime.now().strftime("%Y%m%d%H%M%S")}.log'
-        cp_command = [[f'cp rt_auto.log {new_log_name}', os.getcwd()]]
-        logger.info(f'Running "{cp_command}"')
+        # Add log information to PR.
+        comment_text = f'Log Name:{log_filename}\n'\
+                       f'Log Location:{os.getcwd()}\n'\
+                       'Logs are kept for one month'
         try:
-            self.run_commands(cp_command)
+            self.preq_dict['preq'].create_issue_comment(comment_text)
         except Exception as e:
-            logger.warning('Renaming rt_auto failed')
+            logger.warning(f'Creating comment with log location failed with:{e}')
         else:
-            comment_text = f'Log Name:{new_log_name}\n'\
-                           f'Log Location:{os.getcwd()}\n'\
-                           'Logs are kept for one month'
-            try:
-                self.preq_dict['preq'].create_issue_comment(comment_text)
-            except Exception as e:
-                logger.warning('Creating comment with log location failed with:{e}')
-            else:
-                logger.info(f'{comment_text}')
+            logger.info(f'{comment_text}')
 
     def run_commands(self, commands_with_cwd):
         logger = logging.getLogger('JOB/RUN_COMMANDS')
@@ -212,9 +223,12 @@ def clone_pr_repo(self):
     def run_function(self):
         ''' Run the command associted with the label used to initiate this job '''
         logger = logging.getLogger('JOB/RUN_FUNCTION')
+        compiler = self.preq_dict['compiler']
+        logger.info(f'Compiler being used for command is {compiler}')
+        command = self.preq_dict["action"]["command"]
         try:
-            logger.info(f'Running: "{self.preq_dict["action"]["command"]}" in "{self.pr_repo_loc}"')
-            output = subprocess.Popen(self.preq_dict['action']['command'], cwd=self.pr_repo_loc, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+            logger.info(f'Running: "{command}" in "{self.pr_repo_loc}"')
+            output = subprocess.Popen(command, cwd=self.pr_repo_loc, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
             out,err = output.communicate()
             out = [] if not out else out.decode('utf8').split('\n')
             err = [] if not err else err.decode('utf8').split('\n')
@@ -225,7 +239,13 @@ def run_function(self):
             assert(e)
         else:
             if output.returncode != 0:
-                logger.critical(f'{self.preq_dict["action"]["command"]} Failed')
+                comment_text = f'rt.sh failed \n'\
+                               f'machine: {self.machine["name"]} \n'\
+                               f'compiler: {self.preq_dict["compiler"]}\n'\
+                               f'STDOUT: {out} \n'\
+                               f'STDERR: {err}'
+                self.preq_dict['preq'].create_issue_comment(comment_text)
+                logger.critical(f'{command} Failed')
                 [logger.critical(f'stdout: {item}') for item in out if not None]
                 [logger.critical(f'stderr: {eitem}') for eitem in err if not None]
             else:
@@ -245,28 +265,27 @@ def run_function(self):
     def move_rt_logs(self):
         ''' This is the callback function associated with the "RT" command '''
         logger = logging.getLogger('JOB/MOVE_RT_LOGS')
-        rt_log = f'tests/RegressionTests_{self.machine["name"]}.log'
+        rt_log = f'tests/RegressionTests_{self.machine["name"]}.{self.preq_dict["compiler"]}.log'
         filepath = f'{self.pr_repo_loc}/{rt_log}'
-        rm_filepath = '/'.join((self.pr_repo_loc.split('/'))[:-1])
         if os.path.exists(filepath):
             move_rt_commands = [
+                [f'git pull --ff-only origin {self.branch}', self.pr_repo_loc],
                 [f'git add {rt_log}', self.pr_repo_loc],
-                [f'git commit -m "Auto: Added Updated RT Log file: {rt_log}"', self.pr_repo_loc],
-                [f'git pull --no-edit origin {self.branch}', self.pr_repo_loc],
+                [f'git commit -m "Auto: Add RT Log file: {rt_log} skip-ci"', self.pr_repo_loc],
                 ['sleep 10', self.pr_repo_loc],
                 [f'git push origin {self.branch}', self.pr_repo_loc]
             ]
             self.run_commands(move_rt_commands)
         else:
-            logger.critical('Could not find RT log')
-            raise FileNotFoundError('Could not find RT log')
+            logger.critical(f'Could not find RT log: {filepath}')
+            raise FileNotFoundError(f'Could not find RT log: {filepath}')
 
 
 def main():
 
     # handle logging
     log_path = os.getcwd()
-    log_filename = 'rt_auto.log'
+    log_filename = f'rt_auto_{datetime.datetime.now().strftime("%Y%m%d%H%M%S")}.log'
     # Please don't run the following on cron with level=logging.DEBUG
     # as it exposes the GH API Token
     # Only set it to DEBUG while debugging
@@ -288,26 +307,26 @@ def main():
     # get all pull requests from the GitHub object
     logger.info('Getting all pull requests, labels and actions applicable to this machine.')
     preq_dict = get_preqs_with_actions(repos, machine, ghinterface_obj, actions)
-
     # add Job objects and run them
     logger.info('Adding all jobs to an object list and running them.')
     jobs = [Job(pullreq, ghinterface_obj, machine) for pullreq in preq_dict]
     for job in jobs:
         logger.info(f'Starting Job: {job}')
-        try:
-            logger.info('Calling remove_pr_label')
-            job.remove_pr_label()
-            logger.info('Calling clone_pr_repo')
-            job.clone_pr_repo()
-            logger.info('Calling run_function')
-            job.run_function()
-            logger.info('Calling remove_pr_dir')
-            job.remove_pr_dir()
-            logger.info('Calling send_log_name_as_comment')
-            job.send_log_name_as_comment()
-        except Exception as e:
-            logger.critical(e)
-            assert(e)
+        if job.check_label_before_job_start():
+            try:
+                logger.info('Calling remove_pr_label')
+                job.remove_pr_label()
+                logger.info('Calling clone_pr_repo')
+                job.clone_pr_repo()
+                logger.info('Calling run_function')
+                job.run_function()
+                # logger.info('Calling remove_pr_dir')
+                # job.remove_pr_dir()
+                logger.info('Calling send_log_name_as_comment')
+                job.send_log_name_as_comment(log_filename)
+            except Exception as e:
+                logger.critical(e)
+                assert(e)
 
     logger.info('Script Finished')
 
diff --git a/tests/auto/rt_auto.sh b/tests/auto/rt_auto.sh
index 61fe523c6e..17d7e6ff00 100644
--- a/tests/auto/rt_auto.sh
+++ b/tests/auto/rt_auto.sh
@@ -1,45 +1,53 @@
 #!/bin/bash --login
 set -eux
 if [ -f "accesstoken.sh" ]; then
-  source ./accesstoken.sh
+  if [[ "$(stat -L -c "%a" "accesstoken.sh")" == "600" ]]; then
+    echo "Sourcing accesstoken.sh"
+    source ./accesstoken.sh
+  else
+    echo "accesstoken.sh permissions NEED to be set to 600 before starting"
+    exit 1
+  fi
 else
   echo "Please create accesstoken.sh (600) with the following content\n"
   echo "export ghapitoken="
   exit 1
 fi
 
-export RT_COMPILER='intel'
-source ../detect_machine.sh
-echo "Machine ID: "+$MACHINE_ID
-if [[ $MACHINE_ID = hera.* ]]; then
+if [[ $HOSTNAME == hfe* ]]; then
+  MACHINE_NAME=hera
   WORKDIR=/scratch1/NCEPDEV/nems/Brian.Curtis/test
   export PATH=/scratch1/NCEPDEV/nems/emc.nemspara/soft/miniconda3/bin:$PATH
   export PYTHONPATH=/scratch1/NCEPDEV/nems/emc.nemspara/soft/miniconda3/lib/python3.8/site-packages
-elif [[ $MACHINE_ID = orion.* ]]; then
+elif [[ $HOSTNAME == Orion-login-* ]]; then
+  MACHINE_NAME=orion
   WORKDIR=/work/noaa/nems/bcurtis/test
   export PATH=/work/noaa/nems/emc.nemspara/soft/miniconda3/bin:$PATH
   export PYTHONPATH=/work/noaa/nems/emc.nemspara/soft/miniconda3/lib/python3.8/site-packages
-elif [[ $MACHINE_ID = jet.* ]]; then
+elif [[ $HOSTNAME == fe* ]]; then
+  MACHINE_NAME=jet
   WORKDIR=/lfs4/HFIP/h-nems/Brian.Curtis/test
   export ACCNR="h-nems"
   export PATH=/lfs4/HFIP/hfv3gfs/software/miniconda3/4.8.3/envs/ufs-weather-model/bin:/lfs4/HFIP/hfv3gfs/software/miniconda3/4.8.3/bin:$PATH
   export PYTHONPATH=/lfs4/HFIP/hfv3gfs/software/miniconda3/4.8.3/envs/ufs-weather-model/lib/python3.8/site-packages:/lfs4/HFIP/hfv3gfs/software/miniconda3/4.8.3/lib/python3.8/site-packages
-elif [[ $MACHINE_ID = gaea.* ]]; then
+elif [[ $HOSTNAME == gaea* ]]; then
+  MACHINE_NAME=gaea
   WORKDIR=/lustre/f2/pdata/ncep/Brian.Curtis/test
   export LOADEDMODULES=$LOADEDMODULES
   export ACCNR="nggps_emc" # This applies to Brian.Curtis, may need change later
   export PATH=/lustre/f2/pdata/esrl/gsd/contrib/miniconda3/4.8.3/envs/ufs-weather-model/bin:$PATH
   export PYTHONPATH=/lustre/f2/pdata/esrl/gsd/contrib/miniconda3/4.8.3/lib/python3.8/site-packages
-elif [[ $MACHINE_ID = cheyenne.* ]]; then
-  #export PATH=/glade/p/ral/jntp/tools/ecFlow-5.3.1/bin:$PATH
-  #export PYTHONPATH=/glade/p/ral/jntp/tools/ecFlow-5.3.1/lib/python2.7/site-packages
-  echo "cheyenne not currently supported. automated RT not starting"
-  exit 1
+elif [[ $HOSTNAME == *.cheyenne.ucar.edu ]]; then
+  MACHINE_NAME=cheyenne
+  WORKDIR=/glade/work/heinzell/fv3/ufs-weather-model/auto-rt
+  export ACCNR="P48503002"
+  export PATH=/glade/p/ral/jntp/tools/miniconda3/4.8.3/envs/ufs-weather-model/bin:/glade/p/ral/jntp/tools/miniconda3/4.8.3/bin:$PATH
+  export PYTHONPATH=/glade/p/ral/jntp/tools/miniconda3/4.8.3/envs/ufs-weather-model/lib/python3.8/site-packages:/glade/p/ral/jntp/tools/miniconda3/4.8.3/lib/python3.8/site-packages
 else
   echo "No Python Path for this machine. automated RT not starting"
   exit 1
 fi
 
-python rt_auto.py -m $MACHINE_ID -w $WORKDIR
+python rt_auto.py -m "$MACHINE_NAME" -w "$WORKDIR"
 
 exit 0
diff --git a/tests/detect_machine.sh b/tests/detect_machine.sh
index c4bea08a2e..0356b38bce 100755
--- a/tests/detect_machine.sh
+++ b/tests/detect_machine.sh
@@ -88,6 +88,12 @@ case $(hostname -f) in
   cheyenne4.ib0.cheyenne.ucar.edu) MACHINE_ID=cheyenne ;; ### cheyenne4
   cheyenne5.ib0.cheyenne.ucar.edu) MACHINE_ID=cheyenne ;; ### cheyenne5
   cheyenne6.ib0.cheyenne.ucar.edu) MACHINE_ID=cheyenne ;; ### cheyenne6
+  chadmin1.ib0.cheyenne.ucar.edu) MACHINE_ID=cheyenne ;; ### chadmin1
+  chadmin2.ib0.cheyenne.ucar.edu) MACHINE_ID=cheyenne ;; ### chadmin2
+  chadmin3.ib0.cheyenne.ucar.edu) MACHINE_ID=cheyenne ;; ### chadmin3
+  chadmin4.ib0.cheyenne.ucar.edu) MACHINE_ID=cheyenne ;; ### chadmin4
+  chadmin5.ib0.cheyenne.ucar.edu) MACHINE_ID=cheyenne ;; ### chadmin5
+  chadmin6.ib0.cheyenne.ucar.edu) MACHINE_ID=cheyenne ;; ### chadmin6
 
   login1.stampede2.tacc.utexas.edu) MACHINE_ID=stampede ;; ### stampede1
   login2.stampede2.tacc.utexas.edu) MACHINE_ID=stampede ;; ### stampede2