From 54d1b9966e9e29bbac58ff7fb5da2a51a90cb665 Mon Sep 17 00:00:00 2001 From: ArthurTolley Date: Tue, 23 Apr 2024 01:42:43 -0700 Subject: [PATCH] Cleaning up argparse and unused code --- bin/live/pycbc_live_collate_triggers | 87 +++++++++++++++++----------- 1 file changed, 54 insertions(+), 33 deletions(-) diff --git a/bin/live/pycbc_live_collate_triggers b/bin/live/pycbc_live_collate_triggers index 1a0aa75495d..ac3f21ac413 100644 --- a/bin/live/pycbc_live_collate_triggers +++ b/bin/live/pycbc_live_collate_triggers @@ -1,17 +1,31 @@ +#!/usr/bin/env python + +# Copyright 2024 Arthur Tolley +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. + +"""Find trigger files and combine them into a single hdf trigger merge file.""" + import glob import numpy import argparse import h5py import os import logging -import timeit +import pycbc from datetime import datetime, timedelta # Set up the command line argument parser -parser = argparse.ArgumentParser(description='Find trigger files and combine ' - 'them into a single hdf trigger ' - 'merge file.') - +parser = argparse.ArgumentParser(description=__doc__) +pycbc.add_common_pycbc_options(parser) parser.add_argument('--trigger-file-method', type=str, required=True, help='The method to use to find the trigger files. ' 'Options are: file, dir, start-end-date, ' @@ -71,24 +85,32 @@ parser.add_argument('--output-file-name', type=str, required=False, args = parser.parse_args() -logging.basicConfig(level=logging.INFO) +pycbc.init_logging(args.verbose) # GRAB THE TRIGGER FILES TO COLLATE if args.trigger_file_method == 'file': - assert args.list_of_trigger_files, 'Please provide a list of trigger files.' + if not args.list_of_trigger_files: + parser.error('Please provide a list of trigger files.') + trigger_files = numpy.loadtxt(args.list_of_trigger_files, delimiter=',', dtype=str) if args.trigger_file_method == 'dir': - assert args.trigger_dir, 'Please provide a directory containing trigger files.' + if not args.trigger_dir: + parser.error('Please provide a directory containing trigger files.') + trigger_files = [ l for l in glob.glob(args.trigger_dir + '/*/*-Live-*.hdf', recursive=True) ] if args.trigger_file_method == 'start-end-date': - assert args.start_date, 'Please provide a start date.' - assert args.end_date, 'Please provide an end date.' - assert args.trigger_dir, 'Please provide a directory containing trigger files.' + if not args.start_date: + parser.error('Please provide a start date.') + if not args.end_date: + parser.error('Please provide an end date.') + if not args.trigger_dir: + parser.error('Please provide a directory containing trigger files.') + # Convert dates to datetime to get the days array start_date = datetime.strptime(args.start_date, '%Y-%m-%d').date() end_date = datetime.strptime(args.end_date, '%Y-%m-%d').date() @@ -114,9 +136,13 @@ if args.trigger_file_method == 'start-end-date': ] if args.trigger_file_method == 'start-num-days': - assert args.start_date, 'Please provide a start date.' - assert args.num_days, 'Please provide a number of days.' - assert args.trigger_dir, 'Please provide a directory containing trigger files.' + if not args.start_date: + parser.error('Please provide a start date.') + if not args.num_days: + parser.error('Please provide a number of days.') + if not args.trigger_dir: + parser.error('Please provide a directory containing trigger files.') + # Convert dates to datetime to get the days array start_date = datetime.strptime(args.start_date, '%Y-%m-%d').date() num_days = timedelta(days=args.num_days - 1) @@ -143,16 +169,18 @@ if args.trigger_file_method == 'start-num-days': ] if args.trigger_file_method == 'gps-start-end-time': - assert args.gps_start_time, 'Please provide a GPS start time.' - assert args.gps_end_time, 'Please provide a GPS end time.' - assert args.trigger_dir, 'Please provide a directory containing trigger files.' + if not args.gps_start_time: + parser.error('Please provide a GPS start time.') + if not args.gps_end_time: + parser.error('Please provide a GPS end time.') + if not args.trigger_dir: + parser.error('Please provide a directory containing trigger files.') trigger_files = [ l for l in glob.glob(args.trigger_dir + '/*/*-Live-*.hdf', recursive=True) if float(l.split('/')[-1].split("-")[2]) > args.gps_start_time and float(l.split('/')[-1].split("-")[2]) < args.gps_end_time ] -start = timeit.default_timer() logging.info(f" {len(trigger_files)} files found") if args.output_trigger_file_list: @@ -161,11 +189,6 @@ if args.output_trigger_file_list: for item in trigger_files: f.write("%s\n" % item) -# Record start and end gpstime for the output file -# This assumes the trigger files are sorted chronologically -start_gpstime = float(trigger_files[0].split('/')[-1].split("-")[2]) -end_gpstime = float(trigger_files[0].split('/')[-1].split("-")[2]) - ########################### # COLLATE THE TRIGGER FILES ########################### @@ -173,6 +196,12 @@ end_gpstime = float(trigger_files[0].split('/')[-1].split("-")[2]) if args.output_file_name: output_file = args.output_dir + args.output_file_name else: + # Extract the gpstimes for every file and take min/max for start and end + trigger_file_times = [float(trigger_file.split('/')[-1].split("-")[2]) + for trigger_file in trigger_files] + start_gpstime = min(trigger_file_times) + end_gpstime = max(trigger_file_times) + ifo_string = "".join(args.ifos) output_file = args.output_dir + f'{ifo_string}-Live-' + start_gpstime + '-' + end_gpstime + '.hdf' @@ -250,17 +279,11 @@ with h5py.File(output_file, 'a') as output: logging.info(f" No triggers for {ifo}, skipping") continue - # 17.4 seconds to do this - tids = numpy.arange(numpy.max(template_ids) + 1, dtype=int) - sorted_indices = numpy.argsort(template_ids) sorted_template_ids = template_ids[sorted_indices] unique_template_ids, template_id_counts = numpy.unique(sorted_template_ids, return_counts=True) index_boundaries = numpy.cumsum(template_id_counts) template_boundaries = numpy.insert(index_boundaries, 0, 0)[:-1] - - # Re-running purposes, comment out otherwise - #del triggers['template_boundaries'] triggers['template_boundaries'] = template_boundaries # Sort other datasets by template_id so it makes sense: @@ -276,7 +299,7 @@ with h5py.File(output_file, 'a') as output: # Chisq is saved as reduced chisq for live triggers but offline - # code required original chisq. This converts it back. + # code requires original chisq. This converts it back. tmpval = triggers['chisq'][:] * (2 * triggers['chisq_dof'][:] - 2) triggers['chisq'][:] = tmpval @@ -300,6 +323,4 @@ with h5py.File(output_file, 'a') as output: triggers.create_dataset\ (dataset + '_template', data=refs, dtype=h5py.special_dtype(ref=h5py.RegionReference)) -end = timeit.default_timer() -total_time = float(end - start) -logging.info(f" Time taken: {total_time}") +logging.info("Done!")