Skip to content

Commit

Permalink
Cleaning up argparse and unused code
Browse files Browse the repository at this point in the history
  • Loading branch information
ArthurTolley committed Jul 2, 2024
1 parent 8f3cac2 commit 54d1b99
Showing 1 changed file with 54 additions and 33 deletions.
87 changes: 54 additions & 33 deletions bin/live/pycbc_live_collate_triggers
Original file line number Diff line number Diff line change
@@ -1,17 +1,31 @@
#!/usr/bin/env python

# Copyright 2024 Arthur Tolley
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.

"""Find trigger files and combine them into a single hdf trigger merge file."""

import glob
import numpy
import argparse
import h5py
import os
import logging
import timeit
import pycbc
from datetime import datetime, timedelta

# Set up the command line argument parser.
# The module docstring doubles as the --help description, so the text
# lives in exactly one place.
parser = argparse.ArgumentParser(description=__doc__)
pycbc.add_common_pycbc_options(parser)
parser.add_argument('--trigger-file-method', type=str, required=True,
help='The method to use to find the trigger files. '
'Options are: file, dir, start-end-date, '
Expand Down Expand Up @@ -71,24 +85,32 @@ parser.add_argument('--output-file-name', type=str, required=False,

args = parser.parse_args()

# Standard PyCBC logging setup driven by the common --verbose option
pycbc.init_logging(args.verbose)

# GRAB THE TRIGGER FILES TO COLLATE
if args.trigger_file_method == 'file':
    if not args.list_of_trigger_files:
        parser.error('Please provide a list of trigger files.')

    # One comma-separated row of trigger file paths -> array of strings
    trigger_files = numpy.loadtxt(args.list_of_trigger_files,
                                  delimiter=',', dtype=str)

if args.trigger_file_method == 'dir':
    if not args.trigger_dir:
        parser.error('Please provide a directory containing trigger files.')

    # glob already returns a list, so no comprehension wrapper is needed.
    # NOTE(review): recursive=True is a no-op without a '**' pattern —
    # confirm whether deeper directory nesting was intended.
    trigger_files = glob.glob(args.trigger_dir + '/*/*-Live-*.hdf',
                              recursive=True)

if args.trigger_file_method == 'start-end-date':
assert args.start_date, 'Please provide a start date.'
assert args.end_date, 'Please provide an end date.'
assert args.trigger_dir, 'Please provide a directory containing trigger files.'
if not args.start_date:
parser.error('Please provide a start date.')
if not args.end_date:
parser.error('Please provide an end date.')
if not args.trigger_dir:
parser.error('Please provide a directory containing trigger files.')

# Convert dates to datetime to get the days array
start_date = datetime.strptime(args.start_date, '%Y-%m-%d').date()
end_date = datetime.strptime(args.end_date, '%Y-%m-%d').date()
Expand All @@ -114,9 +136,13 @@ if args.trigger_file_method == 'start-end-date':
]

if args.trigger_file_method == 'start-num-days':
assert args.start_date, 'Please provide a start date.'
assert args.num_days, 'Please provide a number of days.'
assert args.trigger_dir, 'Please provide a directory containing trigger files.'
if not args.start_date:
parser.error('Please provide a start date.')
if not args.num_days:
parser.error('Please provide a number of days.')
if not args.trigger_dir:
parser.error('Please provide a directory containing trigger files.')

# Convert dates to datetime to get the days array
start_date = datetime.strptime(args.start_date, '%Y-%m-%d').date()
num_days = timedelta(days=args.num_days - 1)
Expand All @@ -143,16 +169,18 @@ if args.trigger_file_method == 'start-num-days':
]

if args.trigger_file_method == 'gps-start-end-time':
    if not args.gps_start_time:
        parser.error('Please provide a GPS start time.')
    if not args.gps_end_time:
        parser.error('Please provide a GPS end time.')
    if not args.trigger_dir:
        parser.error('Please provide a directory containing trigger files.')

    # Trigger files are named <IFOS>-Live-<gpstime>-<duration>.hdf, so the
    # third '-'-separated field of the basename is the file's GPS start
    # time. Parse it once per file instead of twice, and keep the original
    # strictly-exclusive bounds via a chained comparison.
    trigger_files = [
        path for path in glob.glob(args.trigger_dir + '/*/*-Live-*.hdf',
                                   recursive=True)
        if args.gps_start_time
        < float(os.path.basename(path).split('-')[2])
        < args.gps_end_time
    ]

# Lazy %-style args avoid formatting when INFO is disabled
logging.info(" %d files found", len(trigger_files))

if args.output_trigger_file_list:
Expand All @@ -161,18 +189,19 @@ if args.output_trigger_file_list:
for item in trigger_files:
f.write("%s\n" % item)

###########################
# COLLATE THE TRIGGER FILES
###########################

if args.output_file_name:
    output_file = args.output_dir + args.output_file_name
else:
    # Extract the gpstime of every file and take min/max as the start and
    # end times for the merged file's name
    trigger_file_times = [float(trigger_file.split('/')[-1].split("-")[2])
                          for trigger_file in trigger_files]
    start_gpstime = min(trigger_file_times)
    end_gpstime = max(trigger_file_times)

    ifo_string = "".join(args.ifos)
    # Format the gpstimes inside the f-string: they are floats, and
    # concatenating them to a str with '+' raises TypeError.
    output_file = (args.output_dir
                   + f'{ifo_string}-Live-{start_gpstime}-{end_gpstime}.hdf')

Expand Down Expand Up @@ -250,17 +279,11 @@ with h5py.File(output_file, 'a') as output:
logging.info(f" No triggers for {ifo}, skipping")
continue

# Sort the triggers by template id and record, for each template id
# present, the index at which its triggers begin in the sorted arrays.
sorted_indices = numpy.argsort(template_ids)
sorted_template_ids = template_ids[sorted_indices]
# Only the per-id counts are needed; the unique ids themselves are unused
_, template_id_counts = numpy.unique(sorted_template_ids,
                                     return_counts=True)
index_boundaries = numpy.cumsum(template_id_counts)
# Prepend 0 and drop the grand total so entry i is the start index of the
# i-th unique template id
template_boundaries = numpy.insert(index_boundaries, 0, 0)[:-1]
# NOTE(review): boundaries are per *unique* id present in the data;
# template ids with no triggers get no slot — confirm downstream readers
# expect that rather than one slot per id up to max(template_ids).
triggers['template_boundaries'] = template_boundaries

# Sort other datasets by template_id so it makes sense:
Expand All @@ -276,7 +299,7 @@ with h5py.File(output_file, 'a') as output:


# Live triggers store *reduced* chisq, but the offline code requires the
# original chisq; convert it back in place:
#   chisq = reduced_chisq * (2 * chisq_dof - 2)
converted_chisq = triggers['chisq'][:] * (2 * triggers['chisq_dof'][:] - 2)
triggers['chisq'][:] = converted_chisq

Expand All @@ -300,6 +323,4 @@ with h5py.File(output_file, 'a') as output:
# Store the per-template region references alongside the dataset.
# Parenthesized call instead of a backslash line continuation (PEP 8).
triggers.create_dataset(
    dataset + '_template', data=refs,
    dtype=h5py.special_dtype(ref=h5py.RegionReference))

logging.info("Done!")

0 comments on commit 54d1b99

Please sign in to comment.