Script to create hdf trigger merge files from PyCBC Live output trigger files (#4697)

* combined trigger file list and collate triggers, untested
* working state
* removing print statement
* Cleaning up argparse and unused code
* Trigger file checking and ifo permutation
* Rework trigger file finding in pycbc_live_single_trigger_fits
* enumerate + continue indentation
* implement minor changes

---------

Co-authored-by: GarethCabournDavies <[email protected]>
1 parent 3483ad5, commit e033ad8
Showing 1 changed file with 264 additions and 0 deletions.
#!/usr/bin/env python

# Copyright 2024 Arthur Tolley
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.

"""Find trigger files and combine them into a single hdf trigger merge file."""

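# A sketch of a typical invocation. The script name and file names here are
# illustrative only; the options for selecting PyCBC Live trigger files
# (e.g. the GPS interval to merge) are added by
# liveio.add_live_trigger_selection_options, and the cut options by
# cuts.insert_cuts_option_group below:
#
#   <this_script> --ifos H1 L1 V1 \
#       --bank-file bank.hdf \
#       --output-file H1L1V1-TRIGGER_MERGE.hdf \
#       <trigger-selection and cut options>
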
import argparse
import os
import logging
import numpy
import h5py

from ligo.segments import segmentlist, segment

import pycbc
from pycbc.io import live as liveio
from pycbc.events import cuts, veto

# Set up the command line argument parser
parser = argparse.ArgumentParser(description=__doc__)
pycbc.add_common_pycbc_options(parser)
liveio.add_live_trigger_selection_options(parser)
cuts.insert_cuts_option_group(parser)

parser.add_argument(
    '--ifos',
    nargs='+',
    required=True,
    help="The list of detectors whose triggers will be included in the "
         "merged file."
)
parser.add_argument(
    '--output-file',
    required=True,
    help="The output file containing merged triggers."
)
parser.add_argument(
    '--bank-file',
    required=True,
    help="The template bank file used in the search."
)

args = parser.parse_args()

pycbc.init_logging(args.verbose)

logging.info("Finding trigger files") | ||
trigger_files = liveio.find_trigger_files_from_cli(args) | ||
logging.info("%s files found", len(trigger_files)) | ||
|
||
########################### | ||
# COLLATE THE TRIGGER FILES | ||
########################### | ||
# Deal with where the gps time is in the middle of a file: | ||
args.trigger_cuts = args.trigger_cuts or [] | ||
args.trigger_cuts.append(f"end_time:{args.gps_start_time}:lower_inc") | ||
args.trigger_cuts.append(f"end_time:{args.gps_end_time}:upper_inc") | ||

trigger_cut_dict, template_cut_dict = cuts.ingest_cuts_option_group(args)

logging.info("Collating triggers into %s", args.output_file)

# Some tracking objects
file_count = 0
n_triggers = {ifo: 0 for ifo in args.ifos}
n_triggers_cut = {ifo: 0 for ifo in args.ifos}
segs = {ifo: segmentlist([]) for ifo in args.ifos}

with h5py.File(args.bank_file, 'r') as bank_file:
    # Count the number of templates in the bank
    n_templates = bank_file['template_hash'].size

with h5py.File(args.output_file, 'w') as destination:
    # Create the ifo groups in the output trigger file
    for ifo in args.ifos:
        if ifo not in destination:
            destination.create_group(ifo)

    for file_count, source_file in enumerate(trigger_files):
        trigger_file = os.path.basename(source_file)

        start_time = float(trigger_file.split('-')[2])
        duration = float(trigger_file.split('-')[3][:-4])
        end_time = start_time + duration
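        # The split above assumes the PyCBC Live output naming convention of
        # hyphen-separated fields ending in GPSSTART-DURATION.hdf, so that
        # the third field is the GPS start time and the fourth, minus its
        # ".hdf" extension, is the file duration in seconds.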

        if file_count % 100 == 0:
            logging.info(
                "Files appended: %d/%d",
                file_count,
                len(trigger_files)
            )

        with h5py.File(source_file, 'r') as source:
            for ifo in args.ifos:
                try:
                    n_trigs_ifo = source[ifo]['snr'].size
                    n_triggers[ifo] += n_trigs_ifo
                except KeyError:
                    # No triggers in this ifo in this file
                    continue

                # Triggers were generated, so add this file's span to the
                # segment list
                segs[ifo].append(segment(start_time, end_time))
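                # These per-file segments are coalesced after the file loop
                # to record the total time searched by each detector.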

                triggers = {
                    k: source[ifo][k][:] for k in source[ifo].keys()
                    if k not in ('loudest', 'stat', 'gates', 'psd')
                    and source[ifo][k].size == n_trigs_ifo
                }
                # The stored chisq is actually reduced chisq, so convert back
                # to unreduced chisq using chisq_dof
                triggers['chisq'] *= 2 * triggers['chisq_dof'] - 2

                # Apply the cuts to triggers
                keep_idx = cuts.apply_trigger_cuts(triggers, trigger_cut_dict)

                # triggers contains the datasets that we want to use for the
                # template cuts, so here it can double as the template bank
                keep_idx = cuts.apply_template_cuts(
                    triggers,
                    template_cut_dict,
                    template_ids=keep_idx
                )
                # Check the size, not any(), since keep_idx holds indices and
                # a lone surviving index 0 would make any() falsely empty
                if keep_idx.size == 0:
                    # No triggers kept after cuts in this ifo for this file
                    continue
                n_triggers_cut[ifo] += keep_idx.size

                triggers = {k: v[keep_idx] for k, v in triggers.items()}

                if ('approximant' not in triggers) or \
                        (len(triggers['approximant']) == 0):
                    continue

                for name, dataset in triggers.items():
                    if name in destination[ifo]:
                        # Append new data to the existing dataset
                        if dataset.shape[0] == 0:
                            continue
                        destination[ifo][name].resize(
                            n_triggers_cut[ifo],
                            axis=0
                        )
                        destination[ifo][name][-keep_idx.size:] = dataset
                    else:
                        destination[ifo].create_dataset(
                            name,
                            data=dataset,
                            chunks=True,
                            maxshape=(None,)
                        )
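                        # chunks=True and maxshape=(None,) make the dataset
                        # resizable, which is what allows the resize() call
                        # in the append branch above to grow it as triggers
                        # from later files arrive.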

            # Copy over the file-level attributes; each file overwrites the
            # previous values, so the last file's attributes win
            for attr_name, attr_value in source.attrs.items():
                destination.attrs[attr_name] = attr_value

    for ifo in args.ifos:
        # Collect the segments and output them to the file
        search_grp = destination[ifo].create_group('search')
        segs[ifo].coalesce()
        seg_starts, seg_ends = veto.segments_to_start_end(segs[ifo])
        search_grp.create_dataset('start_time', data=seg_starts)
        search_grp.create_dataset('end_time', data=seg_ends)
        logging.info(
            "Found %d %s triggers in %s seconds",
            n_triggers[ifo],
            ifo,
            abs(segs[ifo])
        )
        if n_triggers[ifo] != n_triggers_cut[ifo]:
            logging.info(
                "%d triggers remain after cuts",
                n_triggers_cut[ifo],
            )

    # Add all the template boundaries
    for ifo in args.ifos:
        logging.info("Processing %s", ifo)
        triggers = destination[ifo]
        try:
            template_ids = triggers['template_id'][:]
        except KeyError:
            logging.info("No triggers for %s, skipping", ifo)
            continue

        logging.info("Calculating template boundaries")
        sorted_indices = numpy.argsort(template_ids)
        sorted_template_ids = template_ids[sorted_indices]
        template_boundaries = numpy.searchsorted(
            sorted_template_ids,
            numpy.arange(n_templates)
        )
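        # searchsorted returns, for each template id 0..n_templates-1, the
        # index of its first trigger in the sorted arrays; e.g. for
        # sorted_template_ids = [0, 0, 2] and n_templates = 3 the boundaries
        # are [0, 2, 2], so template 1, which has no triggers, gets an
        # empty slice.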
        triggers['template_boundaries'] = template_boundaries

        # Sort the other datasets by template_id so that each template's
        # triggers are contiguous. Datasets with the same length as the
        # number of triggers include: approximant, chisq, chisq_dof,
        # coa_phase, end_time, f_lower, mass_1, mass_2, sg_chisq, sigmasq,
        # snr, spin1z, spin2z, template_duration, template_hash, template_id
        for key in triggers.keys():
            if len(triggers[key]) == len(template_ids):
                logging.info('Sorting %s by template id', key)
                triggers[key][:] = triggers[key][:][sorted_indices]
logging.info("Setting up region references") | ||
# Datasets which need region references: | ||
region_ref_datasets = ('chisq_dof', 'chisq', 'coa_phase', | ||
'end_time', 'sg_chisq', 'snr', | ||
'template_duration', 'sigmasq') | ||
if 'psd_var_val' in triggers.keys(): | ||
region_ref_datasets += ('psd_var_val',) | ||
|
||
start_boundaries = template_boundaries | ||
end_boundaries = numpy.roll(start_boundaries, -1) | ||
end_boundaries[-1] = len(template_ids) | ||
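        # After the sort, template i's triggers occupy the contiguous rows
        # [start_boundaries[i], end_boundaries[i]) of each per-trigger
        # dataset; each region reference below points at exactly that slice.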

        for dataset in region_ref_datasets:
            logging.info("Region references for %s", dataset)
            refs = [
                triggers[dataset].regionref[l:r]
                for l, r in zip(start_boundaries, end_boundaries)
            ]

            logging.info("Adding to file")
            triggers.create_dataset(
                dataset + '_template',
                data=refs,
                dtype=h5py.special_dtype(ref=h5py.RegionReference)
            )
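        # A reader can later dereference these; a minimal sketch, assuming a
        # file handle f, an ifo name and a template id tid:
        #   ref = f[ifo]['snr_template'][tid]
        #   snrs = f[ifo]['snr'][ref]
        # which yields the snr values of that template's triggers only.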

logging.info("Done!")