Skip to content

Commit

Permalink
PyGRB writing/loading timeslides (gwastro#4427)
Browse files Browse the repository at this point in the history
* Timeslides written and loaded

* More commented stuff

* Make event manager write slide offset

* Slide offset is actually slide offset now

* Write full timeslide information

* Try writing full slide info another way

* Load timeslides using new file output

* Load fix + keep sanity checks

* Correct number of ids

* "slide shift" sounds better than "slide offset"

* Bring back slide id

* Codeclimate fix

* Move time slides column to search group

* Load time slides from search group

* Avoid layers in output files

* Try different placement in eventmgr

* Try ifo/search group

* Add search group copying to trig cluster

* Get number of sets right for bar updating

* Add more search group copying to work with bins
  • Loading branch information
jakeb245 authored and maxtrevor committed Dec 5, 2023
1 parent 7659d3f commit e3a90ac
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 19 deletions.
6 changes: 3 additions & 3 deletions bin/pycbc_multi_inspiral
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ with ctx:
'bank_chisq': None,
'bank_chisq_dof': None,
'cont_chisq': None,
'slide_id': int
'slide_id': None
}
ifo_names = sorted(ifo_out_vals.keys())
network_out_types = {
Expand All @@ -303,13 +303,13 @@ with ctx:
'nifo': None,
'my_network_chisq': None,
'reweighted_snr': None,
'slide_id': int
'slide_id': None
}
network_names = sorted(network_out_vals.keys())
event_mgr = EventManagerCoherent(
args, args.instruments, ifo_names,
[ifo_out_types[n] for n in ifo_names], network_names,
[network_out_types[n] for n in network_names])
[network_out_types[n] for n in network_names], time_slides=time_slides)
logging.info("Read in template bank")
bank = waveform.FilterBank(
args.bank_file, flen, delta_f, complex64, low_frequency_cutoff=flow,
Expand Down
11 changes: 10 additions & 1 deletion bin/pygrb/pycbc_grb_trig_cluster
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ def slice_hdf5(inputfile, outfile, include, verbose=False):
) for ifo in ifos
}

nsets = sum(isinstance(item, h5py.Dataset) for
nsets = sum(isinstance(item, h5py.Dataset)
or isinstance(item, h5py.Group) for
group in h5in.values() for
item in group.values())
msg = "Slicing {} network events into new file".format(nevents)
Expand All @@ -88,6 +89,10 @@ def slice_hdf5(inputfile, outfile, include, verbose=False):
compression_opts=old.compression_opts,
)
bar.update()
elif isinstance(old, h5py.Group):
if "search" in old.name:
h5in.copy(h5in[old.name], h5out, old.name)
bar.update()
for ifo in ifos:
idx = numpy.in1d(h5in[ifo]["event_id"][()], ifo_index[ifo])
for old in h5in[ifo].values():
Expand All @@ -99,6 +104,10 @@ def slice_hdf5(inputfile, outfile, include, verbose=False):
compression_opts=old.compression_opts,
)
bar.update()
elif isinstance(old, h5py.Group):
if "search" in old.name:
h5in.copy(h5in[old.name], h5out, old.name)
bar.update()
bar.close()
if verbose:
print("Slice written to {}".format(outfile))
Expand Down
12 changes: 9 additions & 3 deletions bin/pygrb/pycbc_grb_trig_combiner
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,11 @@ def bin_events(inputfile, bins, outdir, filetag,
compression_opts=old.compression_opts,
)
bar.update()
elif isinstance(old, h5py.Group):
# If search group, copy it all over
if "search" in old.name:
h5in.copy(h5in[old.name], h5out, old.name)
bar.update()
for ifo in ifos:
idx = numpy.in1d(h5in[ifo]["event_id"][()], ifo_index[ifo])
for old in h5in[ifo].values():
Expand All @@ -290,6 +295,10 @@ def bin_events(inputfile, bins, outdir, filetag,
compression_opts=old.compression_opts,
)
bar.update()
elif isinstance(old, h5py.Group):
if "search" in old.name:
h5in.copy(h5in[old.name], h5out, old.name)
bar.update()
bar.close()
if verbose:
print("{} written to {}".format(bin_, outf))
Expand Down Expand Up @@ -492,9 +501,6 @@ vprint("-- Merging events")
if args.short_slides and args.long_slides:
raise NotImplementedError

elif args.short_slides and not args.long_slides:
raise NotImplementedError

else:
outfilename = "{}-{}_ALL_TIMES-{}-{}.h5".format(
args.ifo_tag, args.user_tag, start, end-start,
Expand Down
6 changes: 4 additions & 2 deletions pycbc/events/eventmgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ def add_template_events_to_ifo(self, ifo, columns, vectors):

class EventManagerCoherent(EventManagerMultiDetBase):
def __init__(self, opt, ifos, column, column_types, network_column,
network_column_types, psd=None, **kwargs):
network_column_types, time_slides, psd=None, **kwargs):
super(EventManagerCoherent, self).__init__(
opt, ifos, column, column_types, psd=None, **kwargs)
self.network_event_dtype = \
Expand All @@ -612,6 +612,7 @@ def __init__(self, opt, ifos, column, column_types, network_column,
self.event_index['network'] = 0
self.template_event_dict['network'] = numpy.array(
[], dtype=self.network_event_dtype)
self.time_slides = time_slides

def cluster_template_network_events(self, tcolumn, column, window_size,
slide=0):
Expand Down Expand Up @@ -727,6 +728,7 @@ def __setitem__(self, name, data):
float(self.opt.sample_rate[ifo_str]) + \
self.opt.gps_start_time[ifo_str]
f['time_index'] = ifo_events['time_index']
f['slide_id'] = ifo_events['slide_id']
try:
# Precessing
template_sigmasq_plus = numpy.array(
Expand Down Expand Up @@ -772,7 +774,7 @@ def __setitem__(self, name, data):
f['chisq_dof'] = numpy.zeros(len(ifo_events))

f['template_hash'] = th[tid]

f['search/time_slides'] = numpy.array(self.time_slides[ifo])
if self.opt.trig_start_time:
f['search/start_time'] = numpy.array([
self.opt.trig_start_time[ifo]], dtype=numpy.int32)
Expand Down
19 changes: 9 additions & 10 deletions pycbc/results/pygrb_postprocessing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,19 +615,18 @@ def load_injections(inj_file, vetoes, sim_table=False, label=None):
# =============================================================================
# Function to load timeslides
# =============================================================================
def load_time_slides(xml_file):
def load_time_slides(hdf_file_path):
"""Loads timeslides from PyGRB output file as a dictionary"""
hdf_file = h5py.File(hdf_file_path, 'r')
ifos = extract_ifos(hdf_file_path)
ids = numpy.arange(len(hdf_file[f'{ifos[0]}/search/time_slides']))
time_slide_dict = {
slide_id: {
ifo: hdf_file[f'{ifo}/search/time_slides'][slide_id]
for ifo in ifos}
for slide_id in ids}

# Get all timeslides: these are number_of_ifos * number_of_timeslides
time_slide = load_xml_table(xml_file, glsctables.TimeSlideTable.tableName)
# Get a list of unique timeslide dictionaries
time_slide_list = [dict(i) for i in time_slide.as_dict().values()]
# Turn it into a dictionary indexed by the timeslide ID
time_slide_dict = {int(time_slide.get_time_slide_id(ov)): ov
for ov in time_slide_list}
# Check time_slide_ids are ordered correctly.
ids = _get_id_numbers(time_slide,
"time_slide_id")[::len(time_slide_dict[0].keys())]
if not (numpy.all(ids[1:] == numpy.array(ids[:-1])+1) and ids[0] == 0):
err_msg = "time_slide_ids list should start at zero and increase by "
err_msg += "one for every element"
Expand Down

0 comments on commit e3a90ac

Please sign in to comment.