Fix IndexError in PyCBC Live's followup code (gwastro#4676)
* Draft fix for followup indexing error

* Use a nonzero min required lookback, better variable names

* Alex's comment on td_samples vs delta_f

* Add docstring for `followup_event_significance`

* Docstring fixes

* Work on comments

* Try to fix Sphinx error

* Simplify definition of strain buffer length

* Explicit kwarg name

Co-authored-by: maxtrevor <[email protected]>

* Simplify `from_cli()` args

* Fix error

* Update example

---------

Co-authored-by: maxtrevor <[email protected]>
titodalcanton and maxtrevor committed Apr 10, 2024
1 parent 0d98014 commit 1198d96
Showing 5 changed files with 223 additions and 79 deletions.
32 changes: 23 additions & 9 deletions bin/pycbc_live
@@ -104,6 +104,7 @@ class LiveEventManager(object):
self.padata = livepau.PAstroData(args.p_astro_spec, args.bank_file)
self.use_date_prefix = args.day_hour_output_prefix
self.ifar_upload_threshold = args.ifar_upload_threshold
self.pvalue_lookback_time = args.pvalue_lookback_time
self.pvalue_livetime = args.pvalue_combination_livetime
self.gracedb_server = args.gracedb_server
self.gracedb_search = args.gracedb_search
@@ -221,7 +222,8 @@ class LiveEventManager(object):
self.data_readers[ifo],
self.bank,
template_id,
coinc_times
coinc_times,
lookback=self.pvalue_lookback_time
)
if pvalue_info is None:
continue
@@ -802,6 +804,20 @@ class LiveEventManager(object):
store_psd[ifo].save(fname, group='%s/psd' % ifo)


def check_max_length(args, waveforms):
"""Check that the `--max-length` option is sufficient to accomodate the
longest template in the bank and the PSD estimation options.
"""
lengths = numpy.array([1.0 / wf.delta_f for wf in waveforms])
psd_len = args.psd_segment_length * (args.psd_samples // 2 + 1)
max_length = max(lengths.max() + args.pvalue_lookback_time, psd_len)
if max_length > args.max_length:
raise ValueError(
'--max-length is too short for this template bank. '
f'Use at least {max_length}.'
)


parser = argparse.ArgumentParser(description=__doc__)
pycbc.waveform.bank.add_approximant_arg(parser)
parser.add_argument('--verbose', action='store_true')
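As a rough illustration of the arithmetic in `check_max_length()` (all numbers hypothetical): with --psd-segment-length 16 and --psd-samples 32, PSD estimation needs 16 * (32 // 2 + 1) = 272 s of strain, while a 256 s template (delta_f = 1/256 Hz) plus the default 150 s p-value lookback needs 406 s, so --max-length must be at least 406 s.

    psd_len = 16 * (32 // 2 + 1)                 # PSD estimation: 272 s
    template_len = 1.0 / (1.0 / 256)             # longest template: 256 s
    required = max(template_len + 150, psd_len)  # lookback pushes this to 406 s
    assert required == 406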
@@ -962,6 +978,10 @@ parser.add_argument('--enable-background-estimation', default=False, action='sto
parser.add_argument('--ifar-double-followup-threshold', type=float, required=True,
help='Inverse-FAR threshold to followup double coincs with'
'additional detectors')
parser.add_argument('--pvalue-lookback-time', type=float, default=150,
metavar='SECONDS',
help='Lookback time for the calculation of the p-value in '
'followup detectors.')
parser.add_argument('--pvalue-combination-livetime', type=float, required=True,
help="Livetime used for p-value combination with followup "
"detectors, in years")
@@ -1106,13 +1126,10 @@ with ctx:
print(e)
exit()

maxlen = args.psd_segment_length * (args.psd_samples // 2 + 1)
if evnt.rank > 0:
bank.table.sort(order='mchirp')
waveforms = list(bank[evnt.rank-1::evnt.size-1])
lengths = numpy.array([1.0 / wf.delta_f for wf in waveforms])
psd_len = args.psd_segment_length * (args.psd_samples // 2 + 1)
maxlen = max(lengths.max(), psd_len)
check_max_length(args, waveforms)
mf = LiveBatchMatchedFilter(waveforms, args.snr_threshold,
args.chisq_bins, sg_chisq,
snr_abort_threshold=args.snr_abort_threshold,
@@ -1134,11 +1151,8 @@ with ctx:
# Initialize the data readers for all detectors. For rank 0, we need data
# from all detectors, including the localization-only ones. For higher
# ranks, we only need the detectors that can generate candidates.
if args.max_length is not None:
maxlen = args.max_length
maxlen = int(maxlen)
data_reader = {
ifo: StrainBuffer.from_cli(ifo, args, maxlen)
ifo: StrainBuffer.from_cli(ifo, args)
for ifo in (evnt.ifos if evnt.rank == 0 else evnt.trigg_ifos)
}
evnt.data_readers = data_reader
2 changes: 1 addition & 1 deletion examples/live/run.sh
@@ -131,7 +131,7 @@ python -m mpi4py `which pycbc_live` \
--sample-rate 2048 \
--enable-bank-start-frequency \
--low-frequency-cutoff ${f_min} \
--max-length 256 \
--max-length 512 \
--approximant "SPAtmplt:mtotal<4" "SEOBNRv4_ROM:else" \
--chisq-bins "0.72*get_freq('fSEOBNRv4Peak',params.mass1,params.mass2,params.spin1z,params.spin2z)**0.7" \
--snr-abort-threshold 500 \
120 changes: 108 additions & 12 deletions pycbc/filter/matchedfilter.py
@@ -1804,7 +1804,50 @@ def followup_event_significance(ifo, data_reader, bank,
to determine if the SNR in the first detector has a significant peak
in the on-source window. The significance is given in terms of a
p-value. See Dal Canton et al. 2021 (https://arxiv.org/abs/2008.07494)
for details.
for details. A portion of the SNR time series around the on-source window
is also returned for use in BAYESTAR.

If the calculation cannot be carried out, for example because `ifo` is
not in observing mode at the requested time, then None is returned.
Otherwise, the dict contains the following keys. `snr_series` is a
TimeSeries object with the SNR time series for BAYESTAR. `peak_time` is the
time of maximum SNR in the on-source window. `pvalue` is the p-value for
the maximum on-source SNR compared to the off-source realizations.
`pvalue_saturated` is a bool indicating whether the p-value is limited by
the number of off-source realizations, i.e. whether the maximum on-source
SNR is larger than all the off-source ones. `sigma2` is the SNR
normalization (squared) for the given template and detector.

Parameters
----------
ifo: str
Which detector is being used for the calculation.
data_reader: StrainBuffer
StrainBuffer object providing the data for the given detector.
bank: LiveFilterBank
Template bank object providing the template related quantities.
template_id: int
Index of the template in the bank.
coinc_times: dict
Dictionary keyed by detector names reporting the coalescence times of
a candidate measured at the different detectors. Used to define the
on-source window of the candidate in `ifo`.
coinc_threshold: float
Nominal statistical uncertainty in `coinc_times`; expands the
on-source window by twice the given amount.
lookback: float
Nominal amount of time to use for the calculation of the on-source and
off-source SNR time series. The actual time may be reduced depending on
the duration of the template and the strain buffer in the data reader
(if so, a warning is logged).
duration: float
Duration of the SNR time series to be reported to BAYESTAR.

Returns
-------
followup_info: dict or None
Results of the followup calculation (see above) or None if `ifo` did
not have usable data.
"""
from pycbc.waveform import get_waveform_filter_length_in_time
tmplt = bank.table[template_id]
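A minimal sketch of how a caller might consume the returned dict (the detector name and the surrounding variables are hypothetical; the keys are the ones documented above):

    followup = followup_event_significance(
        'V1', data_reader, bank, template_id, coinc_times
    )
    if followup is not None:
        snr_series = followup['snr_series']  # TimeSeries for BAYESTAR
        print(followup['peak_time'], followup['pvalue'],
              followup['pvalue_saturated'], followup['sigma2'])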
@@ -1830,17 +1873,63 @@ def followup_event_significance(ifo, data_reader, bank,
onsource_start -= coinc_threshold
onsource_end += coinc_threshold

# Calculate how much time needed to calculate significance
trim_pad = (data_reader.trim_padding * data_reader.strain.delta_t)
bdur = int(lookback + 2.0 * trim_pad + length_in_time)
if bdur > data_reader.strain.duration * .75:
bdur = data_reader.strain.duration * .75
# Calculate how much time is needed to calculate the significance.
# At the minimum, we need enough time to include the lookback, plus time
# that we will throw away because of corruption from finite-duration filter
# responses (this is equal to the nominal padding plus the template
# duration). Next, for efficiency, we round the resulting duration up to
# align it with one of the frequency resolutions preferred by the template
# bank. And finally, the resulting duration must fit into the strain buffer
# available in the data reader, so we check that.
trim_pad = data_reader.trim_padding * data_reader.strain.delta_t
buffer_duration = lookback + 2 * trim_pad + length_in_time
buffer_samples = bank.round_up(int(buffer_duration * bank.sample_rate))
max_safe_buffer_samples = int(
0.9 * data_reader.strain.duration * bank.sample_rate
)
if buffer_samples > max_safe_buffer_samples:
buffer_samples = max_safe_buffer_samples
new_lookback = (
buffer_samples / bank.sample_rate - (2 * trim_pad + length_in_time)
)
# Require a minimum lookback time of twice the on-source window or the
# reported SNR time series (whichever is longer), so we have enough data
# for the on-source window, the SNR time series, and at least a few
# background samples.
min_required_lookback = 2 * max(onsource_end - onsource_start, duration)
if new_lookback > min_required_lookback:
logging.warning(
'Strain buffer too short for a lookback time of %f s, '
'reducing lookback to %f s',
lookback,
new_lookback
)
else:
logging.error(
'Strain buffer too short to compute the followup SNR time '
'series for template %d, will not use %s for followup. '
'Either use shorter templates, or raise --max-length.',
template_id,
ifo
)
return None
buffer_duration = buffer_samples / bank.sample_rate

# Require all strain be valid within lookback time
if data_reader.state is not None:
state_start_time = data_reader.strain.end_time \
- data_reader.reduced_pad * data_reader.strain.delta_t - bdur
if not data_reader.state.is_extent_valid(state_start_time, bdur):
state_start_time = (
data_reader.strain.end_time
- data_reader.reduced_pad * data_reader.strain.delta_t
- buffer_duration
)
if not data_reader.state.is_extent_valid(
state_start_time, buffer_duration
):
logging.info(
'%s strain buffer contains invalid data during lookback, '
'will not use for followup',
ifo
)
return None

# We won't require that all DQ checks be valid for now, except at
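A numeric sketch of the sizing logic above, under assumed values and with a power-of-two stand-in for bank.round_up (the real method aligns the length to the bank's preferred frequency resolutions):

    round_up = lambda n: 1 << (n - 1).bit_length()  # stand-in for bank.round_up
    sample_rate = 2048
    lookback, trim_pad, length_in_time = 150.0, 0.5, 90.0
    buffer_samples = round_up(
        int((lookback + 2 * trim_pad + length_in_time) * sample_rate)
    )  # 241 s of data, rounded up to 524288 samples = 256 s
    max_safe_buffer_samples = int(0.9 * 512 * sample_rate)  # 90% of a 512 s buffer
    if buffer_samples > max_safe_buffer_samples:
        # cap the buffer and shrink the lookback; give up if it gets too short
        buffer_samples = max_safe_buffer_samples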
@@ -1849,16 +1938,23 @@ def followup_event_significance(ifo, data_reader, bank,
dq_start_time = onsource_start - duration / 2.0
dq_duration = onsource_end - onsource_start + duration
if not data_reader.dq.is_extent_valid(dq_start_time, dq_duration):
logging.info(
'%s DQ buffer indicates invalid data during onsource window, '
'will not use for followup',
ifo
)
return None

# Calculate SNR time series for this duration
htilde = bank.get_template(template_id, min_buffer=bdur)
# Calculate SNR time series for the entire lookback duration
htilde = bank.get_template(
template_id, delta_f=bank.sample_rate / float(buffer_samples)
)
stilde = data_reader.overwhitened_data(htilde.delta_f)

sigma2 = htilde.sigmasq(stilde.psd)
snr, _, norm = matched_filter_core(htilde, stilde, h_norm=sigma2)

# Find peak in on-source and determine p-value
# Find peak SNR in on-source and determine p-value
onsrc = snr.time_slice(onsource_start, onsource_end)
peak = onsrc.abs_arg_max()
peak_time = peak * snr.delta_t + onsrc.start_time
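The off-source comparison itself is below the fold; as a hedged sketch of the empirical p-value described in the docstring, using the common (n+1)/(N+1) convention (both the convention and the numbers are assumptions, not necessarily the exact implementation):

    import numpy
    offsource_peaks = numpy.array([4.1, 3.8, 5.0, 4.4])  # hypothetical off-source maxima
    onsource_peak = 5.2                                  # hypothetical on-source maximum
    n_louder = numpy.count_nonzero(offsource_peaks >= onsource_peak)
    pvalue = (n_louder + 1) / (len(offsource_peaks) + 1)
    pvalue_saturated = n_louder == 0  # on-source beat every off-source realization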
80 changes: 42 additions & 38 deletions pycbc/strain/strain.py
@@ -1418,8 +1418,8 @@ def execute_cached_ifft(*args, **kwargs):

class StrainBuffer(pycbc.frame.DataBuffer):
def __init__(self, frame_src, channel_name, start_time,
max_buffer=512,
sample_rate=4096,
max_buffer,
sample_rate,
low_frequency_cutoff=20,
highpass_frequency=15.0,
highpass_reduction=200.0,
@@ -1460,9 +1460,9 @@ def __init__(self, frame_src, channel_name, start_time,
Name of the channel to read from the frame files
start_time:
Time to start reading from.
max_buffer: {int, 512}, Optional
Length of the buffer in seconds
sample_rate: {int, 2048}, Optional
max_buffer: int
Length of the strain buffer in seconds.
sample_rate: int
Rate in Hz to sample the data.
low_frequency_cutoff: {float, 20}, Optional
The low frequency cutoff to use for inverse spectrum truncation
@@ -1534,7 +1534,7 @@ def __init__(self, frame_src, channel_name, start_time,
filesystem.
"""
super(StrainBuffer, self).__init__(frame_src, channel_name, start_time,
max_buffer=32,
max_buffer=max_buffer,
force_update_cache=force_update_cache,
increment_update_cache=increment_update_cache)

@@ -1950,7 +1950,7 @@ def advance(self, blocksize, timeout=10):
return self.wait_duration <= 0

@classmethod
def from_cli(cls, ifo, args, maxlen):
def from_cli(cls, ifo, args):
"""Initialize a StrainBuffer object (data reader) for a particular
detector.
"""
@@ -1982,34 +1982,38 @@ def from_cli(cls, ifo, args):
frame_src = [args.frame_src[ifo]]
strain_channel = ':'.join([ifo, args.channel_name[ifo]])

return cls(frame_src, strain_channel,
args.start_time, max_buffer=maxlen * 2,
state_channel=state_channel,
data_quality_channel=dq_channel,
idq_channel=idq_channel,
idq_state_channel=idq_state_channel,
idq_threshold=args.idq_threshold,
sample_rate=args.sample_rate,
low_frequency_cutoff=args.low_frequency_cutoff,
highpass_frequency=args.highpass_frequency,
highpass_reduction=args.highpass_reduction,
highpass_bandwidth=args.highpass_bandwidth,
psd_samples=args.psd_samples,
trim_padding=args.trim_padding,
psd_segment_length=args.psd_segment_length,
psd_inverse_length=args.psd_inverse_length,
autogating_threshold=args.autogating_threshold,
autogating_cluster=args.autogating_cluster,
autogating_pad=args.autogating_pad,
autogating_width=args.autogating_width,
autogating_taper=args.autogating_taper,
autogating_duration=args.autogating_duration,
autogating_psd_segment_length=args.autogating_psd_segment_length,
autogating_psd_stride=args.autogating_psd_stride,
psd_abort_difference=args.psd_abort_difference,
psd_recalculate_difference=args.psd_recalculate_difference,
force_update_cache=args.force_update_cache,
increment_update_cache=args.increment_update_cache[ifo],
analyze_flags=analyze_flags,
data_quality_flags=dq_flags,
dq_padding=args.data_quality_padding)
return cls(
frame_src,
strain_channel,
args.start_time,
max_buffer=args.max_length,
state_channel=state_channel,
data_quality_channel=dq_channel,
idq_channel=idq_channel,
idq_state_channel=idq_state_channel,
idq_threshold=args.idq_threshold,
sample_rate=args.sample_rate,
low_frequency_cutoff=args.low_frequency_cutoff,
highpass_frequency=args.highpass_frequency,
highpass_reduction=args.highpass_reduction,
highpass_bandwidth=args.highpass_bandwidth,
psd_samples=args.psd_samples,
trim_padding=args.trim_padding,
psd_segment_length=args.psd_segment_length,
psd_inverse_length=args.psd_inverse_length,
autogating_threshold=args.autogating_threshold,
autogating_cluster=args.autogating_cluster,
autogating_pad=args.autogating_pad,
autogating_width=args.autogating_width,
autogating_taper=args.autogating_taper,
autogating_duration=args.autogating_duration,
autogating_psd_segment_length=args.autogating_psd_segment_length,
autogating_psd_stride=args.autogating_psd_stride,
psd_abort_difference=args.psd_abort_difference,
psd_recalculate_difference=args.psd_recalculate_difference,
force_update_cache=args.force_update_cache,
increment_update_cache=args.increment_update_cache[ifo],
analyze_flags=analyze_flags,
data_quality_flags=dq_flags,
dq_padding=args.data_quality_padding
)
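Since max_buffer and sample_rate are now required, any direct construction outside from_cli() must pass both explicitly. A hedged sketch (frame file, channel and times are hypothetical):

    buf = StrainBuffer(
        ['H-H1_llhoft.gwf'],    # frame_src
        'H1:GDS-CALIB_STRAIN',  # channel_name
        1187008882,             # start_time
        max_buffer=512,         # seconds of strain kept, matching --max-length
        sample_rate=2048,
    )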
