Skip to content

Commit

Permalink
Working version of offline dq workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
maxtrevor committed Jan 10, 2024
1 parent 02e3ce2 commit 2a26fa5
Show file tree
Hide file tree
Showing 7 changed files with 168 additions and 195 deletions.
6 changes: 1 addition & 5 deletions bin/all_sky_search/pycbc_bin_templates
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import numpy as np
import pycbc
import pycbc.pnutils
from pycbc.version import git_verbose_msg as version
from pycbc.events import background_bin_from_string, stat as pystat
from pycbc.events import background_bin_from_string

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--version', action='version', version=version)
Expand All @@ -23,12 +23,8 @@ parser.add_argument('--bank-file', help='hdf format template bank file',
parser.add_argument('--background-bins', nargs='+',
help='Used to provide a list of '
'precomputed background bins')
parser.add_argument('--approximant', type=str, default='SEOBNRv4',
help='Approximant used to calculate template durations.')
parser.add_argument("--output-file", required=True)

pystat.insert_statistic_option_group(parser,
default_ranking_statistic='single_ranking_only')
args = parser.parse_args()

pycbc.init_logging(args.verbose)
Expand Down
23 changes: 12 additions & 11 deletions bin/all_sky_search/pycbc_bin_trigger_rates_dq
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,12 @@ from pycbc.version import git_verbose_msg as version
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--version', action='version', version=version)
parser.add_argument('--verbose', action="store_true")
parser.add_argument("--ifo", type=str, required=True)
parser.add_argument("--dq-label", type=str, required=True)
parser.add_argument("--template-bins-file", required=True)
parser.add_argument("--trig-file", required=True)
parser.add_argument("--flag-file", required=True)
parser.add_argument("--flag-name", required=True)
parser.add_argument("--analysis-segments", required=True)
parser.add_argument("--analysis-segment-file", required=True)
parser.add_argument("--analysis-segment-name", required=True)
parser.add_argument("--gating-windows", nargs='+',
action=MultiDetOptionAction,
help="Seconds to be reweight before and after the central time "
Expand All @@ -35,9 +34,6 @@ parser.add_argument("--gating-windows", nargs='+',
parser.add_argument("--stat-threshold", type=float, default=1.,
help="Only consider triggers with --sngl-ranking value "
"above this threshold")
parser.add_argument("--f-lower", type=float, default=15.,
help='Enforce a uniform low frequency cutoff to '
'calculate template duration over the bank')
parser.add_argument("--output-file", required=True)

pystat.insert_statistic_option_group(
Expand All @@ -47,7 +43,7 @@ pycbc.init_logging(args.verbose)

logging.info('Start')

ifo = args.ifo
ifo, flag_name = args.flag_name.split(':')

# Setup a data mask to remove any triggers with SNR below threshold
# This works as a pre-filter as SNR is always greater than or equal
Expand Down Expand Up @@ -76,7 +72,7 @@ with HFile(args.trig_file, 'r') as trig_file:

# also get the gated times while we have the file open
gate_times = []
if args.gating_window:
if args.gating_windows:
logging.info('Getting gated times')
try:
gating_key = f'{ifo}/gating'
Expand Down Expand Up @@ -122,15 +118,21 @@ with h5.File(args.template_bins_file, 'r') as f:
for bin_name in ifo_grp.keys():
bin_tids_dict[bin_name] = ifo_grp[bin_name]['tids'][:]


# get analysis segments
analysis_segs = select_segments_by_definer(args.analysis_segment_file,
segment_name=args.analysis_segment_name,
ifo=ifo)

# get flag segments
flag_segs = select_segments_by_definer(args.flag_file,
segment_name=args.flag_name,
segment_name=flag_name,
ifo=ifo)

# construct gate segments
gating_segs = segmentlist([])
if args.gating_windows:
gating_windows = args.gating_windows[args.ifo].split(',')
gating_windows = args.gating_windows[ifo].split(',')
gate_before = float(gating_windows[0])
gate_after = float(gating_windows[1])
if gate_before > 0 or gate_after < 0:
Expand All @@ -143,7 +145,6 @@ if args.gating_windows:
).coalesce()

# make segments into mutually exclusive dq states
analysis_segs = args.analysis_segments
gating_segs = gating_segs & analysis_segs
flag_segs = flag_segs & analysis_segs

Expand Down
84 changes: 51 additions & 33 deletions bin/plotting/pycbc_plot_dq_flag_likelihood
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@ import argparse
import numpy
import pycbc
import h5py
from matplotlib import use; use('Agg')
from matplotlib import use
from matplotlib import pyplot
from pycbc.version import git_verbose_msg as version
import pycbc.results
use('Agg')

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--version', action='version', version=version)
parser.add_argument('--verbose', action="store_true")
parser.add_argument("--dq-file", required=True)
parser.add_argument("--dq-label", required=True)
parser.add_argument("--ifo", type=str, required=True)
parser.add_argument("--output-file", required=True)
args = parser.parse_args()
Expand All @@ -24,45 +26,61 @@ pycbc.init_logging(args.verbose)
ifo = args.ifo

f = h5py.File(args.dq_file, 'r')
dq_name = f.attrs['stat'].split('-')[1]
logrates_grp = f[ifo + '/dq_percentiles']
bin_names = logrates_grp.keys()

# when using a flag, should have exactly 2 dq bins (off and on)
# times when the flag was on go to the last dq bin
for b in bin_names:
num_bins = len(logrates_grp[b])
assert num_bins == 2, f'Flag should have exactly 2 dq bins, found {num_bins}'
logrates = [logrates_grp[b][-1] for b in bin_names]
x = numpy.arange(len(logrates))

ymax = 1.2 * max(logrates)
color = pycbc.results.ifo_color(ifo)

fig = pyplot.figure(0)
ax = fig.add_subplot(111)
ifo_grp = f[ifo]

dq_states = ifo_grp['dq_segments'].keys()
bin_names = ifo_grp['bins'].keys()

num_bins = len(bin_names)
x = numpy.arange(num_bins)
width = 0.25

dq_states = {
0: 'Clean',
1: 'DQ Flag',
2: 'Autogating',
}

colors = {
0: 'green',
1: 'gold',
2: 'red'
}

fig, ax = pyplot.subplots(figsize=(9, 6))
ax2 = ax.twinx()

ax2.bar(x, height=logrates, label=ifo, color=color)
ax2.set_ylabel('DQ Log Likelihood Penalty')
ax2.legend(loc='upper left', markerscale=5)
ax2.set_ylim(0, ymax)
ax2.grid()
ymax = 1
ymin = 1

yticks = ax.get_yticks()
for n, dqstate_name in dq_states.items():
dq_rates = numpy.array([ifo_grp['bins'][b]['dq_rates'][n] for b in bin_names])
logrates = numpy.log(dq_rates)

ax_ymax = numpy.exp(ymax)
ax.set_ylim(1, ax_ymax)
new_ticks = range(0, int(numpy.ceil(numpy.log10(ax_ymax))))
ax.set_yticks([10**t for t in new_ticks])
ax.set_ylabel('(Rate during DQ Flag)/(Mean Rate) [Min 1]')
ymax = max(ymax, numpy.max(dq_rates))
ymin = min(ymin, numpy.min(dq_rates))

offset = width*(n-1)
ax.bar(x + offset, dq_rates, width, label=dqstate_name, color=colors[n])

ymax = ymax**1.05
ymin = ymin**1.05
if not ymin:
ymin = 1e-3
ax.set_ylim(ymin, ymax)
ax.set_yscale('log')
ax.set_xlabel('Template Bin Number')
ax.set_ylabel('(Trigger rate during DQ state)/(Mean Rate)')

ax.set_xticks(x)
ax.set_xlabel('Template Bin Number (Longer duration -> Lower number)')
ax.legend()
ax.grid()

ax2.set_ylabel('DQ Log Likelihood Penalty')
ax2.set_ylim(numpy.log(ymin), numpy.log(ymax))

# add meta data and save figure
plot_title = f'{ifo}: {dq_name} flag log likelihood'
plot_caption = f'The log likelihood correction during during times flagged \
by the {dq_name} flag'
plot_title = f'{ifo} DQ Trigger Rates'
plot_caption = 'The log likelihood correction during during each dq state for each template bin.'
pycbc.results.save_fig_with_metadata(fig, args.output_file, title=plot_title,
caption=plot_caption, cmd=' '.join(sys.argv))
35 changes: 10 additions & 25 deletions bin/workflows/pycbc_make_offline_search_workflow
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,8 @@ insps = wf.merge_single_detector_hdf_files(workflow, hdfbank,
statfiles = []

dqfiles, dqfile_labels = wf.setup_dq_reranking(workflow, insps,
hdfbank, analyzable_segs,
hdfbank, analyzable_file,
analyzable_name,
dq_segment_file,
output_dir='dq',
tags=['full_data'])
Expand Down Expand Up @@ -549,32 +550,16 @@ for key in final_bg_files:

# DQ log likelihood plots
for dqf, dql in zip(dqfiles, dqfile_labels):
dq_type = workflow.cp.get_opt_tags('workflow-data_quality', 'dq-type', tags=[dql])
tags = [dql, 'full_data']
if dq_type == 'timeseries':
# plot rates as function of time
outdir = rdir[f'data_quality/{dqf.ifo}_{dql}_trigger_rates']
wf.make_dq_timeseries_trigger_rate_plot(workflow, [dqf], outdir,
tags=tags)
# plot rates as function of dq percentile
outdir = rdir[f'data_quality/{dqf.ifo}_{dql}_percentiles']
wf.make_dq_percentile_plot(workflow, [dqf], outdir, tags=tags)
elif dq_type == 'flag':
# plot rates when flag was on
outdir = rdir[f'data_quality/{dqf.ifo}_{dql}_trigger_rates']
wf.make_dq_flag_trigger_rate_plot(workflow, [dqf], outdir, tags=[dql])
# plot rates when flag was on
outdir = rdir[f'data_quality/{dqf.ifo}_{dql}_trigger_rates']
wf.make_dq_flag_trigger_rate_plot(workflow, dqf, dql, outdir, tags=[dql])

# DQ background binning plot
if workflow.cp.has_option_tags('bin_trigger_rates_dq', 'background-bins',
tags=None):
background_bins = workflow.cp.get_opt_tags('bin_trigger_rates_dq',
'background-bins', tags=None)
bank_tags = workflow.cp.get_subsections('plot_bank')
if len(bank_tags) == 0:
bank_tags = ['bank']
for b_tag in bank_tags:
wf.make_template_plot(workflow, hdfbank, rdir['data_quality'],
bins=background_bins, tags=['dq_bins', b_tag])
ifo_bins = workflow.cp.get_subsections('bin_templates')
for ifo in ifo_bins:
background_bins = workflow.cp.get_opt_tags('bin_templates', 'background-bins', tags=[ifo])
wf.make_template_plot(workflow, hdfbank, rdir['data_quality'],
bins=background_bins, tags=[ifo, 'dq_bins'])

############################## Setup the injection runs #######################

Expand Down
58 changes: 25 additions & 33 deletions pycbc/events/stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import logging
import numpy
import h5py
import copy
from . import ranking
from . import coinc_rate
from .eventmgr_cython import logsignalrateinternals_computepsignalbins
Expand Down Expand Up @@ -1564,9 +1563,6 @@ def rank_stat_coinc(self, s, slide, step, to_shift,

# First get signal PDF logr_s
stat = {ifo: st for ifo, st in s}

self.curr_mchirp = kwargs['mchirp']

logr_s = self.logsignalrate(stat, slide * step, to_shift)

# Find total volume of phase-time-amplitude space occupied by noise
Expand Down Expand Up @@ -2088,11 +2084,11 @@ def setup_segments(self, key):
ifo = key.split('-')[0]
with h5py.File(self.files[key], 'r') as dq_file:
ifo_grp = dq_file[ifo]
if 'dqsegs' in ifo_grp.keys():
if 'dq_segments' in ifo_grp.keys():
# if segs are in stat file, we are not in LL
assert not self.low_latency, 'Should not have segments in LL'
dq_state_segs_dict = {}
for k in ifo_grp['dqsegs'].keys():
for k in ifo_grp['dq_segments'].keys():
seg_dict = {}
seg_dict['start'] = ifo_grp[f'dq_segments/{k}/segment_starts'][:]
seg_dict['end'] = ifo_grp[f'dq_segments/{k}/segment_ends'][:]
Expand Down Expand Up @@ -2142,11 +2138,28 @@ def find_dq_state_by_time(self, ifo, times):
starts = self.dq_state_segments[ifo][k]['start']
ends = self.dq_state_segments[ifo][k]['end']
inds = indices_within_times(times, starts, ends)
# states are named in file as 'stateN'
dq_state[inds] = int(k[5:])
# states are named in file as 'dq_state_N', need to extract N
dq_state[inds] = int(k[9:])
return dq_state

def single(self, trigs):
def lognoiserate(self, trigs):
"""
Calculate the log noise rate density over single-ifo ranking
Read in single trigger information, compute the ranking
and rescale by the fitted coefficients alpha and rate
Parameters
-----------
trigs: dict of numpy.ndarrays, h5py group or similar dict-like object
Object holding single detector trigger information.
Returns
---------
lognoiserate: numpy.array
Array of log noise rate density for each input trigger.
"""

# make sure every trig has a dq state

try:
Expand All @@ -2165,34 +2178,13 @@ def single(self, trigs):
dq_state = trigs['dq_state'][:]
else:
dq_state = self.find_dq_state_by_time(
ifo, trigs['end_time'][:]
)

trigsc = copy.copy(trigs)
trigsc['dq_rate'] = self.find_dq_noise_rate(trigsc, dq_state)
singles = ExpFitFgBgNormStatistic.single(self, trigsc)
return singles

def lognoiserate(self, trigs):
"""
Calculate the log noise rate density over single-ifo ranking
Read in single trigger information, compute the ranking
and rescale by the fitted coefficients alpha and rate
ifo, trigs['end_time'][:])

Parameters
-----------
trigs: dict of numpy.ndarrays, h5py group or similar dict-like object
Object holding single detector trigger information.
dq_rate = self.find_dq_noise_rate(trigs, dq_state)

Returns
---------
lognoisel: numpy.array
Array of log noise rate density for each input trigger.
"""
logr_n = ExpFitFgBgNormStatistic.lognoiserate(
self, trigs)
logr_n += numpy.log(trigs['dq_rate'][:])
logr_n += numpy.log(dq_rate)
return logr_n


Expand Down
Loading

0 comments on commit 2a26fa5

Please sign in to comment.