Merge branch 'gwastro:master' into template_fits
ArthurTolley authored Oct 12, 2023
2 parents 379da30 + cee6437 commit 84f28a7
Showing 13 changed files with 236 additions and 84 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/mac-test.yml
@@ -21,7 +21,7 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- run: |
brew install fftw openssl
brew install fftw openssl gsl
pip install --upgrade pip setuptools "tox<4.0.0"
- name: run basic pycbc test suite
run: |
157 changes: 101 additions & 56 deletions bin/all_sky_search/pycbc_bin_trigger_rates_dq
@@ -9,15 +9,16 @@ from pycbc.events import stat as pystat
from pycbc.types.timeseries import load_timeseries
import numpy as np
import h5py as h5
from pycbc.io.hdf import HFile, SingleDetTriggers
from pycbc.version import git_verbose_msg as version

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--version', action='version', version=version)
parser.add_argument('--verbose', action="store_true")
parser.add_argument("--ifo", type=str, required=True)
parser.add_argument("--trig-file", required=True)
parser.add_argument("--stat-threshold", type=float,
help="Only consider triggers with statistic value "
parser.add_argument("--stat-threshold", type=float, default=1.,
help="Only consider triggers with --sngl-ranking value "
"above this threshold")
parser.add_argument("--f-lower", type=float, default=15.,
help='Enforce a uniform low frequency cutoff to '
@@ -45,69 +45,90 @@ pystat.insert_statistic_option_group(parser,
args = parser.parse_args()
pycbc.init_logging(args.verbose)

with h5.File(args.bank_file, 'r') as bank:
if args.background_bins:
logging.info('Sorting bank into bins...')
data = {
'mass1': bank['mass1'][:],
'mass2': bank['mass2'][:],
'spin1z': bank['spin1z'][:],
'spin2z': bank['spin2z'][:],
'f_lower': np.ones_like(bank['mass1'][:]) * args.f_lower
}
locs_dict = pycbc.events.background_bin_from_string(
args.background_bins, data)
del data
locs_names = [b.split(':')[0] for b in args.background_bins]
else:
locs_dict = {'all_bin': np.arange(0, len(bank['mass1'][:]), 1)}
locs_names = ['all_bin']
logging.info('Start')

logging.info('Reading trigger file...')
ifo = args.ifo
with h5.File(args.trig_file, 'r') as trig_file:
trig_times = trig_file[ifo+'/end_time'][:]
trig_ids = trig_file[ifo+'/template_id'][:]

if args.stat_threshold or args.prune_number > 0:
logging.info('Calculating stat and filtering...')
rank_method = pystat.get_statistic_from_opts(args, [ifo])
stat = rank_method.get_sngl_ranking(trig_file[ifo])
if args.stat_threshold:
abovethresh = stat >= args.stat_threshold
trig_ids = trig_ids[abovethresh]
trig_times = trig_times[abovethresh]
stat = stat[abovethresh]
del abovethresh
if args.prune_number < 1:
del stat

trig_times_int = trig_times.astype('int')

dq_times = np.array([])
# Set up a data mask to remove any triggers with SNR below threshold.
# This works as a pre-filter, since the SNR is always greater than or
# equal to the sngl_ranking, except in the psdvar case, where the
# ranking can exceed the SNR.

with HFile(args.trig_file, 'r') as trig_file:
n_triggers_orig = trig_file[f'{ifo}/snr'].size
logging.info("Trigger file has %d triggers", n_triggers_orig)
logging.info('Generating trigger mask')
if f'{ifo}/psd_var_val' in trig_file:
idx, _, _ = trig_file.select(
lambda snr, psdvar: snr / psdvar ** 0.5 >= args.stat_threshold,
f'{ifo}/snr',
f'{ifo}/psd_var_val',
return_indices=True
)
else:
# psd_var_val may not have been calculated
idx, _ = trig_file.select(
lambda snr: snr >= args.stat_threshold,
f'{ifo}/snr',
return_indices=True
)
data_mask = np.zeros(n_triggers_orig, dtype=bool)
data_mask[idx] = True

logging.info("Getting %s triggers from file with pre-cut SNR > %.3f",
idx.size, args.stat_threshold)

trigs = SingleDetTriggers(
args.trig_file,
None,
None,
None,
None,
ifo,
premask=data_mask
)

# Extract the data we actually need from the data structure:
tmplt_ids = trigs.template_id
trig_times = trigs.end_time
stat = trigs.get_ranking(args.sngl_ranking)
trig_times_int = trig_times.astype(np.int64)

del trigs

n_triggers = tmplt_ids.size

logging.info("Applying %s > %.3f cut", args.sngl_ranking,
args.stat_threshold)
keep = stat >= args.stat_threshold
tmplt_ids = tmplt_ids[keep]
trig_times = trig_times[keep]
trig_times_int = trig_times_int[keep]
logging.info("Removed %d triggers, %d remain",
n_triggers - tmplt_ids.size, tmplt_ids.size)

dq_logl = np.array([])
dq_times = np.array([])

for filename in args.dq_file:
logging.info('Reading DQ file %s...', filename)
logging.info('Reading DQ file %s', filename)
dq_data = load_timeseries(filename, group=args.dq_channel)
dq_logl = np.concatenate((dq_logl, dq_data[:]))
dq_times = np.concatenate((dq_times, dq_data.sample_times))
del dq_data


n_bins = args.n_time_bins
percentiles = np.linspace(0, 100, n_bins+1)
bin_times = np.zeros(n_bins)
dq_percentiles = np.percentile(dq_logl, percentiles)[1:]

# seconds_bin tells which bin each second ends up in
seconds_bin = np.array([n_bins - np.sum(dq_percentiles >= dq_ll)
for dq_ll in dq_logl]).astype('int')
for dq_ll in dq_logl]).astype(np.int64)
del dq_percentiles

# bin_times tells how much time ends up in each bin
bin_times = np.array([np.sum(seconds_bin == i)
for i in range(n_bins)]).astype('float')
for i in range(n_bins)]).astype(np.float64)
full_time = float(len(seconds_bin))
times_nz = (bin_times > 0)
del dq_logl
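The new pre-filter above leans on HFile.select, which streams an HDF5 dataset through a predicate and, with return_indices=True, also returns the positions of the surviving rows. A minimal sketch of the same pattern, assuming a single-detector trigger file with an H1/snr dataset (file name and threshold are illustrative):

import numpy as np
from pycbc.io.hdf import HFile

# Hypothetical file name, for illustration only
with HFile('H1-TRIGGERS.hdf', 'r') as f:
    n_triggers = f['H1/snr'].size
    # Keep only rows whose SNR clears the threshold; idx are the
    # positions of those rows in the full dataset
    idx, snr = f.select(lambda snr: snr >= 6.0, 'H1/snr',
                        return_indices=True)

# Boolean premask over all triggers, as passed to SingleDetTriggers
mask = np.zeros(n_triggers, dtype=bool)
mask[idx] = True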
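Further down, the DQ log-likelihood series is split into n_time_bins bins of roughly equal occupancy by cutting at evenly spaced percentiles. A small worked example of that binning logic, with illustrative numbers:

import numpy as np

dq_logl = np.array([0.1, 0.5, 0.2, 0.9, 0.4, 0.7])  # one value per second
n_bins = 3
percentiles = np.linspace(0, 100, n_bins + 1)
dq_percentiles = np.percentile(dq_logl, percentiles)[1:]  # upper edges

# A second's bin index is n_bins minus the number of upper edges
# at or above its value (same comprehension as in the script)
seconds_bin = np.array([n_bins - np.sum(dq_percentiles >= dq_ll)
                        for dq_ll in dq_logl])
print(seconds_bin)  # [0 1 0 2 1 2]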
@@ -116,24 +138,45 @@ del dq_logl
dq_percentiles_time = dict(zip(dq_times, seconds_bin/n_bins))
del dq_times

with h5.File(args.bank_file, 'r') as bank:
if args.background_bins:
logging.info('Sorting bank into bins')
data = {
'mass1': bank['mass1'][:],
'mass2': bank['mass2'][:],
'spin1z': bank['spin1z'][:],
'spin2z': bank['spin2z'][:],
'f_lower': np.ones_like(bank['mass1'][:]) * args.f_lower
}
locs_dict = pycbc.events.background_bin_from_string(
args.background_bins, data)
del data
locs_names = [b.split(':')[0] for b in args.background_bins]
else:
locs_dict = {'all_bin': np.arange(0, len(bank['mass1'][:]), 1)}
locs_names = ['all_bin']

if args.prune_number > 0:
for bin_name in locs_names:
logging.info('Processing bin %s...', bin_name)
logging.info('Pruning bin %s', bin_name)
bin_locs = locs_dict[bin_name]
trig_times_bin = trig_times[np.isin(trig_ids, bin_locs)]
trig_stats_bin = stat[np.isin(trig_ids, bin_locs)]
inbin = np.isin(tmplt_ids, bin_locs)
trig_times_bin = trig_times[inbin]
trig_stats_bin = stat[inbin]

for j in range(args.prune_number):
max_stat_arg = np.argmax(trig_stats_bin)
remove = np.nonzero(abs(trig_times_bin[max_stat_arg] - trig_times)
< args.prune_window)[0]
remove_inbin = np.nonzero(abs(trig_times_bin[max_stat_arg]
- trig_times_bin) < args.prune_window)[0]
remove = abs(trig_times_bin[max_stat_arg] - trig_times) \
< args.prune_window
logging.info("Prune %d: pruning %d triggers", j, sum(remove))
remove_inbin = abs(trig_times_bin[max_stat_arg]
- trig_times_bin) < args.prune_window
stat[remove] = 0
trig_stats_bin[remove_inbin] = 0
keep = np.nonzero(stat)[0]
keep = np.flatnonzero(stat)
logging.info("%d triggers removed through pruning", keep.size)
trig_times_int = trig_times_int[keep]
trig_ids = trig_ids[keep]
tmplt_ids = tmplt_ids[keep]
del stat
del keep

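Each pass of the pruning loop above locates the loudest surviving trigger in a template bin and zeroes the ranking of every trigger within --prune-window of it, so a few very loud (often glitch-related) events cannot dominate the rate estimate. A condensed, self-contained sketch of one pass, with illustrative arrays:

import numpy as np

trig_times = np.array([100.0, 100.2, 250.0, 250.1, 400.0])  # seconds
stat = np.array([8.0, 20.0, 9.0, 7.5, 6.0])
prune_window = 0.5

loudest = np.argmax(stat)
# Zero out every trigger within the window around the loudest one
remove = abs(trig_times[loudest] - trig_times) < prune_window
stat[remove] = 0

keep = np.flatnonzero(stat)
print(keep)  # [2 3 4] -- both triggers near t=100 are pruned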
@@ -142,20 +185,22 @@ del trig_times
with h5.File(args.output_file, 'w') as f:
for bin_name in locs_names:
bin_locs = locs_dict[bin_name]
trig_times_bin = trig_times_int[np.isin(trig_ids, bin_locs)]
inbin = np.isin(tmplt_ids, bin_locs)
trig_times_bin = trig_times_int[inbin]
trig_percentile = np.array([dq_percentiles_time[t]
for t in trig_times_bin])
logging.info('Processing %d triggers...', len(trig_percentile))
logging.info('Processing %d triggers in bin %s',
len(trig_percentile), bin_name)

(counts, bins) = np.histogram(trig_percentile, bins=(percentiles)/100)
counts = counts.astype('float')
counts = counts.astype(np.float64)
rates = np.zeros_like(bin_times)
rates[times_nz] = counts[times_nz]/bin_times[times_nz]
mean_rate = len(trig_percentile) / full_time
if mean_rate > 0.:
rates = rates / mean_rate

logging.info('Writing rates to output file %s...', args.output_file)
logging.info('Writing rates to output file %s', args.output_file)
grp = f.create_group(bin_name)
grp['rates'] = rates
grp['locs'] = locs_dict[bin_name]
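The quantity written out per template bin above is a relative trigger rate: triggers are histogrammed by their DQ percentile, divided by the livetime in each DQ bin, then divided by the mean rate, so a value of 1 means the trigger rate is independent of DQ state. In miniature, under the same definitions:

import numpy as np

counts = np.array([10., 20., 70.])        # triggers per DQ bin
bin_times = np.array([100., 100., 100.])  # livetime (s) per DQ bin
full_time = bin_times.sum()

rates = counts / bin_times                # raw rate per bin
mean_rate = counts.sum() / full_time      # overall mean rate
rates /= mean_rate
print(rates)  # [0.3 0.6 2.1] -- the worst DQ bin runs at 2.1x the mean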
72 changes: 71 additions & 1 deletion bin/minifollowups/pycbc_injection_minifollowup
@@ -25,7 +25,7 @@ from pycbc.detector import Detector
import pycbc.workflow.minifollowups as mini
from pycbc.workflow.core import resolve_url_to_file
import pycbc.version, pycbc.pnutils
from pycbc.io.hdf import SingleDetTriggers
from pycbc.io.hdf import SingleDetTriggers, HFile

parser = argparse.ArgumentParser(description=__doc__[1:])
parser.add_argument('--version', action='version', version=pycbc.version.git_verbose_msg)
@@ -53,6 +53,10 @@ parser.add_argument('--ifar-threshold', type=float, default=None,
parser.add_argument('--maximum-decisive-snr', type=float, default=None,
help="If given, only followup injections where the "
"decisive SNR is smaller than this value.")
parser.add_argument('--nearby-triggers-window', type=float, default=0.05,
help="Maximum time difference between the missed "
"injection and the loudest SNR nearby trigger to "
"display, seconds. Default=0.05")
wf.add_workflow_command_line_group(parser)
wf.add_workflow_settings_cli(parser, include_subdax_opts=True)
args = parser.parse_args()
@@ -140,6 +144,50 @@ except KeyError:
dec_chirp_dist = pycbc.pnutils.chirp_distance(dec_dist, mchirp)
sorting = dec_chirp_dist.argsort() # ascending order of dec chirp distance

# Get the trigger SNRs and times
# But only ones which are within a small window of the missed injection
missed_inj_times = numpy.sort(f['injections/tc'][:][missed])

# Note: Adding the Earth's diameter in light seconds to the window here
# to allow for the injection's different arrival times at each IFO
safe_window = args.nearby_triggers_window + 0.0425

def nearby_missedinj(endtime, snr):
"""
Convenience function to check if trigger times are within a small
window of the missed injections

Parameters
----------
endtime: numpy array
Trigger times to be checked against the missed injection times
snr: numpy array
Required by design of the HFile.select method but not used,
SNR of the triggers
Returns
-------
boolean array
True for triggers which are close to any missed injection
"""
left = numpy.searchsorted(missed_inj_times - safe_window, endtime)
right = numpy.searchsorted(missed_inj_times + safe_window, endtime)
return left != right

trigger_idx = {}
trigger_snrs = {}
trigger_times = {}
for trig in single_triggers:
ifo = trig.ifo
with HFile(trig.lfn, 'r') as trig_f:
trigger_idx[ifo], trigger_times[ifo], trigger_snrs[ifo] = \
trig_f.select(
nearby_missedinj,
f'{ifo}/end_time',
f'{ifo}/snr',
return_indices=True)

if len(missed) < num_events:
num_events = len(missed)

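nearby_missedinj relies on a standard searchsorted trick: for sorted injection times t and half-width w, a trigger time x lies within w of some t exactly when its insertion points into t - w and t + w differ. A standalone check of that logic (times are illustrative; 0.0925 is the 0.05 s default window plus the 0.0425 s Earth-crossing allowance):

import numpy as np

missed_inj_times = np.sort(np.array([100.0, 500.0]))
safe_window = 0.0925

def nearby(endtime):
    left = np.searchsorted(missed_inj_times - safe_window, endtime)
    right = np.searchsorted(missed_inj_times + safe_window, endtime)
    return left != right

print(nearby(np.array([99.95, 100.2, 500.05])))  # [ True False  True]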
@@ -178,7 +226,29 @@ for num_event in range(num_events):
(workflow, single_triggers, tmpltbank_file,
injection_file, args.output_dir, trig_id=trig_id,
file_substring='found_after_vetoes',
title="Details of closest event",
tags=args.tags + [str(num_event)])[0],)]

for sngl in single_triggers:
# Find the triggers close to this injection at this IFO
ifo = sngl.ifo
trig_tdiff = abs(inj_params[ifo + '_end_time'] - trigger_times[ifo])
nearby = trig_tdiff < args.nearby_triggers_window
if not any(nearby):
# If there are no triggers in the defined window,
# do not print any info
continue
# Find the loudest SNR in this window
loudest = numpy.argmax(trigger_snrs[ifo][nearby])
# Convert to the index in the trigger file
nearby_trigger_idx = trigger_idx[ifo][nearby][loudest]
# Make the info snippet
sngl_info = mini.make_sngl_ifo(workflow, sngl, tmpltbank_file,
nearby_trigger_idx, args.output_dir, ifo,
title=f"Parameters of loudest SNR nearby trigger in {ifo}",
tags=args.tags + [str(num_event)])[0]
layouts += [(sngl_info,)]

files += mini.make_trigger_timeseries(workflow, single_triggers,
ifo_times, args.output_dir,
tags=args.tags + [str(num_event)])
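Recovering the loudest nearby trigger's row in the original file takes two indexing steps above: the boolean nearby mask first, then argmax over the masked SNRs, then the same mask applied to the stored indices. A toy version of that bookkeeping (values illustrative):

import numpy as np

trigger_idx = np.array([10, 42, 57, 88])      # rows in the trigger file
trigger_snrs = np.array([5.0, 9.0, 6.5, 7.0])
nearby = np.array([False, True, True, False])

loudest = np.argmax(trigger_snrs[nearby])          # position among nearby
nearby_trigger_idx = trigger_idx[nearby][loudest]  # original row
print(nearby_trigger_idx)  # 42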
20 changes: 12 additions & 8 deletions bin/minifollowups/pycbc_page_coincinfo
@@ -41,16 +41,20 @@ parser.add_argument('--statmap-file-subspace-name', default='background_exc',
trig_input = parser.add_mutually_exclusive_group(required=True)
trig_input.add_argument('--n-loudest', type=int,
help="Examine the n'th loudest trigger, use with statmap file")
parser.add_argument('--sort-variable', default='ifar',
help='Which subgroup of --analysis-category to use for '
'sorting. Default=ifar')
parser.add_argument('--sort-order', default='descending',
choices=['ascending','descending'],
help='Which direction to use when sorting on '
'--sort-variable. Default=descending')
trig_input.add_argument('--trigger-id', type=int,
help="Examine the trigger with specified ID, use with statmap file. An "
"alternative to --n-loudest. Cannot be used together")
parser.add_argument('--sort-variable', default='ifar',
help='Which subgroup of --analysis-category to use for '
'sorting if using --n-loudest. Default=ifar')
parser.add_argument('--sort-order', default='descending',
choices=['ascending','descending'],
help='Which direction to use when sorting on '
'--sort-variable with --n-loudest. Default=descending')
parser.add_argument('--title',
help="Supply a title for the event details. Defaults are "
"'Parameters of event ranked N' if --n-loudest is given, or "
"'Details of trigger' for --trigger-id.")
parser.add_argument('--include-summary-page-link', action='store_true',
help="If given, will include a link to the DQ summary page on the "
"single detector trigger tables.")
@@ -211,5 +215,5 @@ html += str(pycbc.results.static_table(data, headers))

pycbc.results.save_fig_with_metadata(html, args.output_file, {},
cmd=' '.join(sys.argv),
title=title,
title=args.title if args.title else title,
caption=caption)
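The --n-loudest/--trigger-id pairing above is an argparse mutually exclusive group, so supplying both options aborts with a usage error. A minimal sketch of that behavior, independent of the rest of the script:

import argparse

parser = argparse.ArgumentParser()
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('--n-loudest', type=int)
group.add_argument('--trigger-id', type=int)

parser.parse_args(['--n-loudest', '0'])    # fine
parser.parse_args(['--trigger-id', '12'])  # fine
# parser.parse_args(['--n-loudest', '0', '--trigger-id', '12'])
# -> error: argument --trigger-id: not allowed with argument --n-loudest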