From ec8bd5def9919db8187ab3b4f6bebbdacf98efae Mon Sep 17 00:00:00 2001
From: Gareth S Cabourn Davies
Date: Mon, 5 Feb 2024 09:20:04 +0000
Subject: [PATCH] Some changes to HFile.select() and SingleDetTriggers (#4545)

* Some changes to HFile.select() and mask_to_n_loudest_clustered
* missed comma
* Some CC problems
* Update
* revert changes which meant kwargs couldn't be passed to mask_to_n_loudest_clustered_events
* CC / fixes
* Use list rather than numpy array for mask
* remove messing about with the mask dtyping
* initialise a transparent mask if it is not given
* Fix problem when the filter value may not be available to the HFile
* thinko in pycbc_bin_trigger_rates_dq
* Don't make a huge boolean mask of all True values and then run nonzero on it = BAD IDEA
* CC
* re-enable option to have no mask
* efficiency saving in pycbc_plot_hist
* no mask option in trig_dict
* typo
* allow None mask in mask_size
* thought I could apply another mask but I can't
* get filter_func back, use new interface for recent changes
* Some minor fixes
* cc
* allow option to do a logical and on masks
* various minor bugfixes
* max z problem
* fixes to pycbc_page_snglinfo
* use alternative dict for better clarity
* IWH comments
* add check for required datasets (mostly to remind people to add the required dataset list if they add a new ranking!)
* fix bad check, remove min_snr option
* remove option from CI
* Fix bug with applying premask and using filter_rank threshold at the same time
* stop checking for required datasets - this is difficult when so many different types use the same function
* CC prefers np.flatnonzero(arr) to arr.nonzero()[0]
* Fix a couple of rarely-used scripts, remove unused imports, whitespace
---
 bin/all_sky_search/pycbc_bin_trigger_rates_dq | 60 +--
 .../pycbc_fit_sngls_split_binned | 35 +-
 .../pycbc_injection_minifollowup | 42 +-
 bin/minifollowups/pycbc_page_snglinfo | 50 ++-
 .../pycbc_plot_trigger_timeseries | 15 +-
 bin/minifollowups/pycbc_sngl_minifollowup | 32 +-
 bin/plotting/pycbc_plot_hist | 19 +-
 bin/plotting/pycbc_plot_singles_timefreq | 18 +-
 bin/plotting/pycbc_plot_singles_vs_params | 65 ++--
 bin/pycbc_fit_sngl_trigs | 27 +-
 bin/pycbc_process_sngls | 11 +-
 docs/hwinj.rst | 2 +-
 examples/search/plotting.ini | 3 +-
 pycbc/events/ranking.py | 17 +-
 pycbc/io/hdf.py | 363 ++++++++++++++----
 15 files changed, 468 insertions(+), 291 deletions(-)

diff --git a/bin/all_sky_search/pycbc_bin_trigger_rates_dq b/bin/all_sky_search/pycbc_bin_trigger_rates_dq
index 1bb84299bd0..25234a687f0 100644
--- a/bin/all_sky_search/pycbc_bin_trigger_rates_dq
+++ b/bin/all_sky_search/pycbc_bin_trigger_rates_dq
@@ -15,7 +15,7 @@ from pycbc.events.veto import (select_segments_by_definer,
                                start_end_to_segments,
                                segments_to_start_end)
 from pycbc.types.optparse import MultiDetOptionAction
-from pycbc.io.hdf import HFile, SingleDetTriggers
+from pycbc.io.hdf import SingleDetTriggers
 from pycbc.version import git_verbose_msg as version
 
 parser = argparse.ArgumentParser(description=__doc__)
@@ -46,54 +46,11 @@ logging.info('Start')
 
 ifo, flag_name = args.flag_name.split(':')
 
-# Setup a data mask to remove any triggers with SNR below threshold
-# This works as a pre-filter as SNR is always greater than or equal
-# to sngl_ranking, except in the psdvar case, where it could increase.
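As an illustration of where this pre-filter logic moves: the hand-built mask deleted below is now handled inside SingleDetTriggers via the filter_rank/filter_threshold arguments added in pycbc/io/hdf.py. A minimal sketch of the replacement interface (the trigger file name is a hypothetical placeholder):

    from pycbc.io.hdf import SingleDetTriggers

    # 'H1-TRIGGERS.hdf' is a hypothetical merged single-detector trigger file
    trigs = SingleDetTriggers(
        'H1-TRIGGERS.hdf',
        'H1',
        filter_rank='newsnr',  # any key of ranking.sngls_ranking_function_dict
        filter_threshold=6.0,
    )
    stat = trigs.get_ranking('newsnr')  # ranking values of surviving triggers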
- -with HFile(args.trig_file, 'r') as trig_file: - n_triggers_orig = trig_file[f'{ifo}/snr'].size - logging.info("Trigger file has %d triggers", n_triggers_orig) - logging.info('Generating trigger mask') - if f'{ifo}/psd_var_val' in trig_file: - idx, _, _ = trig_file.select( - lambda snr, psdvar: snr / psdvar ** 0.5 >= args.stat_threshold, - f'{ifo}/snr', - f'{ifo}/psd_var_val', - return_indices=True - ) - else: - # psd_var_val may not have been calculated - idx, _ = trig_file.select( - lambda snr: snr >= args.stat_threshold, - f'{ifo}/snr', - return_indices=True - ) - data_mask = np.zeros(n_triggers_orig, dtype=bool) - data_mask[idx] = True - - # also get the gated times while we have the file open - gate_times = [] - if args.gating_windows: - logging.info('Getting gated times') - try: - gating_types = trig_file[f'{ifo}/gating'].keys() - for gt in gating_types: - gate_times += list(trig_file[f'{ifo}/gating/{gt}/time'][:]) - gate_times = np.unique(gate_times) - except KeyError: - logging.warning('No gating found in trigger file') - -logging.info("Getting %s triggers from file with pre-cut SNR > %.3f", - idx.size, args.stat_threshold) - trigs = SingleDetTriggers( args.trig_file, - None, - None, - None, - None, ifo, - premask=data_mask + filter_rank=args.sngl_ranking, + filter_threshold=args.stat_threshold, ) # Extract the data we actually need from the data structure: @@ -101,16 +58,6 @@ tmplt_ids = trigs.template_id trig_times = trigs.end_time stat = trigs.get_ranking(args.sngl_ranking) -n_triggers = tmplt_ids.size - -logging.info("Applying %s > %.3f cut", args.sngl_ranking, - args.stat_threshold) -keep = stat >= args.stat_threshold -tmplt_ids = tmplt_ids[keep] -trig_times = trig_times[keep] -logging.info("Removed %d triggers, %d remain", - n_triggers - tmplt_ids.size, tmplt_ids.size) - # Get the template bins bin_tids_dict = {} with h5.File(args.template_bins_file, 'r') as f: @@ -118,7 +65,6 @@ with h5.File(args.template_bins_file, 'r') as f: for bin_name in ifo_grp.keys(): bin_tids_dict[bin_name] = ifo_grp[bin_name]['tids'][:] - # get analysis segments analysis_segs = select_segments_by_definer( args.analysis_segment_file, diff --git a/bin/all_sky_search/pycbc_fit_sngls_split_binned b/bin/all_sky_search/pycbc_fit_sngls_split_binned index 79fa51e575f..d77581ca81b 100644 --- a/bin/all_sky_search/pycbc_fit_sngls_split_binned +++ b/bin/all_sky_search/pycbc_fit_sngls_split_binned @@ -22,7 +22,7 @@ from matplotlib import pyplot as plt import numpy as np from pycbc import events, bin_utils, results -from pycbc.io import HFile, SingleDetTriggers +from pycbc.io import SingleDetTriggers from pycbc.events import triggers as trigs from pycbc.events import trigger_fits as trstats from pycbc.events import stat as pystat @@ -202,40 +202,12 @@ boundaries = trigf[args.ifo + '/template_boundaries'][:] trigf.close() -# Setup a data mask to remove any triggers with SNR below threshold -# This works as a pre-filter as SNR is always greater than or equal -# to sngl_ranking, except in the psdvar case, where it could increase. 
-with HFile(args.trigger_file, 'r') as trig_file: - n_triggers_orig = trig_file[f'{args.ifo}/snr'].size - logging.info("Trigger file has %d triggers", n_triggers_orig) - logging.info('Generating trigger mask') - if f'{args.ifo}/psd_var_val' in trig_file: - idx, _, _ = trig_file.select( - lambda snr, psdvar: snr / psdvar ** 0.5 >= args.plot_lower_stat_limit, - f'{args.ifo}/snr', - f'{args.ifo}/psd_var_val', - return_indices=True - ) - else: - # psd_var_val may not have been calculated - idx, _ = trig_file.select( - lambda snr: snr >= args.plot_lower_stat_limit, - f'{args.ifo}/snr', - return_indices=True - ) - data_mask = np.zeros(n_triggers_orig, dtype=bool) - data_mask[idx] = True - - logging.info('Calculating single stat values from trigger file') trigs = SingleDetTriggers( args.trigger_file, - None, - None, - None, - None, args.ifo, - premask=data_mask + filter_rank=args.sngl_ranking, + filter_threshold=args.plot_lower_stat_limit, ) # This is the direct pointer to the HDF file, used later on trigf = trigs.trigs_f @@ -243,7 +215,6 @@ trigf = trigs.trigs_f stat = trigs.get_ranking(args.sngl_ranking) time = trigs.end_time - logging.info('Processing template boundaries') max_boundary_id = np.argmax(boundaries) sorted_boundary_list = np.sort(boundaries) diff --git a/bin/minifollowups/pycbc_injection_minifollowup b/bin/minifollowups/pycbc_injection_minifollowup index 198ca2a34bf..768c475c253 100644 --- a/bin/minifollowups/pycbc_injection_minifollowup +++ b/bin/minifollowups/pycbc_injection_minifollowup @@ -244,15 +244,17 @@ def nearby_missedinj(endtime, snr): trigger_idx = {} trigger_snrs = {} trigger_times = {} +# This finds the triggers near to _any_ missed injection for trig in single_triggers: ifo = trig.ifo with HFile(trig.lfn, 'r') as trig_f: - trigger_idx[ifo], trigger_times[ifo], trigger_snrs[ifo] = \ + trigger_idx[ifo], data_tuple = \ trig_f.select( nearby_missedinj, f'{ifo}/end_time', f'{ifo}/snr', - return_indices=True) + ) + trigger_times[ifo], trigger_snrs[ifo] = data_tuple # figure out how many injections to follow up num_events = int(workflow.cp.get_opt_tags( @@ -305,7 +307,7 @@ for num_event in range(num_events): tags=args.tags + [str(num_event)])[0],)] for sngl in single_triggers: - # Find the triggers close to this injection at this IFO + # Find the triggers close to _this_ injection at this IFO ifo = sngl.ifo trig_tdiff = abs(inj_params[ifo + '_end_time'] - trigger_times[ifo]) nearby = trig_tdiff < args.nearby_triggers_window @@ -315,7 +317,7 @@ for num_event in range(num_events): continue # Find the loudest SNR in this window loudest = numpy.argmax(trigger_snrs[ifo][nearby]) - # Convert to the indexin the trigger file + # Convert to the index in the trigger file nearby_trigger_idx = trigger_idx[ifo][nearby][loudest] # Make the info snippet sngl_info = mini.make_sngl_ifo(workflow, sngl, tmpltbank_file, @@ -381,19 +383,31 @@ for num_event in range(num_events): use_exact_inj_params=True) for curr_ifo in args.single_detector_triggers: + # Finding loudest template in this detector near to the injection: + # First, find triggers close to the missed injection single_fname = args.single_detector_triggers[curr_ifo] - hd_sngl = SingleDetTriggers(single_fname, args.bank_file, None, None, - None, curr_ifo) - end_times = hd_sngl.end_time - # Use SNR here or NewSNR?? 
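The select() calls above follow the reworked return convention defined in pycbc/io/hdf.py below: an index array is always returned first, followed by a tuple of data arrays, or None in place of the tuple when return_data=False. A minimal sketch with a hypothetical file name:

    from pycbc.io.hdf import HFile

    # 'H1-TRIGGERS.hdf' is a hypothetical merged single-detector trigger file
    with HFile('H1-TRIGGERS.hdf', 'r') as trig_file:
        idx, (end_time, snr) = trig_file.select(
            lambda t, s: s > 8,
            'end_time',
            'snr',
            group='H1',
        )
    # idx indexes into the full datasets; end_time and snr hold only the
    # values for which the lambda returned True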
- snr = hd_sngl.snr - lgc_mask = abs(end_times - inj_params['tc']) < args.inj_window - - if len(snr[lgc_mask]) == 0: + idx, _ = HFile(single_fname).select( + lambda t: abs(t - inj_params['tc']) < args.inj_window, + f'{curr_ifo}/end_time', + return_data=False, + ) + + if len(idx) == 0: + # No events in the injection window for this detector continue - snr_idx = numpy.arange(len(lgc_mask))[lgc_mask][snr[lgc_mask].argmax()] - hd_sngl.mask = [snr_idx] + hd_sngl = SingleDetTriggers( + single_fname, + curr_ifo, + bank_file=args.bank_file, + premask=idx + ) + # Next, find the loudest within this set of triggers + # Use SNR here or NewSNR, or other?? + loudest_idx = hd_sngl.snr.argmax() + hd_sngl.apply_mask([loudest_idx]) + + # What are the parameters of this trigger? curr_params = copy.deepcopy(inj_params) curr_params['mass1'] = hd_sngl.mass1[0] curr_params['mass2'] = hd_sngl.mass2[0] diff --git a/bin/minifollowups/pycbc_page_snglinfo b/bin/minifollowups/pycbc_page_snglinfo index eb248daecd1..5d741e66946 100644 --- a/bin/minifollowups/pycbc_page_snglinfo +++ b/bin/minifollowups/pycbc_page_snglinfo @@ -21,7 +21,7 @@ matplotlib.use('Agg') import lal import pycbc.version, pycbc.events, pycbc.results, pycbc.pnutils from pycbc.results import followup -from pycbc.events import stat +from pycbc.events import stat as pystat from pycbc.io import hdf @@ -62,7 +62,7 @@ parser.add_argument('--include-gracedb-link', action='store_true', parser.add_argument('--significance-file', help="If given, will search for this trigger's id in the file to see if " "stat and p_astro values exists for this trigger.") -stat.insert_statistic_option_group(parser, +pystat.insert_statistic_option_group(parser, default_ranking_statistic='single_ranking_only') @@ -70,29 +70,45 @@ args = parser.parse_args() pycbc.init_logging(args.verbose) +if args.trigger_id is not None: + # Make a mask which is just the trigger of interest + mask = numpy.array([args.trigger_id]) +else: + mask = None + # Get the single-ifo triggers -sngl_file = hdf.SingleDetTriggers(args.single_trigger_file, args.bank_file, - args.veto_file, args.veto_segment_name, None, args.instrument) +sngl_file = hdf.SingleDetTriggers( + args.single_trigger_file, + args.instrument, + bank_file=args.bank_file, + veto_file=args.veto_file, + segment_name=args.veto_segment_name, + premask=mask, +) + +rank_method = pystat.get_statistic_from_opts(args, [args.instrument]) -rank_method = stat.get_statistic_from_opts(args, [args.instrument]) if args.trigger_id is not None: - sngl_file.mask = [args.trigger_id] - trig_id = args.trigger_id - sngl_file.mask_to_n_loudest_clustered_events(rank_method, n_loudest=1) - stat = sngl_file.stat[0] + # Mask already applied + pass elif args.n_loudest is not None: # Cluster by a ranking statistic and retain only the loudest n clustered # triggers - sngl_file.mask_to_n_loudest_clustered_events(rank_method, - n_loudest=args.n_loudest+1) + sngl_file.mask_to_n_loudest_clustered_events( + rank_method, + n_loudest=args.n_loudest+1 + ) +else: + raise ValueError("Must give --n-loudest or --trigger-id.") + +sds = rank_method.single(sngl_file.trig_dict()) +stat = rank_method.rank_stat_single((sngl_file.ifo, sds)) +if args.n_loudest is not None: # Restrict to only the nth loudest, instead of all triggers up to nth # loudest - stat = sngl_file.stat l = stat.argsort() stat = stat[l[0]] - sngl_file.mask = sngl_file.mask[l[0]] -else: - raise ValueError("Must give --n-loudest or --trigger-id.") + sngl_file.apply_mask(l[0]) # make a table for the single detector 
information ############################ time = sngl_file.end_time @@ -142,7 +158,9 @@ if args.ranking_statistic in ["quadsum", "single_ranking_only"]: else: # Name would be too long - just call it ranking statistic stat_name = 'Ranking Statistic' - stat_name_long = ' with '.join([args.ranking_statistic, args.sngl_ranking]) + stat_name_long = ' with '.join( + [args.ranking_statistic, args.sngl_ranking] + ) headers.append(stat_name) diff --git a/bin/minifollowups/pycbc_plot_trigger_timeseries b/bin/minifollowups/pycbc_plot_trigger_timeseries index ef9829dab01..933870143a4 100644 --- a/bin/minifollowups/pycbc_plot_trigger_timeseries +++ b/bin/minifollowups/pycbc_plot_trigger_timeseries @@ -61,9 +61,13 @@ for ifo in args.single_trigger_files.keys(): # Identify trigger idxs within window of trigger time with HFile(args.single_trigger_files[ifo], 'r') as data: - idx, _ = data.select(lambda endtime: abs(endtime - t) < args.window, - f'{ifo}/end_time', return_indices=True) - data_mask = numpy.zeros(len(data[f'{ifo}/snr']), dtype=bool) + idx, _ = data.select( + lambda endtime: abs(endtime - t) < args.window, + 'end_time', + group=ifo, + return_data=False, + ) + data_mask = numpy.zeros(data[ifo]['snr'].size, dtype=bool) data_mask[idx] = True if not len(idx): @@ -75,13 +79,8 @@ for ifo in args.single_trigger_files.keys(): label=ifo) continue - # Not using bank file, or veto file, so lots of "None"s here. trigs = SingleDetTriggers( args.single_trigger_files[ifo], - None, - None, - None, - None, ifo, premask=data_mask ) diff --git a/bin/minifollowups/pycbc_sngl_minifollowup b/bin/minifollowups/pycbc_sngl_minifollowup index 01fbc743e83..5515e87a930 100644 --- a/bin/minifollowups/pycbc_sngl_minifollowup +++ b/bin/minifollowups/pycbc_sngl_minifollowup @@ -65,8 +65,9 @@ parser.add_argument('--inspiral-data-read-name', parser.add_argument('--inspiral-data-analyzed-name', help="Name of inspiral segmentlist containing data " "analyzed by each analysis job.") -parser.add_argument('--min-snr', type=float, default=6.5, - help="Minimum SNR to consider for loudest triggers") +parser.add_argument('--min-sngl-ranking', type=float, default=6.5, + help="Minimum sngl-ranking to consider for loudest " + "triggers. 
Default=6.5.") parser.add_argument('--non-coinc-time-only', action='store_true', help="If given remove (veto) single-detector triggers " "that occur during a time when at least one other " @@ -130,31 +131,14 @@ insp_data_seglists.coalesce() num_events = int(workflow.cp.get_opt_tags('workflow-sngl_minifollowups', 'num-sngl-events', '')) -# This helps speed up the processing to ignore a large fraction of triggers -mask = None -f = hdf.HFile(args.single_detector_file, 'r') -n_triggers = f['{}/snr'.format(args.instrument)].size -logging.info("%i triggers in file", n_triggers) -if args.min_snr: - logging.info('Calculating Prefilter') - idx, _ = f.select(lambda snr: snr > args.min_snr, - '{}/snr'.format(args.instrument), - return_indices=True) - mask = numpy.zeros(n_triggers, dtype=bool) - mask[idx] = True - if len(idx) < num_events: - logging.info("Fewer triggers exist after the --min-snr cut (%d) " - "than requested for the minifollowup (%d)", - len(idx), num_events) - trigs = hdf.SingleDetTriggers( args.single_detector_file, - args.bank_file, - args.foreground_censor_file, - args.foreground_segment_name, - None, args.instrument, - premask=mask + bank_file=args.bank_file, + veto_file=args.foreground_censor_file, + segment_name=args.foreground_segment_name, + filter_rank=args.sngl_ranking, + filter_threshold=args.min_sngl_ranking ) # Include gating vetoes diff --git a/bin/plotting/pycbc_plot_hist b/bin/plotting/pycbc_plot_hist index 4e76b69bb47..b252c82b430 100644 --- a/bin/plotting/pycbc_plot_hist +++ b/bin/plotting/pycbc_plot_hist @@ -7,7 +7,7 @@ import pycbc.version, pycbc.results, pycbc.io from itertools import cycle from matplotlib import use; use('Agg') from matplotlib import pyplot -from pycbc.events import background_bin_from_string, veto +from pycbc.events import background_bin_from_string, veto, ranking parser = argparse.ArgumentParser(description=__doc__) parser.add_argument('--version', action='version', version=pycbc.version.git_verbose_msg) @@ -46,10 +46,21 @@ pycbc.init_logging(args.verbose) f = h5py.File(args.trigger_file, 'r') ifo = tuple(f.keys())[0] +# Apply presort if we can; if we can't, load everything but it may be +# inefficient +xv = args.x_var if args.x_var in ranking.sngls_ranking_function_dict else None +xt = args.x_min if args.x_var in ranking.sngls_ranking_function_dict else None + # read single-detector triggers -trigs = pycbc.io.SingleDetTriggers(args.trigger_file, args.bank_file, - args.veto_file, args.segment_name, - None, ifo) +trigs = pycbc.io.SingleDetTriggers( + args.trigger_file, + ifo, + bank_file=args.bank_file, + veto_file=args.veto_file, + segment_name=args.segment_name, + filter_rank=xv, + filter_threshold=xt, +) # get x values and find the maximum x value val = getattr(trigs, args.x_var) diff --git a/bin/plotting/pycbc_plot_singles_timefreq b/bin/plotting/pycbc_plot_singles_timefreq index 20f480240f9..e8d12e9575a 100644 --- a/bin/plotting/pycbc_plot_singles_timefreq +++ b/bin/plotting/pycbc_plot_singles_timefreq @@ -105,13 +105,17 @@ def rough_filter(snr, chisq, chisq_dof, end_time, tmp_id, tmp_dur): return np.logical_and(end_time > opts.gps_start_time, end_time < opts.gps_end_time + tmp_dur) -indices, snr, chisq, chisq_dof, end_time, template_ids, template_duration = \ - trig_f.select(rough_filter, opts.detector + '/snr', - opts.detector + '/chisq', opts.detector + '/chisq_dof', - opts.detector + '/end_time', - opts.detector + '/template_id', - opts.detector + '/template_duration', - return_indices=True) +indices, data_tuple = trig_f.select( + 
rough_filter, + 'snr', + 'chisq', + 'chisq_dof', + 'end_time', + 'template_id', + 'template_duration', + group=opts.detector +) +snr, chisq, chisq_dof, end_time, template_ids, template_duration = data_tuple if len(indices) > 0: if opts.veto_file: diff --git a/bin/plotting/pycbc_plot_singles_vs_params b/bin/plotting/pycbc_plot_singles_vs_params index c834037a30d..f588689ddd4 100644 --- a/bin/plotting/pycbc_plot_singles_vs_params +++ b/bin/plotting/pycbc_plot_singles_vs_params @@ -52,8 +52,6 @@ parser.add_argument('--segment-name', default=None, type=str, help='Optional, name of segment list to use for vetoes') parser.add_argument('--filter-string', default=None, type=str, help='Optional, boolean expression for filtering triggers') -parser.add_argument('--min-snr', default=0., type=float, - help='Only plot triggers above the given SNR') parser.add_argument('--output-file', type=str, required=True, help='Destination path for plot') parser.add_argument('--x-var', required=True, @@ -84,30 +82,27 @@ opts = parser.parse_args() logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO) -data_mask = None -if opts.min_snr > 0: - with pycbc.io.HFile(opts.single_trig_file, 'r') as trig_file: - n_triggers_orig = trig_file[f'{opts.detector}/snr'].size - logging.info("Trigger file has %d triggers", n_triggers_orig) - logging.info('Generating trigger mask (on SNR)') - idx, _ = trig_file.select( - lambda snr: snr >= opts.min_snr, - f'{opts.detector}/snr', - return_indices=True - ) - data_mask = np.zeros(n_triggers_orig, dtype=bool) - data_mask[idx] = True +if opts.z_var == 'density' or opts.min_z is None: + filter_rank = None + filter_thresh = None +else: + # We can apply a/another filter on the z value + filter_rank = opts.z_var + filter_thresh = opts.min_z trigs = pycbc.io.SingleDetTriggers( opts.single_trig_file, - opts.bank_file, - opts.veto_file, - opts.segment_name, - opts.filter_string, opts.detector, - premask=data_mask + bank_file=opts.bank_file, + veto_file=opts.veto_file, + segment_name=opts.segment_name, + filter_func=opts.filter_string, + filter_rank=filter_rank, + filter_threshold=filter_thresh ) +# Can this be folded into the SingleDetTriggers call? Maybe, but we can +# do this later if needed x = getattr(trigs, opts.x_var) y = getattr(trigs, opts.y_var) @@ -123,6 +118,29 @@ if opts.max_y is not None: x = x[mask] y = y[mask] +title = f'{opts.z_var.title()} of {opts.detector} triggers ' + \ + f'over {opts.x_var.title()} and {opts.y_var.title()}' +fig_caption = f"This plot shows the {opts.z_var} of single detector " + \ + f"triggers for the {opts.detector} detector. " + \ + f"{opts.z_var.title()} is shown on the colorbar axis " + \ + f"against {opts.x_var} and {opts.y_var} on the x- and y-axes." 
+ +if not any(mask): + # All triggers removed - make a blank plot which says so: + fig = pl.figure() + ax = fig.gca() + pl.text(0.5, 0.5, 'no triggers in the range') + + pycbc.results.save_fig_with_metadata( + fig, + opts.output_file, + title=title, + caption=fig_caption, + cmd=' '.join(sys.argv), + fig_kwds={'dpi': 200} + ) + sys.exit() + hexbin_style = { 'gridsize': opts.grid_size, 'linewidths': 0.03 @@ -145,7 +163,6 @@ logging.info('Plotting') fig = pl.figure() ax = fig.gca() - if opts.z_var == 'density': hb = ax.hexbin(x, y, **hexbin_style) fig.colorbar(hb, ticks=LogLocator(subs=range(10))) @@ -166,12 +183,6 @@ else: ax.set_xlabel(opts.x_var) ax.set_ylabel(opts.y_var) ax.set_title(opts.z_var.title() + ' of %s triggers ' % (opts.detector)) -title = '%s of %s triggers over %s and %s' % (opts.z_var.title(), - opts.detector, opts.x_var.title(), opts.y_var.title()) -fig_caption = ("This plot shows the %s of single detector triggers for the %s " - "detector. %s is shown on the colorbar axis against %s and %s " - "on the x- and y-axes." % (opts.z_var, opts.detector, - opts.z_var.title(), opts.x_var, opts.y_var)) pycbc.results.save_fig_with_metadata(fig, opts.output_file, title=title, caption=fig_caption, cmd=' '.join(sys.argv), fig_kwds={'dpi': 200}) diff --git a/bin/pycbc_fit_sngl_trigs b/bin/pycbc_fit_sngl_trigs index 79af37fe680..cef76488606 100644 --- a/bin/pycbc_fit_sngl_trigs +++ b/bin/pycbc_fit_sngl_trigs @@ -204,9 +204,14 @@ for ifo in opt.ifos: elif trigformat == "hdf": if opt.bank_file: # io function can currently only handle 1 file - sngls = io.hdf.SingleDetTriggers(opt.inputs[0], - opt.bank_file, opt.veto_file, opt.veto_segment_name, - filter_func, ifo) + sngls = io.hdf.SingleDetTriggers( + opt.inputs[0], + ifo, + bank_file=opt.bank_file, + veto_file=opt.veto_file, + segment_name=opt.veto_segment_name, + filter_func=filter_func, + ) done_vetoing = True else: sngls = io.hdf.DataFromFiles(opt.inputs, group=ifo, @@ -236,10 +241,10 @@ for ifo in opt.ifos: #rchisq = np.maximum(rchisq, bank_rchisq) if opt.veto_file and not done_vetoing: - logging.info("Applying vetoes from %s for ifo %s" % + logging.info("Applying vetoes from %s for ifo %s" % (opt.veto_file, ifo)) keep_idx, segs = events.veto.indices_outside_segments( - sngls.get_column("end_time").astype(int), + sngls.get_column("end_time").astype(int), [opt.veto_file], ifo=ifo, segment_name=opt.veto_segment_name) snr = snr[keep_idx] @@ -304,7 +309,7 @@ for ifo in opt.ifos: else: mass1_inbin = mass1[pind == i] mass2_inbin = mass2[pind == i] - mass_tuples = [(m1, m2) for m1, m2 in + mass_tuples = [(m1, m2) for m1, m2 in zip(mass1_inbin, mass2_inbin)] numtmpl = len(set(mass_tuples)) templates[ifo][i] = numtmpl @@ -326,7 +331,7 @@ for ifo in opt.ifos: stdev[ifo][th][i] = sig_alpha _, ks_prob[ifo][th][i] = trstats.KS_test( opt.fit_function, vals_inbin, alpha, th) - outfile.write("%s %.2f %.3g %.3g %d %d %.3f %.3f %.3g\n" % + outfile.write("%s %.2f %.3g %.3g %d %d %.3f %.3f %.3g\n" % (ifo, th, lower, upper, numtmpl, counts[ifo][th][i], alpha, sig_alpha, ks_prob[ifo][th][i])) @@ -335,7 +340,7 @@ for ifo in opt.ifos: histcounts, edges = np.histogram(vals_inbin, bins=50) cum_counts = histcounts[::-1].cumsum()[::-1] binlabel = r"%.2g - %.2g" % (lower, upper) - plt.semilogy(edges[:-1], cum_counts, linewidth=2, + plt.semilogy(edges[:-1], cum_counts, linewidth=2, color=histcolors[i], label=binlabel, alpha=0.6) plt.semilogy(plotrange, counts[ifo][th][i] * \ @@ -348,7 +353,7 @@ for ifo in opt.ifos: plt.semilogy(plotrange, counts[ifo][th][i] * \ 
trstats.cum_fit(opt.fit_function, plotrange, alpha - \ sig_alpha, th), ":", alpha=0.6, color=histcolors[i]) - + if opt.plot_dir: leg = plt.legend(labelspacing=0.2) plt.setp(leg.get_texts(), fontsize=11) @@ -370,7 +375,7 @@ if opt.plot_dir: for ifo in fits.keys(): for th in opt.stat_threshold: plt.errorbar(pbins.centres(), [fits[ifo][th][i] for i in binind], - yerr=[stdev[ifo][th][i] for i in binind], fmt="+-", + yerr=[stdev[ifo][th][i] for i in binind], fmt="+-", label=ifo + " fit above %.2f" % th) if opt.bin_spacing == "log": plt.semilogx() plt.grid() @@ -381,7 +386,7 @@ if opt.plot_dir: plt.close() for th in opt.stat_threshold: - plt.plot(pbins.centres(), [ks_prob[ifo][th][i] for i in binind], + plt.plot(pbins.centres(), [ks_prob[ifo][th][i] for i in binind], '+--', label=ifo+' KS prob, thresh %.2f' % th) if opt.bin_spacing == 'log': plt.loglog() else : plt.semilogy() diff --git a/bin/pycbc_process_sngls b/bin/pycbc_process_sngls index 4174a72b9a8..c740415ecdd 100644 --- a/bin/pycbc_process_sngls +++ b/bin/pycbc_process_sngls @@ -57,9 +57,14 @@ else: if filter_func is not None: logging.info('Will filter trigs using %s', filter_func) # Filter will be stored as self.mask attribute of sngls instance -sngls = SingleDetTriggers(args.single_trig_file, args.bank_file, - args.veto_file, args.segment_name, filter_func, - args.detector) +sngls = SingleDetTriggers( + args.single_trig_file, + args.detector, + bank_file=args.bank_file, + veto_file=args.veto_file, + segment_name=args.segment_name, + filter_func=filter_func, +) logging.info('Calculating stat') rank_method = stat.get_statistic_from_opts(args, [args.detector]) diff --git a/docs/hwinj.rst b/docs/hwinj.rst index 11da93a62c7..5072b23ab6a 100644 --- a/docs/hwinj.rst +++ b/docs/hwinj.rst @@ -202,7 +202,7 @@ Where ``${START}`` is the start of the injection. 
We kept the same PSD options ( You can print out the recovered SNR and other parameters as follows :: echo `python -c "import numpy;from pycbc.io.hdf import SingleDetTriggers; \ - h1_triggers=SingleDetTriggers('${INSPIRAL_FILE}',None, None, None, None, 'H1'); \ + h1_triggers=SingleDetTriggers('${INSPIRAL_FILE}', 'H1'); \ imax=numpy.argmax(h1_triggers.snr); max_snr=h1_triggers.snr[imax]; \ time=h1_triggers.end_time[imax]; print(time, max_snr)"` diff --git a/examples/search/plotting.ini b/examples/search/plotting.ini index 7b3c92496e4..0b1ab2a2cbe 100644 --- a/examples/search/plotting.ini +++ b/examples/search/plotting.ini @@ -164,14 +164,13 @@ gradient-far = missed-on-top = [plot_singles] -min-snr = 6 [plot_singles-mtotal_eta_newsnr] x-var = mtotal log-x = y-var = eta z-var = "newsnr_sgveto" -min-z = 6 +min-z = 5.5 [plot_range] diff --git a/pycbc/events/ranking.py b/pycbc/events/ranking.py index e06a0f14ac0..5da85d01019 100644 --- a/pycbc/events/ranking.py +++ b/pycbc/events/ranking.py @@ -299,6 +299,21 @@ def get_newsnr_sgveto_psdvar_scaled_threshold(trigs): 'newsnr_sgveto_psdvar_scaled_threshold': get_newsnr_sgveto_psdvar_scaled_threshold, } +# Lists of datasets required in the trigs object for each function +required_datasets = {} +required_datasets['snr'] = ['snr'] +required_datasets['newsnr'] = required_datasets['snr'] + ['chisq', 'chisq_dof'] +required_datasets['new_snr'] = required_datasets['newsnr'] +required_datasets['newsnr_sgveto'] = required_datasets['newsnr'] + ['sg_chisq'] +required_datasets['newsnr_sgveto_psdvar'] = \ + required_datasets['newsnr_sgveto'] + ['psd_var_val'] +required_datasets['newsnr_sgveto_psdvar_threshold'] = \ + required_datasets['newsnr_sgveto_psdvar'] +required_datasets['newsnr_sgveto_psdvar_scaled'] = \ + required_datasets['newsnr_sgveto_psdvar'] +required_datasets['newsnr_sgveto_psdvar_scaled_threshold'] = \ + required_datasets['newsnr_sgveto_psdvar'] + def get_sngls_ranking_from_trigs(trigs, statname, **kwargs): """ @@ -309,7 +324,7 @@ def get_sngls_ranking_from_trigs(trigs, statname, **kwargs): Parameters ----------- - trigs: dict of numpy.ndarrays + trigs: dict of numpy.ndarrays, SingleDetTriggers or ReadByTemplate Dictionary holding single detector trigger information. statname: The statistic to use. diff --git a/pycbc/io/hdf.py b/pycbc/io/hdf.py index 754c63bf663..9bccc92536c 100644 --- a/pycbc/io/hdf.py +++ b/pycbc/io/hdf.py @@ -29,7 +29,8 @@ class HFile(h5py.File): """ Low level extensions to the capabilities of reading an hdf5 File """ - def select(self, fcn, *args, **kwds): + def select(self, fcn, *args, chunksize=10**6, derived=None, group='', + return_data=True, premask=None): """ Return arrays from an hdf5 file that satisfy the given function Parameters @@ -42,66 +43,137 @@ def select(self, fcn, *args, **kwds): A variable number of strings that are keys into the hdf5. These must refer to arrays of equal length. - chunksize : {1e6, int}, optional + chunksize : {10**6, int}, optional Number of elements to read and process at a time. - return_indices : bool, optional - If True, also return the indices of elements passing the function. + derived : dictionary + Dictionary keyed on argument name (must be given in args), values + are a tuple of: the function to be computed, and the required + datasets. The function must take in a dictionary keyed on those + dataset names. + + group : string, optional + The group within the h5py file containing the datasets, e.g. in + standard offline merged trigger files, this would be the IFO. 
This
+            can be included in the args manually, but is required in the case
+            of derived functions, e.g. newsnr.
+
+        return_data : bool, optional, default True
+            If True, return the data for elements passing the function.
+
+        premask : array of booleans or indices, optional
+            The pre-mask to apply to the triggers at read-in; an array of
+            indices is converted to a boolean mask internally.
 
         Returns
         -------
-        values : np.ndarrays
-            A variable number of arrays depending on the number of keys into
-            the hdf5 file that are given. If return_indices is True, the first
-            element is an array of indices of elements passing the function.
+        indices : np.ndarray
+            An array of indices of elements passing the function.
+
+        return_tuple : tuple of np.ndarrays or None
+            A variable number of arrays depending on the number of args
+            provided. If return_data is True, the arrays are the values of
+            each arg; if return_data is False, this is None.
 
-        >>> f = HFile(filename)
-        >>> snr = f.select(lambda snr: snr > 6, 'H1/snr')
+        >>> f = HFile(filename)
+        >>> idx, (snr,) = f.select(lambda snr: snr > 6, 'snr', group='H1')
         """
-        # get references to each array
+        # Required datasets are the arguments requested and datasets given
+        # for any derived functions
+        derived = derived if derived is not None else {}
+        dsets = [a for a in list(args) if a not in derived]
+        for _, rqd_list in derived.values():
+            dsets += rqd_list
+
+        # remove any duplicates from dsets
+        dsets = list(set(dsets))
+
+        # Get the pointers to the h5py Datasets,
+        # check they can all be used together
         refs = {}
+        size = None
+        for ds in dsets:
+            refs[ds] = self[group + '/' + ds]
+            if (size is not None) and (refs[ds].size != size):
+                raise RuntimeError(f"Dataset {ds} is {refs[ds].size} "
+                                   "entries long, which does not match "
+                                   f"previous input datasets ({size}).")
+            size = refs[ds].size
+
+        # Apply any pre-masks
+        if premask is None:
+            mask = np.ones(size, dtype=bool)
+        else:
+            mask = premask
+
+        if not mask.dtype == bool:
+            # mask is an array of indices rather than booleans,
+            # make it a bool array
+            new_mask = np.zeros(size, dtype=bool)
+            new_mask[mask] = True
+            mask = new_mask
+
+        if not mask.size == size:
+            raise RuntimeError(f"Using premask of size {mask.size} which "
+                               f"does not match the input datasets ({size}).")
+
+        # datasets being returned (possibly)
         data = {}
+        indices = np.array([], dtype=np.uint64)
         for arg in args:
-            refs[arg] = self[arg]
             data[arg] = []
 
-        return_indices = kwds.get('return_indices', False)
-        indices = np.array([], dtype=np.uint64)
-
-        # To conserve memory read the array in chunks
-        chunksize = kwds.get('chunksize', int(1e6))
-        size = len(refs[arg])
-
+        # Loop through the chunks:
         i = 0
         while i < size:
             r = i + chunksize if i + chunksize < size else size
 
-            # Read each chunk's worth of data and find where it passes the function
-            partial = [refs[arg][i:r] for arg in args]
+            if not any(mask[i:r]):
+                # Nothing allowed through the mask in this chunk
+                i += chunksize
+                continue
+
+            if all(mask[i:r]):
+                # Everything allowed through the mask in this chunk
+                submask = np.arange(r - i)
+            else:
+                submask = np.flatnonzero(mask[i:r])
+
+            # Read each chunk's worth of data
+            partial_data = {arg: refs[arg][i:r][mask[i:r]]
+                            for arg in dsets}
+            partial = []
+            for a in args:
+                if a in derived.keys():
+                    # If this is a derived dataset, calculate it
+                    derived_fcn = derived[a][0]
+                    partial += [derived_fcn(partial_data)]
+                else:
+                    # otherwise, just read from the file
+                    partial += [partial_data[a]]
+
+            # Find where it passes the function
             keep = fcn(*partial)
-            if return_indices:
-                indices = np.concatenate([indices, np.flatnonzero(keep) + i])
 
-            # Store only the results that pass the function
-            for arg, part in zip(args,
partial):
-                data[arg].append(part[keep])
+            # Keep the indices which pass the function:
+            indices = np.concatenate([indices, submask[keep] + i])
+
+            if return_data:
+                # Store the dataset results that pass the function
+                for arg, part in zip(args, partial):
+                    data[arg].append(part[keep])
 
             i += chunksize
 
-        # Combine the partial results into full arrays
-        if len(args) == 1:
-            res = np.concatenate(data[args[0]])
-            if return_indices:
-                return indices.astype(np.uint64), res
-            else:
-                return res
+        if return_data:
+            return_tuple = tuple(np.concatenate(data[arg])
+                                 for arg in args)
         else:
-            res = tuple(np.concatenate(data[arg]) for arg in args)
-            if return_indices:
-                return (indices.astype(np.uint64),) + res
-            else:
-                return res
+            return_tuple = None
+
+        return indices.astype(np.uint64), return_tuple
 
 
 class DictArray(object):
@@ -402,47 +474,92 @@ class SingleDetTriggers(object):
     """
     Provides easy access to the parameters of single-detector CBC triggers.
     """
-    # FIXME: Some of these are optional and should be kwargs.
-    def __init__(self, trig_file, bank_file, veto_file,
-                 segment_name, filter_func, detector, premask=None):
+    def __init__(self, trig_file, detector, bank_file=None, veto_file=None,
+                 segment_name=None, premask=None, filter_rank=None,
+                 filter_threshold=None, chunksize=10**6, filter_func=None):
+        """
+        Create a SingleDetTriggers instance
+
+        Parameters
+        ----------
+        trig_file : string or os.pathtype, required
+            HDF file containing trigger information
+
+        detector : string, required
+            The detector being used; this is used to access the
+            triggers in trig_file
+
+        bank_file : string or os.pathtype, optional
+            HDF file containing template bank information
+
+        veto_file : string or os.pathtype, optional
+            File used to define vetoes
+
+        segment_name : string, optional
+            Segment name being used in the veto_file
+
+        premask : array of booleans or indices, optional
+            Mask of triggers to read in: either a boolean array over all
+            triggers in the file, or an array of indices
+
+        filter_rank : string, optional
+            The ranking, as defined by ranking.py, to compare to
+            filter_threshold
+
+        filter_threshold : float, required if filter_rank is used
+            Threshold to filter the ranking values
+
+        chunksize : int, default 10**6
+            Size of chunks to read in for the filter_rank / threshold.
+        """
         logging.info('Loading triggers')
         self.trigs_f = HFile(trig_file, 'r')
         self.trigs = self.trigs_f[detector]
+        self.ntriggers = self.trigs['end_time'].size
         self.ifo = detector  # convenience attributes
         self.detector = detector
         if bank_file:
             logging.info('Loading bank')
             self.bank = HFile(bank_file, 'r')
         else:
-            logging.info('No bank file given to SingleDetTriggers')
             # empty dict in place of non-existent hdf file
             self.bank = {}
 
-        if premask is not None:
-            self.mask = premask
-        else:
-            self.mask = np.ones(len(self.trigs['end_time']), dtype=bool)
-
-        if veto_file:
-            logging.info('Applying veto segments')
-            # veto_mask is an array of indices into the trigger arrays
-            # giving the surviving triggers
-            logging.info('%i triggers before vetoes', self.mask.sum())
-            self.veto_mask, _ = events.veto.indices_outside_segments(
-                self.end_time, [veto_file],
-                ifo=detector, segment_name=segment_name)
+        # Apply some masks to start off with - here we should try and apply
+        # them in the order which cuts most things earliest.
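For context on the filter_rank handling just below: HFile.select() can compute rankings on the fly through its derived argument, pairing each requested name with (function, required datasets), as registered in ranking.required_datasets above. A sketch of that mechanism (hypothetical file name):

    from pycbc.io.hdf import HFile
    from pycbc.events import ranking

    fcn_dsets = (ranking.sngls_ranking_function_dict['newsnr'],
                 ranking.required_datasets['newsnr'])
    # 'H1-TRIGGERS.hdf' is a hypothetical merged single-detector trigger file
    with HFile('H1-TRIGGERS.hdf', 'r') as trig_file:
        idx, _ = trig_file.select(
            lambda nsnr: nsnr > 6,
            'newsnr',
            derived={'newsnr': fcn_dsets},
            group='H1',
            return_data=False,
        )
    # 'newsnr' is computed chunk-by-chunk from snr/chisq/chisq_dof rather
    # than read from the file; only the passing indices are returned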
-
-            idx = np.flatnonzero(self.mask)[self.veto_mask]
-            self.mask[:] = False
-            self.mask[idx] = True
-            logging.info('%i triggers remain after vetoes',
-                         len(self.veto_mask))
+        # Apply any pre-masks
+        if premask is None:
+            self.mask = np.ones(self.ntriggers, dtype=bool)
+        else:
+            self.mask = None
+            self.apply_mask(premask)
+
+        if filter_rank:
+            assert filter_threshold is not None
+            logging.info("Applying threshold of %.3f on %s",
+                         filter_threshold, filter_rank)
+            fcn_dsets = (ranking.sngls_ranking_function_dict[filter_rank],
+                         ranking.required_datasets[filter_rank])
+            idx, _ = self.trigs_f.select(
+                lambda rank: rank > filter_threshold,
+                filter_rank,
+                derived={filter_rank: fcn_dsets},
+                return_data=False,
+                premask=self.mask,
+                group=detector,
+                chunksize=chunksize,
+            )
+            logging.info("%d triggers remain", idx.size)
+            # If self.mask already has values, need to take these into account:
+            self.and_masks(idx)
 
         if filter_func:
-            # get required columns into the namespace with dummy attribute
-            # names to avoid confusion with other class properties
+            # Apply a filter on the triggers which is _not_ a ranking statistic
+            for rank_str in ranking.sngls_ranking_function_dict.keys():
+                if f'self.{rank_str}' in filter_func:
+                    logging.warning('Supplying the ranking (%s) in '
+                                    'filter_func is inefficient; consider '
+                                    'using filter_rank instead.', rank_str)
             logging.info('Setting up filter function')
             for c in self.trigs.keys():
                 if c in filter_func:
@@ -451,17 +568,31 @@ def __init__(self, trig_file, bank_file, veto_file,
                 if c in filter_func:
                     # get template parameters corresponding to triggers
                     setattr(self, '_'+c,
-                        np.array(self.bank[c])[self.trigs['template_id'][:]])
+                            np.array(self.bank[c])[self.trigs['template_id'][:]])
 
-            self.filter_mask = eval(filter_func.replace('self.', 'self._'))
+            filter_mask = eval(filter_func.replace('self.', 'self._'))
             # remove the dummy attributes
             for c in chain(self.trigs.keys(), self.bank.keys()):
                 if c in filter_func:
                     delattr(self, '_'+c)
 
-            self.mask = self.mask & self.filter_mask
+            self.apply_mask(filter_mask)
             logging.info('%i triggers remain after cut on %s',
                          sum(self.mask), filter_func)
 
+        if veto_file:
+            logging.info('Applying veto segments')
+            # veto_mask is an array of indices into the trigger arrays
+            # giving the surviving triggers
+            logging.info('%i triggers before vetoes', self.mask_size)
+            veto_mask, _ = events.veto.indices_outside_segments(
+                self.end_time, [veto_file],
+                ifo=detector, segment_name=segment_name)
+
+            # Update mask accordingly
+            self.apply_mask(veto_mask)
+            logging.info('%i triggers remain after vetoes',
+                         self.mask_size)
+
     def __getitem__(self, key):
         # Is key in the TRIGGER_MERGE file?
try: @@ -483,7 +614,7 @@ def checkbank(self, param): % param) def trig_dict(self): - """Returns dict of the masked trigger valuse """ + """Returns dict of the masked trigger values""" mtrigs = {} for k in self.trigs: if len(self.trigs[k]) == len(self.trigs['end_time']): @@ -500,54 +631,115 @@ def get_param_names(cls): if type(m[1]) == property] def apply_mask(self, logic_mask): - """Apply a boolean array to the set of triggers""" - if hasattr(self.mask, 'dtype') and (self.mask.dtype == 'bool'): - orig_indices = self.mask.nonzero()[0][logic_mask] + """Apply a mask over the top of the current mask + + Parameters + ---------- + logic_mask : boolean array or numpy array of indices + """ + if self.mask is None: + self.mask = np.zeros(self.ntriggers, dtype=bool) + self.mask[logic_mask] = True + elif hasattr(self.mask, 'dtype') and (self.mask.dtype == 'bool'): + orig_indices = np.flatnonzero(self.mask)[logic_mask] self.mask[:] = False self.mask[orig_indices] = True else: self.mask = list(np.array(self.mask)[logic_mask]) + def and_masks(self, logic_mask): + """Apply a mask to be combined as a logical and with the current mask. + + Parameters + ---------- + logic_mask : boolean array or numpy array/list of indices + """ + if self.mask_size == self.ntriggers: + # No mask exists, just update to use the given mask + self.apply_mask(logic_mask) + return + + # Use intersection of the indices of True values in the masks + if hasattr(logic_mask, 'dtype') and (logic_mask.dtype == 'bool'): + new_indices = np.flatnonzero(logic_mask) + else: + new_indices = np.array(logic_mask) + + if hasattr(self.mask, 'dtype') and (self.mask.dtype == 'bool'): + orig_indices = np.flatnonzero(self.mask) + else: + orig_indices = np.array(self.mask) + + self.mask[:] = False + and_indices = np.intersect1d(new_indices, orig_indices) + self.mask[and_indices.astype(np.uint64)] = True + def mask_to_n_loudest_clustered_events(self, rank_method, + ranking_threshold=6, n_loudest=10, cluster_window=10): """Edits the mask property of the class to point to the N loudest - single detector events as ranked by ranking statistic. Events are - clustered so that no more than 1 event within +/- cluster-window will - be considered.""" + single detector events as ranked by ranking statistic. + + Events are clustered so that no more than 1 event within +/- + cluster_window will be considered. Can apply a threshold on the + ranking using ranking_threshold + """ - # If this becomes memory intensive we can optimize sds = rank_method.single(self.trig_dict()) stat = rank_method.rank_stat_single((self.ifo, sds)) if len(stat) == 0: - # No triggers, so just return here - self.stat = np.array([]) + # No triggers at all, so just return here + self.apply_mask(np.array([], dtype=np.uint64)) return times = self.end_time + if ranking_threshold: + # Threshold on sngl_ranking + # Note that we can provide None or zero to do no thresholding + # but the default is to do some + keep = stat >= ranking_threshold + stat = stat[keep] + times = times[keep] + self.apply_mask(keep) + + if len(stat) == 0: + logging.warning("No triggers after thresholding") + return + else: + logging.info("%d triggers after thresholding", len(stat)) + index = stat.argsort()[::-1] new_times = [] new_index = [] + # Loop through triggers - loudest first for curr_idx in index: curr_time = times[curr_idx] for time in new_times: + # Have we already got a louder trigger within the window? 
                if abs(curr_time - time) < cluster_window:
                    break
            else:
-                # Only get here if no other triggers within cluster window
+                # Store if no other triggers within cluster window
                 new_index.append(curr_idx)
                 new_times.append(curr_time)
             if len(new_index) >= n_loudest:
+                # We have as many triggers as we want now
                 break
 
+        # For indexing, indices need to be a numpy array, in sorted order
         index = np.array(new_index)
         index.sort()
-        self.stat = stat[index]
-        if hasattr(self.mask, 'dtype') and self.mask.dtype == 'bool':
-            orig_indices = np.flatnonzero(self.mask)[index]
-            self.mask = list(orig_indices)
-        elif isinstance(self.mask, list):
-            self.mask = list(np.array(self.mask)[index])
+        # Apply to the existing mask
+        self.apply_mask(index)
+
+    @property
+    def mask_size(self):
+        if self.mask is None:
+            return self.ntriggers
+        if isinstance(self.mask, list):
+            return len(self.mask)
+        return np.count_nonzero(self.mask)
 
     @property
     def template_id(self):
@@ -690,14 +882,17 @@ def get_ranking(self, rank_name, **kwargs):
         return ranking.get_sngls_ranking_from_trigs(self, rank_name, **kwargs)
 
     def get_column(self, cname):
+        """
+        Read columns while applying the mask
+        """
         # Fiducial value that seems to work, not extensively tuned.
         MFRAC = 0.3
         # If the mask accesses few enough elements then directly use it
         # This can be slower than reading in all the elements if most of them
         # will be read.
-        if self.mask is not None and (isinstance(self.mask, list) or \
-            (len(self.mask.nonzero()[0]) < (len(self.mask) * MFRAC))):
+        if isinstance(self.mask, list) or \
+                self.mask_size < (self.ntriggers * MFRAC):
             return self.trigs[cname][self.mask]
 
         # We have a lot of elements to read so we resort to reading the entire
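To close, the two mask-combination methods added in this class differ in their index conventions: apply_mask interprets indices relative to the current selection, while and_masks intersects indices given in whole-file trigger coordinates. A toy sketch (hypothetical file name, illustrative values):

    import numpy as np
    from pycbc.io.hdf import SingleDetTriggers

    # 'H1-TRIGGERS.hdf' is a hypothetical merged single-detector trigger file
    trigs = SingleDetTriggers('H1-TRIGGERS.hdf', 'H1',
                              premask=np.array([10, 11, 12, 13]))

    # apply_mask indexes within the current selection: keep the 1st and 3rd
    # surviving triggers, i.e. original triggers 10 and 12
    trigs.apply_mask([0, 2])

    # and_masks takes original trigger indices and intersects them with the
    # current mask: only trigger 12 survives
    trigs.and_masks(np.array([3, 12]))
    print(trigs.mask_size)  # -> 1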