Merge branch 'gwastro:master' into pr_prototype_cupy_backend
GarethCabournDavies authored Dec 11, 2024
2 parents 61d9f5d + ed7d43e commit 3b3b9a1
Showing 9 changed files with 402 additions and 321 deletions.
247 changes: 131 additions & 116 deletions bin/pygrb/pycbc_pygrb_efficiency

Large diffs are not rendered by default.

345 changes: 170 additions & 175 deletions bin/pygrb/pycbc_pygrb_page_tables

Large diffs are not rendered by default.

24 changes: 20 additions & 4 deletions bin/pygrb/pycbc_pygrb_plot_injs_results
@@ -96,7 +96,7 @@ def complete_mass_data(injs, key, tag):
     if key == 'mtotal':
         data = mass1 + mass2
     elif key == 'mchirp':
-        data = conversions.mchirp_from_mass1_mass2(mass1, mass2)
+        data = pycbc.conversions.mchirp_from_mass1_mass2(mass1, mass2)
     else:
         data = mass2 / mass1
         data = np.where(data > 1, 1./data, data)
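For reference, the chirp mass used here is the standard combination of the component masses; below is a minimal numpy sketch of what `pycbc.conversions.mchirp_from_mass1_mass2` computes (a standalone reimplementation for illustration, not the PyCBC code itself):

```python
import numpy as np

def mchirp(mass1, mass2):
    # Standard chirp mass: (m1*m2)**(3/5) / (m1+m2)**(1/5)
    return (mass1 * mass2) ** 0.6 / (mass1 + mass2) ** 0.2

# An equal-mass 1.4 + 1.4 Msun binary gives mchirp ~= 1.22 Msun
print(mchirp(np.array([1.4]), np.array([1.4])))
```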
@@ -260,7 +260,7 @@ all_off_trigs = ppu.load_data(trig_file, ifos, data_tag='trigs',

 # Extract needed trigger properties and store them as dictionaries
 # Based on trial_dict: if vetoes were applied, trig_* are the veto survivors
-keys = ['network/reweighted_snr']
+keys = ['network/end_time_gc', 'network/reweighted_snr']
 trig_data = ppu.extract_trig_properties(
     trial_dict,
     all_off_trigs,
@@ -269,9 +269,25 @@ trig_data = ppu.extract_trig_properties(
     keys
 )

-background = list(trig_data['network/reweighted_snr'].values())
+# Max BestNR values in each trial: these are stored in a dictionary keyed
+# by slide_id, as arrays indexed by trial number
+background = {k: np.zeros(len(v)) for k, v in trial_dict.items()}
+for slide_id in slide_dict:
+    trig_times = trig_data[keys[0]][slide_id][:]
+    for j, trial in enumerate(trial_dict[slide_id]):
+        # True whenever the trigger is in the trial
+        trial_cut = (trial[0] <= trig_times) & (trig_times < trial[1])
+        # Move on if nothing was in the trial
+        if not trial_cut.any():
+            continue
+        # Max BestNR
+        background[slide_id][j] = max(trig_data[keys[1]][slide_id][trial_cut])
+# Gather all values and max over them
+background = list(background.values())
+background = np.concatenate(background)
+max_bkgd_reweighted_snr = background.max()
 assert total_trials == len(background)
 logging.info("Background bestNR calculated.")

 # =======================
 # Post-process injections
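A self-contained sketch of the per-trial maximisation introduced above, on made-up data (one slide, two trials), showing how the `background` dictionary is filled and flattened:

```python
import numpy as np

# Hypothetical trials dictionary: slide_id -> list of (start, end) trials
trial_dict = {0: [(100., 200.), (200., 300.)]}
trig_times = np.array([120., 150., 250.])   # trigger end times for slide 0
trig_bestnr = np.array([5.0, 7.5, 6.0])     # reweighted SNR of each trigger

background = {k: np.zeros(len(v)) for k, v in trial_dict.items()}
for slide_id, trials in trial_dict.items():
    for j, (start, end) in enumerate(trials):
        trial_cut = (start <= trig_times) & (trig_times < end)
        if trial_cut.any():
            background[slide_id][j] = trig_bestnr[trial_cut].max()

background = np.concatenate(list(background.values()))
print(background)        # [7.5 6. ] -> one max BestNR per trial
print(background.max())  # 7.5
```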
@@ -340,7 +356,7 @@ if len(list(found_after_vetoes['reweighted_snr'])) > 0:
     """
     found_quieter['fap'] = np.array([sum(background > bestnr) for bestnr in
                                      found_quieter['reweighted_snr']],
-                                    dtype=float) / len(background)
+                                    dtype=float) / total_trials

 # 3) Missed due to vetoes
 # TODO: needs function to cherry-pick a subset of inj_data specified by
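Dividing by `total_trials` rather than `len(background)` is now equivalent by construction (the background array holds exactly one entry per trial), and the false-alarm probability reduces to a count of louder trials; a toy illustration with made-up numbers:

```python
import numpy as np

background = np.array([7.5, 6.0, 8.2, 5.5])  # loudest BestNR in each trial
total_trials = len(background)
bestnr = 7.0                                  # BestNR of a found injection
fap = (background > bestnr).sum() / total_trials
print(fap)  # 0.5 -- two of the four trials beat this injection
```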
2 changes: 1 addition & 1 deletion examples/live/make_singles_significance_fits.py
@@ -21,7 +21,6 @@
 # Set attributes to be something appropriate
 f.attrs['start_date'] = 0
 f.attrs['end_date'] = n_days
-f.attrs['fit_threshold'] = 6.5

 f['bins_edges'] = np.logspace(np.log10(min_duration),
                               np.log10(max_duration),
@@ -35,6 +34,7 @@
     ifo_group.attrs['live_time'] = np.round(n_days * 0.7 * 86400.)
     ifo_group.attrs['mean_alpha'] = alpha
     ifo_group.attrs['total_counts'] = daily_counts_per_bin * n_days * n_bins
+    ifo_group.attrs['fit_threshold'] = 6.5

     # Make daily fits datasets, this doesn't actually matter for the test, but
     # means that the file format matches in case of future requirements
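Together with the hunk above, this moves `fit_threshold` from a file-level attribute to a per-detector group attribute; a minimal h5py sketch of the resulting layout (file name and values are illustrative):

```python
import h5py
import numpy as np

n_days = 10
with h5py.File('test_fits.hdf', 'w') as f:
    f.attrs['start_date'] = 0
    f.attrs['end_date'] = n_days
    f['bins_edges'] = np.logspace(np.log10(1.), np.log10(100.), 11)
    for ifo in ['H1', 'L1']:
        ifo_group = f.create_group(ifo)
        ifo_group.attrs['live_time'] = np.round(n_days * 0.7 * 86400.)
        ifo_group.attrs['fit_threshold'] = 6.5  # per-detector, not file-level
```

The matching change in pycbc/events/single.py below reads the threshold back from `fit_file[self.ifo].attrs` accordingly.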
Expand Down
2 changes: 1 addition & 1 deletion pycbc/events/single.py
@@ -320,7 +320,7 @@ def calculate_ifar(self, sngl_ranking, duration):
         with HFile(self.fit_file, 'r') as fit_file:
             bin_edges = fit_file['bins_edges'][:]
             live_time = fit_file[self.ifo].attrs['live_time']
-            thresh = fit_file.attrs['fit_threshold']
+            thresh = fit_file[self.ifo].attrs['fit_threshold']
             dist_grp = fit_file[self.ifo][self.sngl_ifar_est_dist]
             rates = dist_grp['counts'][:] / live_time
             coeffs = dist_grp['fit_coeff'][:]
37 changes: 28 additions & 9 deletions pycbc/results/pygrb_postprocessing_utils.py
@@ -30,16 +30,15 @@
 import h5py

 from scipy import stats
-import ligo.segments as segments
+from ligo import segments
+from ligo.segments.utils import fromsegwizard
 from pycbc.events.coherent import reweightedsnr_cut
 from pycbc.events import veto
 from pycbc import add_common_pycbc_options
 from pycbc.io.hdf import HFile

 logger = logging.getLogger('pycbc.results.pygrb_postprocessing_utils')

-from ligo.segments.utils import fromsegwizard
-

 # =============================================================================
 # Arguments functions:
@@ -374,8 +373,6 @@ def load_data(input_file, ifos, rw_snr_threshold=None, data_tag=None,
 def apply_vetoes_to_found_injs(found_missed_file, found_injs, ifos,
                                veto_file=None, keys=None):
     """Separate injections surviving vetoes from vetoed injections.
-    THIS IS ESSENTIALLY AN EMPTY PLACE HOLDER AT THE MOMENT: IT RETURNS
-    THE INJECTIONS GIVEN IN INPUT, WITHOUT APPLYING VETOES.

     Parameters
     ----------
@@ -412,6 +409,28 @@ def apply_vetoes_to_found_injs(found_missed_file, found_injs, ifos,
     found_idx = numpy.arange(len(found_injs[ifos[0]+'/end_time'][:]))
     veto_idx = numpy.array([], dtype=numpy.int64)

+    if veto_file:
+        logging.info("Applying data vetoes to found injections...")
+        for ifo in ifos:
+            inj_time = found_injs[ifo+'/end_time'][:]
+            idx, _ = veto.indices_outside_segments(inj_time, [veto_file], ifo, None)
+            veto_idx = numpy.append(veto_idx, idx)
+            logging.info("%d injections vetoed due to %s.", len(idx), ifo)
+            idx, _ = veto.indices_within_segments(inj_time, [veto_file], ifo, None)
+            found_idx = numpy.intersect1d(found_idx, idx)
+        veto_idx = numpy.unique(veto_idx)
+        logging.info("%d injections vetoed.", len(veto_idx))
+        logging.info("%d injections surviving vetoes.", len(found_idx))
+
+    found_after_vetoes = {}
+    missed_after_vetoes = {}
+    for key in keep_keys:
+        if key == 'network/coincident_snr':
+            found_injs[key] = get_coinc_snr(found_injs)
+        if isinstance(found_injs[key], numpy.ndarray):
+            found_after_vetoes[key] = found_injs[key][found_idx]
+            missed_after_vetoes[key] = found_injs[key][veto_idx]
+
     return found_after_vetoes, missed_after_vetoes, found_idx, veto_idx
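The new veto logic reduces to a numpy index partition: `found_idx` keeps indices inside the good segments of every detector, while `veto_idx` collects indices outside the segments of any detector. A standalone sketch with hypothetical single-interval "good" segments standing in for `veto.indices_within_segments` / `indices_outside_segments`:

```python
import numpy as np

inj_time = np.array([10., 25., 40., 55.])
good_segment = {'H1': (0., 30.), 'L1': (20., 60.)}  # hypothetical intervals

found_idx = np.arange(len(inj_time))
veto_idx = np.array([], dtype=np.int64)
for ifo, (start, end) in good_segment.items():
    inside = (start <= inj_time) & (inj_time < end)
    found_idx = np.intersect1d(found_idx, np.flatnonzero(inside))
    veto_idx = np.append(veto_idx, np.flatnonzero(~inside))
veto_idx = np.unique(veto_idx)

print(found_idx)  # [1] -- only t=25 lies in both detectors' segments
print(veto_idx)   # [0 2 3]
```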


@@ -510,7 +529,7 @@ def extract_trig_properties(trial_dict, trigs, slide_dict, seg_dict, keys):

     # Sort the triggers into each slide
     sorted_trigs = sort_trigs(trial_dict, trigs, slide_dict, seg_dict)
-    n_surviving_trigs = sum([len(i) for i in sorted_trigs.values()])
+    n_surviving_trigs = sum(len(i) for i in sorted_trigs.values())
     msg = f"{n_surviving_trigs} triggers found within the trials dictionary "
     msg += "and sorted."
     logging.info(msg)
@@ -676,7 +695,7 @@ def construct_trials(seg_files, seg_dict, ifos, slide_dict, veto_file,

         iter_int += 1

-    total_trials = sum([len(trial_dict[slide_id]) for slide_id in slide_dict])
+    total_trials = sum(len(trial_dict[slide_id]) for slide_id in slide_dict)
     logging.info("%d trials generated.", total_trials)

     return trial_dict, total_trials
@@ -701,8 +720,8 @@ def sort_stat(time_veto_max_stat):
 def max_median_stat(slide_dict, time_veto_max_stat, trig_stat, total_trials):
     """Return maximum and median of trig_stat and sorted time_veto_max_stat"""

-    max_stat = max([trig_stat[slide_id].max() if trig_stat[slide_id].size
-                    else 0 for slide_id in slide_dict])
+    max_stat = max(trig_stat[slide_id].max() if trig_stat[slide_id].size
+                   else 0 for slide_id in slide_dict)

     full_time_veto_max_stat = sort_stat(time_veto_max_stat)

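As in the two `sum()` calls above, dropping the square brackets turns a list comprehension into a generator expression that `max()` consumes lazily, without building an intermediate list; the `else 0` guard still protects against slides with no triggers, e.g.:

```python
import numpy as np

trig_stat = {0: np.array([3.1, 4.2]), 1: np.array([])}  # toy slide data
max_stat = max(trig_stat[sid].max() if trig_stat[sid].size
               else 0 for sid in trig_stat)
print(max_stat)  # 4.2 -- the empty slide contributes 0 instead of raising
```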
40 changes: 27 additions & 13 deletions pycbc/results/table_utils.py
@@ -86,23 +86,37 @@ def html_table(columns, names, page_size=None, format_strings=None):

     column_descriptions = []
     for column, name in zip(columns, names):
-        if column.dtype.kind == 'S' or column.dtype.kind == 'U':
-            ctype = 'string'
-        else:
+        if column.dtype.kind in 'iuf':
+            # signed and unsigned integers and floats
             ctype = 'number'
+        else:
+            # this comprises strings, bools, complex, void, etc
+            # but we will convert all those to str in a moment
+            ctype = 'string'
         column_descriptions.append((ctype, name))

     data = []
-    for item in zip(*columns):
-        data.append(list(item))
-
-    return google_table_template.render(div_id=div_id,
-                                        page_enable=page,
-                                        column_descriptions = column_descriptions,
-                                        page_size=page_size,
-                                        data=data,
-                                        format_strings=format_strings,
-                                        )
+    for row in zip(*columns):
+        row2 = []
+        # the explicit conversions here are to make sure the JS code
+        # sees proper numbers and not something like 'np.float64(12)'
+        for item, column in zip(row, columns):
+            if column.dtype.kind == 'f':
+                row2.append(float(item))
+            elif column.dtype.kind in 'iu':
+                row2.append(int(item))
+            else:
+                row2.append(str(item))
+        data.append(row2)
+
+    return google_table_template.render(
+        div_id=div_id,
+        page_enable=page,
+        column_descriptions=column_descriptions,
+        page_size=page_size,
+        data=data,
+        format_strings=format_strings,
+    )

 static_table_template = mako.template.Template("""
 <table class="table">
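The point of the explicit casts in the hunk above is that NumPy 2 scalars carry their type wrapper in their repr (`repr(np.float64(12))` is `'np.float64(12.0)'`), which could leak into the generated JavaScript; converting to built-in Python types first keeps the table data JSON-clean. A short standalone demonstration of the conversion loop on toy columns:

```python
import numpy as np

columns = [np.array([12.0, 3.5]), np.array([1, 2]), np.array(['a', 'b'])]

data = []
for row in zip(*columns):
    row2 = []
    for item, column in zip(row, columns):
        if column.dtype.kind == 'f':
            row2.append(float(item))   # plain Python float
        elif column.dtype.kind in 'iu':
            row2.append(int(item))     # plain Python int
        else:
            row2.append(str(item))     # everything else becomes str
    data.append(row2)

print(data)  # [[12.0, 1, 'a'], [3.5, 2, 'b']]
```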
24 changes: 23 additions & 1 deletion pycbc/types/config.py
@@ -230,7 +230,8 @@ def read_ini_file(self, fpath):
        """
        Read a .ini file and return it as a ConfigParser class.
        This function does none of the parsing/combining of sections. It simply
-        reads the file and returns it unedited
+        reads the file, checks if there are overlapping options
+        and returns it unedited

        Stub awaiting more functionality - see configparser_test.py
@@ -245,6 +246,27 @@
         The ConfigParser class containing the read in .ini file
         """
         # Read the file
+
+        options_seen = {}
+
+        for filename in fpath:
+            parser = ConfigParser.ConfigParser()
+            parser.optionxform = str
+            parser.read(filename)
+
+            for section in parser.sections():
+                if section not in options_seen:
+                    options_seen[section] = set()
+
+                section_options = parser.options(section)
+
+                option_intersection = options_seen[section].intersection(section_options)
+
+                if option_intersection:
+                    raise ValueError(f"Duplicate option(s) {', '.join(option_intersection)} found in section '{section}' in file '{filename}'")
+
+                options_seen[section].update(section_options)
+
         self.read(fpath)

     def get_subsections(self, section_name):
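A standalone sketch of the duplicate-option check added here, using plain stdlib `configparser` and two throwaway files; it raises on the second file because both define `opt` in `[sec]`:

```python
import configparser

files = {'a.ini': '[sec]\nopt = 1\n', 'b.ini': '[sec]\nopt = 2\n'}
for name, text in files.items():
    with open(name, 'w') as fp:
        fp.write(text)

options_seen = {}
for filename in files:
    parser = configparser.ConfigParser()
    parser.optionxform = str  # keep option names case-sensitive
    parser.read(filename)
    for section in parser.sections():
        seen = options_seen.setdefault(section, set())
        clash = seen.intersection(parser.options(section))
        if clash:
            raise ValueError(f"Duplicate option(s) {', '.join(clash)} found "
                             f"in section '{section}' in file '{filename}'")
        seen.update(parser.options(section))
```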
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,5 +1,5 @@
 [build-system]
-requires = ["setuptools",
+requires = ["setuptools>=70.0.0",
             "wheel",
             "cython>=0.29.21",
             "numpy>=2.0.0",
