Commit
Merge pull request #1200 from cta-observatory/likelihood_reco_dvr_and_srcdep_update

Likelihood reconstruction update
rlopezcoto authored Mar 12, 2024
2 parents 1d98657 + c241816 commit e0f6d78
Showing 7 changed files with 120 additions and 63 deletions.
53 changes: 42 additions & 11 deletions lstchain/data/lstchain_lhfit_config.json
@@ -141,39 +141,69 @@
"lhfit_width",
"lhfit_length",
"lhfit_length_asymmetry",
"lhfit_time_gradient"
"lhfit_time_gradient",
"leakage_intensity_width_2",
"lhfit_r",
"lhfit_psi",
"sin_az_tel",
"alt_tel"
],

"disp_method": "disp_vector",
"disp_method": "disp_norm_sign",

"disp_regression_features": [
"lhfit_log_intensity",
"lhfit_width",
"lhfit_length",
"lhfit_wl",
"lhfit_length_asymmetry",
"lhfit_time_gradient",
"leakage_intensity_width_2",
"lhfit_r",
"lhfit_psi",
"lhfit_time_gradient"
"lhfit_phi",
"sin_az_tel",
"alt_tel"
],

"disp_classification_features": [
"lhfit_log_intensity",
"lhfit_width",
"lhfit_length",
"lhfit_wl",
"lhfit_length_asymmetry",
"lhfit_time_gradient",
"leakage_intensity_width_2",
"lhfit_r",
"lhfit_wl",
"lhfit_psi",
"lhfit_time_gradient"
"lhfit_phi",
"sin_az_tel",
"alt_tel"
],

"particle_classification_features": [
"lhfit_log_intensity",
"lhfit_width",
"lhfit_length",
"lhfit_wl",
"lhfit_length_asymmetry",
"lhfit_time_gradient",
"log_intensity",
"width",
"length",
"x",
"y",
"wl",
"signed_skewness",
"kurtosis",
"signed_time_gradient",
"leakage_intensity_width_2",
"lhfit_r",
"lhfit_wl",
"lhfit_psi",
"lhfit_phi",
"log_reco_energy",
"reco_disp_dx",
"reco_disp_dy"
"reco_disp_norm",
"sin_az_tel",
"alt_tel"
],

"allowed_tels": [1],
@@ -212,6 +242,7 @@

"LSTCalibrationCalculator":{
"systematic_correction_path": null,
"npe_median_cut_outliers": [-5,5],
"squared_excess_noise_factor": 1.222,
"flatfield_product": "FlasherFlatFieldCalculator",
"pedestal_product": "PedestalIntegrator",
@@ -236,7 +267,7 @@
"tel_id":1,
"time_sampling_correction_path": null,
"charge_product":"LocalPeakWindowSum",
"charge_median_cut_outliers": [-0.5,0.5],
"charge_median_cut_outliers": [-0.9,2],
"charge_std_cut_outliers": [-10,10],
"time_cut_outliers": [2,38],
"LocalPeakWindowSum":{
@@ -272,7 +303,7 @@
],
"n_peaks": 0,
"no_asymmetry": false,
"use_weight": false,
"use_interleaved": true,
"verbose": 0
}
}
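
Two of these config changes matter downstream: `disp_method` switches from a single (dx, dy) vector regressor to a norm regressor plus a sign classifier, and the fitter block replaces the removed per-sample weighting with interleaved pedestal estimation. A minimal sketch of inspecting and toggling the new options, assuming the fitter settings sit under the `lh_fit_config` key that `r0_to_dl1.py` reads below:

```python
import json

# Load the likelihood-fit configuration shipped with lstchain.
with open("lstchain/data/lstchain_lhfit_config.json") as f:
    config = json.load(f)

# 'disp_norm_sign' trains a regressor for |disp| and a classifier for
# its sign, instead of the single (dx, dy) regressor of 'disp_vector'.
print(config["disp_method"])

# Noise is now measured on interleaved pedestal events (real data only),
# replacing the removed 'use_weight' option.
config["lh_fit_config"]["use_interleaved"] = True
```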
29 changes: 20 additions & 9 deletions lstchain/reco/dl1_to_dl2.py
@@ -360,6 +360,11 @@ def build_models(filegammas, fileprotons,
effective_focal_length = OPTICS.effective_focal_length

df_gamma = update_disp_with_effective_focal_length(df_gamma, effective_focal_length=effective_focal_length)
if 'lh_fit_config' in config.keys():
lhfit_df_gamma = pd.read_hdf(filegammas, key=dl1_likelihood_params_lstcam_key)
df_gamma = pd.concat([df_gamma, lhfit_df_gamma], axis=1)
lhfit_df_proton = pd.read_hdf(fileprotons, key=dl1_likelihood_params_lstcam_key)
df_proton = pd.concat([df_proton, lhfit_df_proton], axis=1)

if config['source_dependent']:
# if source-dependent parameters are already in dl1 data, just read those data
@@ -395,13 +400,6 @@

df_proton = pd.concat([df_proton, src_dep_df_proton['on']], axis=1)

if 'lh_fit_config' in config.keys():
lhfit_df_gamma = pd.read_hdf(filegammas, key=dl1_likelihood_params_lstcam_key)
df_gamma = pd.concat([df_gamma, lhfit_df_gamma], axis=1)

lhfit_df_proton = pd.read_hdf(fileprotons, key=dl1_likelihood_params_lstcam_key)
df_proton = pd.concat([df_proton, lhfit_df_proton], axis=1)

# Normalize all azimuth angles to the range [0, 360) degrees
df_gamma.az_tel = Angle(df_gamma.az_tel, u.rad).wrap_at(360 * u.deg).rad
df_proton.az_tel = Angle(df_proton.az_tel, u.rad).wrap_at(360 * u.deg).rad
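
The wrap keeps the `az_tel` training feature single-valued at the 0/360 degree boundary. A quick illustration of the same `Angle.wrap_at` call on a scalar:

```python
import astropy.units as u
from astropy.coordinates import Angle

# -30 deg of telescope azimuth wraps to 330 deg, kept in radians
az_tel = Angle(-30 * u.deg).wrap_at(360 * u.deg).rad
print(az_tel)   # ~5.7596
```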
@@ -794,8 +792,6 @@ def calc_source_dependent_parameters(data, expected_src_pos_x_m, expected_src_po
src_dep_params['expected_src_x'] = expected_src_pos_x_m
src_dep_params['expected_src_y'] = expected_src_pos_y_m

src_dep_params['dist'] = np.sqrt((data['x'] - expected_src_pos_x_m) ** 2 + (data['y'] - expected_src_pos_y_m) ** 2)

disp, miss = camera_to_shower_coordinates(
expected_src_pos_x_m,
expected_src_pos_y_m,
@@ -805,6 +801,21 @@

src_dep_params['time_gradient_from_source'] = data['time_gradient'] * np.sign(disp) * -1
src_dep_params['skewness_from_source'] = data['skewness'] * np.sign(disp) * -1

if 'lhfit_x' in data.keys():
# Use lhfit parameters for 'dist' and 'alpha'
disp, miss = camera_to_shower_coordinates(
expected_src_pos_x_m,
expected_src_pos_y_m,
data['lhfit_x'],
data['lhfit_y'],
data['lhfit_psi'])
src_dep_params['dist'] = np.sqrt((data['lhfit_x'] - expected_src_pos_x_m) ** 2 +
(data['lhfit_y'] - expected_src_pos_y_m) ** 2)
else:
src_dep_params['dist'] = np.sqrt((data['x'] - expected_src_pos_x_m) ** 2 +
(data['y'] - expected_src_pos_y_m) ** 2)

src_dep_params['alpha'] = np.rad2deg(np.arctan(np.abs(miss / disp)))

return src_dep_params
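
The `dist`/`alpha` computation now prefers the fitted centroid and orientation whenever `lhfit_x` is present. A self-contained sketch of the geometry, with a simplified stand-in for ctapipe's `camera_to_shower_coordinates` and made-up input numbers:

```python
import numpy as np

def shower_frame(x, y, cog_x, cog_y, psi):
    # Project a camera position onto the shower main axis: the
    # longitudinal component plays the role of 'disp', the
    # perpendicular one that of 'miss'.
    dx, dy = x - cog_x, y - cog_y
    longi = dx * np.cos(psi) + dy * np.sin(psi)
    trans = -dx * np.sin(psi) + dy * np.cos(psi)
    return longi, trans

src_x, src_y = 0.20, 0.00                        # expected source position (m)
cog_x, cog_y, psi = 0.05, -0.02, np.deg2rad(10)  # fitted centroid and axis

disp, miss = shower_frame(src_x, src_y, cog_x, cog_y, psi)
dist = np.hypot(src_x - cog_x, src_y - cog_y)
alpha = np.rad2deg(np.arctan(np.abs(miss / disp)))
```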
9 changes: 6 additions & 3 deletions lstchain/reco/r0_to_dl1.py
@@ -303,9 +303,7 @@ def apply_lh_fit(
try:
lhfit_container = fitter(event=event, telescope_id=telescope_id, dl1_container=dl1_container)
except Exception:
logger.error("Unexpected error encountered in likelihood reconstruction.\n"
"Compiled likelihood reconstruction numbaCC functions may be missing.\n"
"In this case you should run: lstchain/scripts/numba_compil_lhfit.py")
logger.error("Unexpected error encountered in likelihood reconstruction.")
raise
else:
lhfit_container = DL1LikelihoodParametersContainer(lhfit_call_status=-10)
@@ -445,6 +443,11 @@ def r0_to_dl1(
if 'lh_fit_config' in config.keys():
lhfit_fitter_config = {'TimeWaveformFitter': config['lh_fit_config']}
lhfit_fitter = TimeWaveformFitter(subarray=subarray, config=Config(lhfit_fitter_config))
if lhfit_fitter_config['TimeWaveformFitter']['use_interleaved'] and not is_simu:
tmp_source = EventSource(input_url=input_filename,
config=Config(config["source_config"]))
lhfit_fitter.get_ped_from_interleaved(tmp_source)
del tmp_source

# initialize the writer of the interleaved events
interleaved_writer = None
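
For real data the fitter needs its pedestal estimate before the main event loop, so a throw-away `EventSource` is opened over the same input file and discarded. Roughly the same call sequence in isolation, assuming `config` and `lhfit_fitter` from the surrounding function and a hypothetical run file name:

```python
from ctapipe.io import EventSource
from traitlets.config import Config

# Hypothetical observed-data run; interleaved pedestal events exist
# only in real data, hence the 'not is_simu' guard above.
tmp_source = EventSource(input_url="LST-1.1.Run03635.0001.fits.fz",
                         config=Config(config["source_config"]))
lhfit_fitter.get_ped_from_interleaved(tmp_source)  # fills .error / .allowed_pixels
del tmp_source  # release the file before the main event loop
```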
41 changes: 34 additions & 7 deletions lstchain/reco/reconstructor.py
@@ -6,8 +6,9 @@

from lstchain.data.normalised_pulse_template import NormalizedPulseTemplate

from ctapipe.containers import EventType
from ctapipe.core import TelescopeComponent
from ctapipe.core.traits import Bool, Float, FloatTelescopeParameter, Int, Path
from ctapipe.core.traits import Bool, Float, FloatTelescopeParameter, Int
from lstchain.io.lstcontainers import DL1LikelihoodParametersContainer
from lstchain.reco.reconstructorCC import log_pdf as log_pdf

@@ -33,12 +34,10 @@ class TimeWaveformFitter(TelescopeComponent):
time_after_shower = FloatTelescopeParameter(default_value=20,
help='Additional time at the end of the fit temporal window.',
allow_none=False).tag(config=True)
use_weight = Bool(False, help='If True, the brightest sample is twice as important as the dimmest pixel in the '
'likelihood. If false all samples are equivalent.', allow_none=False).tag(config=True)
no_asymmetry = Bool(False, help='If true, the asymmetry of the spatial model is fixed to 0.',
allow_none=False).tag(config=True)
use_interleaved = Path(None, help='Location of the dl1 file used to estimate the pedestal exploiting interleaved'
' events.', allow_none=True).tag(config=True)
use_interleaved = Bool(None, help='If true, the std deviation of pedestals and dimmed pixels are estimated on '
'interleaved events', allow_none=True).tag(config=True)
n_peaks = Int(0, help='Maximum brightness (p.e.) for which the full likelihood computation is used. '
'If the Poisson term for Np.e.>n_peak is more than 1e-6 a Gaussian approximation is used.',
allow_none=False).tag(config=True)
@@ -96,6 +95,8 @@ def __init__(self, subarray, config=None, parent=None, **kwargs):
self.transition_charges = {}
for tel_id in subarray.tel:
self.transition_charges[tel_id] = transition_charges[self.crosstalk.tel[tel_id]]
self.error = None
self.allowed_pixels = True

self.start_parameters = None
self.names_parameters = None
@@ -104,6 +105,29 @@
self.bound_parameters = None
self.fcn = None

def get_ped_from_interleaved(self, source):
"""
Parameters
----------
source
"""
self.error = {}
waveforms = {}
for tel_id in self.subarray.tel:
waveforms[tel_id] = []
for i, event in enumerate(source):
if event.trigger.event_type == EventType.SKY_PEDESTAL:
source.r0_r1_calibrator.calibrate(event)
for tel_id in event.r1.tel.keys():
waveforms[tel_id].append(event.r1.tel[tel_id].waveform.squeeze())
for tel_id, tel_waveforms in waveforms.items():
x = np.concatenate(np.asarray(tel_waveforms), axis=1)
std = []
for elt in x:
std.append(np.nanstd(elt))
self.error[tel_id] = std
self.allowed_pixels = (std > 0.5 * np.median(std))
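
The per-pixel standard deviations double as a quality mask: a pixel whose pedestal std falls below half the camera median is treated as dim or turned off and dropped from the fit (see the `mask_pixel & self.allowed_pixels` line in `call_setup` below). With toy numbers:

```python
import numpy as np

std = np.array([1.4, 1.5, 0.1, 1.6])    # pedestal std per pixel (p.e.)
allowed = std > 0.5 * np.median(std)    # median 1.45 -> threshold 0.725
print(allowed)                          # [ True  True False  True]
```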

def call_setup(self, event, telescope_id, dl1_container):
"""
Extract all event dependent quantities used for the fit.
@@ -212,6 +236,7 @@ def call_setup(self, event, telescope_id, dl1_container):
}

mask_pixel, mask_time = self.clean_data(pix_x, pix_y, pix_radius, times, start_parameters, telescope_id)
mask_pixel = mask_pixel & self.allowed_pixels
spatial_ones = np.ones(np.sum(mask_pixel))

is_high_gain = is_high_gain[mask_pixel]
@@ -226,14 +251,16 @@
pix_area = geometry.pix_area[mask_pixel].to_value(unit ** 2)

data = waveform
error = None # TODO include option to use calibration data
error = self.error

filter_pixels = np.nonzero(~mask_pixel)
filter_times = np.nonzero(~mask_time)

if error is None:
std = np.std(data[~mask_pixel])
error = np.full(data.shape[0], std)
else:
error = self.error[telescope_id]

data = np.delete(data, filter_pixels, axis=0)
data = np.delete(data, filter_times, axis=1)
Expand All @@ -247,7 +274,7 @@ def call_setup(self, event, telescope_id, dl1_container):
template.t0, template.amplitude_LG,
template.amplitude_HG, self.n_peaks,
self.transition_charges[telescope_id],
self.use_weight, self.factorial]
self.factorial]

self.start_parameters = start_parameters
self.names_parameters = start_parameters.keys()
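
`call_setup` now prefers the interleaved estimate and only falls back to a single global std measured on pixels outside the cleaning mask. A sketch of the two paths in isolation, with all names local to the example:

```python
import numpy as np

def pixel_errors(waveforms, mask_pixel, interleaved_std=None):
    # Fallback: one common std from the non-selected pixels, broadcast
    # to every pixel; otherwise the per-pixel stds measured by
    # get_ped_from_interleaved.
    if interleaved_std is None:
        std = np.std(waveforms[~mask_pixel])
        return np.full(waveforms.shape[0], std)
    return np.asarray(interleaved_std)

waveforms = np.random.normal(0.0, 1.5, size=(1855, 40))  # dummy R1 data
mask = np.zeros(1855, dtype=bool)                        # nothing selected
print(pixel_errors(waveforms, mask)[:3])
```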
31 changes: 8 additions & 23 deletions lstchain/reco/reconstructorCC.py
@@ -7,7 +7,7 @@


@njit(cache=True)
def log_pdf_ll(mu, waveform, error, crosstalk, sig_s, templates, factorial, n_peaks, weight):
def log_pdf_ll(mu, waveform, error, crosstalk, sig_s, templates, factorial, n_peaks):
"""
Performs the sum log likelihood for low luminosity pixels in TimeWaveformFitter.
The log likelihood is sum(pixels) sum(times) of the log single sample likelihood.
@@ -33,8 +33,6 @@ def log_pdf_ll(mu, waveform, error, crosstalk, sig_s, templates, factorial, n_pe
n_peaks: int64
Size of the factorial array and possible number of photo-electron
in a pixel with relevant Poisson probability
weight : float 64 2D array
Weight to use in the likelihood for each sample
Returns
----------
@@ -65,12 +63,11 @@
if sumlh_k <= 0:
return -np.inf
# Add the log single sample likelihood to the full log likelihood
# An optional weight increasing high signal sample importance is supported
sumlh += weight[i, j] * np.log(sumlh_k)
sumlh += np.log(sumlh_k)
return sumlh

@njit(cache=True)
def log_pdf_hl(mu, waveform, error, crosstalk, templates, weight):
def log_pdf_hl(mu, waveform, error, crosstalk, templates):
"""
Performs the sum log likelihood for high luminosity pixels in TimeWaveformFitter.log_pdf
The log likelihood is sum(pixels) sum(times) of the log single sample likelihood.
@@ -88,8 +85,6 @@ def log_pdf_hl(mu, waveform, error, crosstalk, templates, weight):
Crosstalk factor for each pixel
templates: float64 2D array
Value of the pulse template evaluated in each pixel at each observed time
weight : float 64 2D array
Weight to use in the likelihood for each sample
Returns
----------
@@ -109,8 +104,7 @@
log_gauss = (-(waveform[i, j] - mean) * (waveform[i, j] - mean) / 2.0 / sigma / sigma
- np.log(np.sqrt(2 * np.pi) * sigma))
# Add the log single sample likelihood to the full log likelihood
# An optional weight increasing high signal sample importance is supported
sumlh += weight[i, j] * log_gauss
sumlh += log_gauss
return sumlh
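
For bright pixels the Poisson mixture is replaced by this Gaussian per-sample term, and with the weight argument gone every sample now counts once. The single-sample density in isolation, with `mean` and `sigma` left free since their exact composition (template amplitude, crosstalk, noise) is assembled upstream in `log_pdf`:

```python
import numpy as np

def log_gauss(sample, mean, sigma):
    # Log of a normal density, as accumulated per sample in log_pdf_hl.
    return (-(sample - mean) ** 2 / (2.0 * sigma ** 2)
            - np.log(np.sqrt(2 * np.pi) * sigma))

print(log_gauss(5.2, 5.0, 1.3))   # ~ -1.19
```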


@@ -297,7 +291,7 @@ def nsb_only_waveforms(time, is_high_gain, additional_nsb, amplitude, t_0,
def log_pdf(charge, t_cm, x_cm, y_cm, length, wl, psi, v, rl,
data, error, is_high_gain, sig_s, crosstalks, times, time_shift,
p_x, p_y, pix_area, template_dt, template_t0, template_lg,
template_hg, n_peaks, transition_charge, use_weight, factorial):
template_hg, n_peaks, transition_charge, factorial):
"""
Compute the log likelihood of the model used for a set of input parameters.
@@ -344,8 +338,6 @@ def log_pdf(charge, t_cm, x_cm, y_cm, length, wl, psi, v, rl,
Maximum number of p.e. term used in the low luminosity likelihood
transition_charge: float32
Model charge above which the Gaussian approximation (log_pdf_hl) is used
use_weight: bool
If True, the brightest sample are made more important in the likelihood computation
factorial: unsigned int64
Pre-computed table of factorials
@@ -384,28 +376,21 @@
mask_LL = (mu <= transition_charge) & (mu > 0)
mask_HL = ~mask_LL

if use_weight:
weight = 1.0 + (data / np.max(data))
else:
weight = np.ones(data.shape)

log_pdf_faint = log_pdf_ll(mu[mask_LL],
data[mask_LL],
error[mask_LL],
crosstalks[mask_LL],
sig_s[mask_LL],
templates[mask_LL],
factorial,
n_peaks,
weight[mask_LL])
n_peaks)

log_pdf_bright = log_pdf_hl(mu[mask_HL],
data[mask_HL],
error[mask_HL],
crosstalks[mask_HL],
templates[mask_HL],
weight[mask_HL])
templates[mask_HL])

log_lh = (log_pdf_faint + log_pdf_bright) / np.sum(weight)
log_lh = (log_pdf_faint + log_pdf_bright) / (data.shape[0] * data.shape[1])

return log_lh
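
The final reduction changes accordingly: instead of dividing by the weight sum, the summed log-likelihood is averaged over the full pixels × samples grid, which matches the old behaviour when every weight was 1. In isolation:

```python
import numpy as np

def mean_log_lh(log_pdf_faint, log_pdf_bright, data):
    # Average per-sample log-likelihood over (pixels, samples);
    # identical to the old weighted form when all weights equal 1.
    return (log_pdf_faint + log_pdf_bright) / (data.shape[0] * data.shape[1])

data = np.zeros((1855, 40))
print(mean_log_lh(-1200.0, -300.0, data))   # ~ -0.0202
```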