Start operating on contiguous arrays

spacetelescope · Nov 10, 2023 · 43d6e1c · 43d6e1c
1 parent fd8ec01
commit 43d6e1c
Show file tree

Hide file tree

Showing 8 changed files with 119 additions and 84 deletions.
diff --git a/src/stcal/ramp_fitting/ols_cas22/_fit.pyx b/src/stcal/ramp_fitting/ols_cas22/_fit.pyx
@@ -93,9 +93,9 @@ class RampFitOutputs(NamedTuple):
 
 @boundscheck(False)
 @wraparound(False)
-def fit_ramps(float[:, :] resultants,
+def fit_ramps(float[:, ::1] resultants,
               cnp.ndarray[int, ndim=2] dq,
-              float[:] read_noise,
+              float[::1] read_noise,
               float read_time,
               list[list[int]] read_pattern,
               bool use_jump=False,
@@ -109,17 +109,27 @@ def fit_ramps(float[:, :] resultants,
         jump detection to mark additional resultants for each pixel as bad if
         a jump is suspected in them.
 
+        Note: This function needs the arrays to be shaped n_pixels x n_resultants
+            and be C contiguous. This is because this function feeds a single pixel's
+            resultants into the jump detection algorithm which operates on "contiguous"
+            sections of that array of resultants. This means for maximum efficiency,
+            the resultants for pixel i, resultants[i, :] should be contiguous a contiguous
+            array.
+
     Parameters
     ----------
-    resultants : float[n_resultants, n_pixel]
+    resultants : float[:, ::1] (shape = (n_pixels, n_resultants))
         the resultants in electrons (Note that this can be based as any sort of
-        array, such as a numpy array. The memmory view is just for efficiency in
+        array, such as a numpy array. The memory view is just for efficiency in
         cython)
-    dq : np.ndarry[n_resultants, n_pixel]
+        The memory needs to be C contiguous, meaning that the for each fixed pixel i,
+        resultants[i, :] is contiguous in memory.
+    dq : np.ndarry[n_pixels, n_resultants]
         the dq array.  dq != 0 implies bad pixel / CR. (Kept as a numpy array
         so that it can be passed out without copying into new numpy array, will
         be working on memmory views of this array)
-    read_noise : float[n_pixel]
+        The memory needs to be C contiguous, see resultants for details.
+    read_noise : float[::1] (shape = (n_pixels,))
         the read noise in electrons for each pixel (same note as the resultants)
     read_time : float
         Time to perform a readout. For Roman data, this is FRAME_TIME.
@@ -140,8 +150,8 @@ def fit_ramps(float[:, :] resultants,
     A RampFitOutputs tuple
     """
     cdef int n_pixels, n_resultants
-    n_resultants = resultants.shape[0]
-    n_pixels = resultants.shape[1]
+    n_pixels = resultants.shape[0]
+    n_resultants = resultants.shape[1]
 
     # Raise error if input data is inconsistent
     if n_resultants != len(read_pattern):
@@ -150,13 +160,13 @@ def fit_ramps(float[:, :] resultants,
 
     # Compute the main metadata from the read pattern and cast it to memory views
     cdef ReadPattern metadata = from_read_pattern(read_pattern, read_time, n_resultants)
-    cdef float[:] t_bar = metadata.t_bar
-    cdef float[:] tau = metadata.tau
-    cdef int[:] n_reads = metadata.n_reads
+    cdef float[::1] t_bar = metadata.t_bar
+    cdef float[::1] tau = metadata.tau
+    cdef int[::1] n_reads = metadata.n_reads
 
     # Setup pre-compute arrays for jump detection
-    cdef float[:, :] fixed
-    cdef float[:, :] pixel
+    cdef float[:, ::1] fixed
+    cdef float[:, ::1] pixel
     if use_jump:
         # Initialize arrays for the jump detection pre-computed values
         fixed = np.empty((n_fixed_offsets, n_resultants - 1), dtype=np.float32)
@@ -184,8 +194,8 @@ def fit_ramps(float[:, :] resultants,
     #    zero, where as every variance is calculated and set. This means that the
     #    parameters need to be filled with zeros, where as the variances can just
     #    be allocated
-    cdef float[:, :] parameters = np.zeros((n_pixels, Parameter.n_param), dtype=np.float32)
-    cdef float[:, :] variances = np.empty((n_pixels, Variance.n_var), dtype=np.float32)
+    cdef float[:, ::1] parameters = np.zeros((n_pixels, Parameter.n_param), dtype=np.float32)
+    cdef float[:, ::1] variances = np.empty((n_pixels, Variance.n_var), dtype=np.float32)
 
     # Cast the enum values into integers for indexing (otherwise compiler complains)
     #   These will be optimized out
@@ -199,8 +209,8 @@ def fit_ramps(float[:, :] resultants,
     cdef int index
     for index in range(n_pixels):
         # Fit all the ramps for the given pixel
-        fit = fit_jumps(resultants[:, index],
-                        dq[:, index],
+        fit = fit_jumps(resultants[index, :],
+                        dq[index, :],
                         read_noise[index],
                         t_bar,
                         tau,

diff --git a/src/stcal/ramp_fitting/ols_cas22/_jump.pxd b/src/stcal/ramp_fitting/ols_cas22/_jump.pxd
@@ -41,22 +41,22 @@ cdef struct JumpFits:
     RampQueue index
 
 
-cpdef float[:, :] fill_fixed_values(float[:, :] fixed,
-                                    float[:] t_bar,
-                                    float[:] tau,
-                                    int[:] n_reads,
-                                    int n_resultants)
+cpdef float[:, ::1] fill_fixed_values(float[:, ::1] fixed,
+                                      float[::1] t_bar,
+                                      float[::1] tau,
+                                      int[::1] n_reads,
+                                      int n_resultants)
 
 
-cdef JumpFits fit_jumps(float[:] resultants,
-                        int[:] dq,
+cdef JumpFits fit_jumps(float[::1] resultants,
+                        int[::1] dq,
                         float read_noise,
-                        float[:] t_bar,
-                        float[:] tau,
-                        int[:] n_reads,
+                        float[::1] t_bar,
+                        float[::1] tau,
+                        int[::1] n_reads,
                         int n_resultants,
-                        float[:, :] fixed,
-                        float[:, :] pixel,
+                        float[:, ::1] fixed,
+                        float[:, ::1] pixel,
                         Thresh thresh,
                         bool use_jump,
                         bool include_diagnostic)
diff --git a/src/stcal/ramp_fitting/ols_cas22/_jump.pyx b/src/stcal/ramp_fitting/ols_cas22/_jump.pyx
@@ -67,25 +67,25 @@ from stcal.ramp_fitting.ols_cas22._ramp cimport RampIndex, RampQueue, RampFit, f
 @boundscheck(False)
 @wraparound(False)
 @cdivision(True)
-cpdef inline float[:, :] fill_fixed_values(float[:, :] fixed,
-                                           float[:] t_bar,
-                                           float[:] tau,
-                                           int[:] n_reads,
-                                           int n_resultants):
+cpdef inline float[:, ::1] fill_fixed_values(float[:, ::1] fixed,
+                                             float[::1] t_bar,
+                                             float[::1] tau,
+                                             int[::1] n_reads,
+                                             int n_resultants):
     """
     Pre-compute all the values needed for jump detection which only depend on
         the read pattern.
 
     Parameters
     ----------
-    fixed : float[:, :]
+    fixed : float[:, ::1]
         A pre-allocated memoryview to store the pre-computed values in, its faster
         to allocate outside this function.
-    t_bar : float[:]
+    t_bar : float[::1]
         The average time for each resultant
-    tau : float[:]
+    tau : float[::1]
         The time variance for each resultant
-    n_reads : int[:]
+    n_reads : int[::1]
         The number of reads for each resultant
     n_resultants : int
         The number of resultants for the read pattern
@@ -142,11 +142,11 @@ cpdef inline float[:, :] fill_fixed_values(float[:, :] fixed,
 @boundscheck(False)
 @wraparound(False)
 @cdivision(True)
-cpdef inline float[:, :] _fill_pixel_values(float[:, :] pixel,
-                                            float[:] resultants,
-                                            float[:, :] fixed,
-                                            float read_noise,
-                                            int n_resultants):
+cpdef inline float[:, ::1] _fill_pixel_values(float[:, ::1] pixel,
+                                              float[::1] resultants,
+                                              float[:, ::1] fixed,
+                                              float read_noise,
+                                              int n_resultants):
     """
     Pre-compute all the values needed for jump detection which only depend on
         the a specific pixel (independent of the given ramp for a pixel).
@@ -226,7 +226,7 @@ cdef inline float _threshold(Thresh thresh, float slope):
 @boundscheck(False)
 @wraparound(False)
 @cdivision(True)
-cdef inline float _correction(float[:] t_bar, RampIndex ramp, float slope):
+cdef inline float _correction(float[::1] t_bar, RampIndex ramp, float slope):
     """
     Compute the correction factor for the variance used by a statistic
 
@@ -297,9 +297,9 @@ cdef inline float _statstic(float local_slope,
 
 @boundscheck(False)
 @wraparound(False)
-cdef inline (int, float) _fit_statistic(float[:, :] pixel,
-                                        float[:, :] fixed,
-                                        float[:] t_bar,
+cdef inline (int, float) _fit_statistic(float[:, ::1] pixel,
+                                        float[:, ::1] fixed,
+                                        float[::1] t_bar,
                                         float slope,
                                         RampIndex ramp):
     """
@@ -390,15 +390,15 @@ cdef inline (int, float) _fit_statistic(float[:, :] pixel,
 @boundscheck(False)
 @wraparound(False)
 @cdivision(True)
-cdef inline JumpFits fit_jumps(float[:] resultants,
-                               int[:] dq,
+cdef inline JumpFits fit_jumps(float[::1] resultants,
+                               int[::1] dq,
                                float read_noise,
-                               float[:] t_bar,
-                               float[:] tau,
-                               int[:] n_reads,
+                               float[::1] t_bar,
+                               float[::1] tau,
+                               int[::1] n_reads,
                                int n_resultants,
-                               float[:, :] fixed,
-                               float[:, :] pixel,
+                               float[:, ::1] fixed,
+                               float[:, ::1] pixel,
                                Thresh thresh,
                                bool use_jump,
                                bool include_diagnostic):

diff --git a/src/stcal/ramp_fitting/ols_cas22/_ramp.pxd b/src/stcal/ramp_fitting/ols_cas22/_ramp.pxd
@@ -23,7 +23,7 @@ cdef class ReadPattern:
     cdef int[::1] n_reads
 
 
-cpdef RampQueue init_ramps(int[:] dq,
+cpdef RampQueue init_ramps(int[::1] dq,
                            int n_resultants)
 
 
@@ -32,9 +32,9 @@ cpdef ReadPattern from_read_pattern(list[list[int]] read_pattern,
                                     int n_resultants)
 
 
-cdef RampFit fit_ramp(float[:] resultants_,
-                      float[:] t_bar_,
-                      float[:] tau_,
-                      int[:] n_reads,
+cdef RampFit fit_ramp(float[::1] resultants_,
+                      float[::1] t_bar_,
+                      float[::1] tau_,
+                      int[::1] n_reads,
                       float read_noise,
                       RampIndex ramp)
diff --git a/src/stcal/ramp_fitting/ols_cas22/_ramp.pyx b/src/stcal/ramp_fitting/ols_cas22/_ramp.pyx
@@ -146,7 +146,7 @@ cpdef ReadPattern from_read_pattern(list[list[int]] read_pattern, float read_tim
 
 @boundscheck(False)
 @wraparound(False)
-cpdef inline RampQueue init_ramps(int[:] dq, int n_resultants):
+cpdef inline RampQueue init_ramps(int[::1] dq, int n_resultants):
     """
     Create the initial ramp "queue" for each pixel
         if dq[index_resultant, index_pixel] == 0, then the resultant is in a ramp
@@ -238,10 +238,10 @@ cdef inline float _get_power(float signal):
 @boundscheck(False)
 @wraparound(False)
 @cdivision(True)
-cdef inline RampFit fit_ramp(float[:] resultants_,
-                             float[:] t_bar_,
-                             float[:] tau_,
-                             int[:] n_reads_,
+cdef inline RampFit fit_ramp(float[::1] resultants_,
+                             float[::1] t_bar_,
+                             float[::1] tau_,
+                             int[::1] n_reads_,
                              float read_noise,
                              RampIndex ramp):
     """

diff --git a/src/stcal/ramp_fitting/ols_cas22_fit.py b/src/stcal/ramp_fitting/ols_cas22_fit.py
@@ -119,9 +119,13 @@ def fit_ramps_casertano(
         dq = dq.reshape(orig_shape + (1,))
         read_noise = read_noise.reshape(orig_shape[1:] + (1,))
 
+    # Force the input arrays to be contiguous in memory over the heaviest used
+    #   dimension (resultants), the ascontiguousarray can produce a copy
+    resultants_ = np.ascontiguousarray(resultants.reshape(resultants.shape[0], -1).T)
+    dq_ = np.ascontiguousarray(dq.reshape(dq.shape[0], -1).T)
     output = ols_cas22.fit_ramps(
-        resultants.reshape(resultants.shape[0], -1),
-        dq.reshape(resultants.shape[0], -1),
+        resultants_,
+        dq_,
         read_noise.reshape(-1),
         read_time,
         read_pattern,
@@ -130,7 +134,8 @@ def fit_ramps_casertano(
 
     parameters = output.parameters.reshape(orig_shape[1:] + (2,))
     variances = output.variances.reshape(orig_shape[1:] + (3,))
-    dq = output.dq.reshape(orig_shape)
+    # Undo the contiguous memory trickery for dq, resultants don't need to be touched
+    dq[:, :, :] = output.dq.T.reshape(orig_shape)
 
     if resultants.shape != orig_shape:
         parameters = parameters[0]