diff --git a/.gitignore b/.gitignore index d35d7af29..136e50d07 100644 --- a/.gitignore +++ b/.gitignore @@ -139,9 +139,9 @@ dmypy.json # Cython debug symbols cython_debug/ -src/stcal/ramp_fitting/ols_cas22/*.c -src/stcal/ramp_fitting/ols_cas22/*.cpp -src/stcal/ramp_fitting/ols_cas22/*.html +src/stcal/ramp_fitting/_ols_cas22/*.c +src/stcal/ramp_fitting/_ols_cas22/*.cpp +src/stcal/ramp_fitting/_ols_cas22/*.html # setuptools-scm generated module src/stcal/_version.py diff --git a/CHANGES.rst b/CHANGES.rst index 094c64048..7f85b560c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,10 @@ jump ---- - Fix the code to at least always flag the group with the shower and the requested groups after the primary shower. [#237] + + ramp + ---- +- Convert Cython code to using annotated .py files. [#232] 1.5.2 (2023-12-13) ================== diff --git a/pyproject.toml b/pyproject.toml index a945de6ea..84e2e30ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ test = [ requires = [ 'setuptools >=61', 'setuptools_scm[toml] >=3.4', - 'Cython >=0.29.21', + 'Cython >=3.0.0', 'numpy >=1.18', ] build-backend = 'setuptools.build_meta' @@ -81,6 +81,7 @@ testpaths = [ "docs", ] norecursedirs = [ + 'src/stcal/ramp_fitting/_ols_cas22', 'benchmarks', '.asv', '.eggs', @@ -146,7 +147,6 @@ extend-select = [ 'FLY', # flynt (f-string conversion where possible) 'NPY', # NumPy-specific checks (recommendations from NumPy) 'PERF', # Perflint (performance linting) - 'LOG', 'RUF', # ruff specific checks ] ignore = [ diff --git a/setup.py b/setup.py index e176149ef..923c87196 100644 --- a/setup.py +++ b/setup.py @@ -8,20 +8,8 @@ extensions = [ Extension( - "stcal.ramp_fitting.ols_cas22._ramp", - ["src/stcal/ramp_fitting/ols_cas22/_ramp.pyx"], - include_dirs=[np.get_include()], - language="c++", - ), - Extension( - "stcal.ramp_fitting.ols_cas22._jump", - ["src/stcal/ramp_fitting/ols_cas22/_jump.pyx"], - include_dirs=[np.get_include()], - language="c++", - ), - Extension( - "stcal.ramp_fitting.ols_cas22._fit", - ["src/stcal/ramp_fitting/ols_cas22/_fit.pyx"], + "stcal.ramp_fitting._ols_cas22._fit", + ["src/stcal/ramp_fitting/_ols_cas22/_fit.py"], include_dirs=[np.get_include()], language="c++", ), diff --git a/src/stcal/ramp_fitting/_ols_cas22/__init__.py b/src/stcal/ramp_fitting/_ols_cas22/__init__.py new file mode 100644 index 000000000..764b37837 --- /dev/null +++ b/src/stcal/ramp_fitting/_ols_cas22/__init__.py @@ -0,0 +1,14 @@ +""" +This subpackage exists to hold the Cython implementation of the OLS cas22 algorithm + This subpackage is private, and should not be imported directly by users. Instead, + import from stcal.ramp_fitting.ols_cas22. +""" +from ._fit import FixedOffsets, Parameter, PixelOffsets, Variance, fit_ramps + +__all__ = [ + "fit_ramps", + "Parameter", + "Variance", + "PixelOffsets", + "FixedOffsets", +] diff --git a/src/stcal/ramp_fitting/_ols_cas22/_fit.pxd b/src/stcal/ramp_fitting/_ols_cas22/_fit.pxd new file mode 100644 index 000000000..4df8af8c1 --- /dev/null +++ b/src/stcal/ramp_fitting/_ols_cas22/_fit.pxd @@ -0,0 +1,32 @@ +# cython: language_level=3str + + +cpdef enum Parameter: + intercept + slope + n_param + + +cpdef enum Variance: + read_var + poisson_var + total_var + n_var + + +cpdef enum: + JUMP_DET = 4 + + +cpdef enum FixedOffsets: + t_bar_diff + t_bar_diff_sqr + read_recip + var_slope_val + n_fixed_offsets + + +cpdef enum PixelOffsets: + local_slope + var_read_noise + n_pixel_offsets diff --git a/src/stcal/ramp_fitting/_ols_cas22/_fit.py b/src/stcal/ramp_fitting/_ols_cas22/_fit.py new file mode 100644 index 000000000..b8c7f5ac8 --- /dev/null +++ b/src/stcal/ramp_fitting/_ols_cas22/_fit.py @@ -0,0 +1,1077 @@ +# cython: language_level=3str + +""" +Cython implementation of the Casertano+22 algorithm for fitting ramps with jump detection. + + Note that this is written in annotated Python using Cython annotations, meaning + that Cython 3 can interpret this "python" file as if it were a Cython file. This + enables one to use Python tooling to write Cython code, and prevents the need to + context switch between the Python and Cython programming languages. + + Note that everything is crammed into a single file because it enables Cython to + directly optimize the C code it generates. This is because Cython can only optimize + across a single file (i.e. inlining functions only works if the function is in the + same file as the function it is being inlined into). This helps aid the C compiler + in optimizing C code when it compiles. + +Enums +----- +Parameter: + This enum is used to index into the parameter output array. + slope: 0 + intercept: 1 + n_param: 2 (number of parameters output) +Variance: + This enum is used to index into the variance output array. + read_var: 0 + poisson_var: 1 + total_var: 2 + n_var: 3 (number of variances output) + +Functions +--------- +fit_ramps: + This is the main driver program for the Casertano+22 algorithm. It fits ramps + with jump detection (if enabled) to a series of pixels, returning the Cas22 + fit parameters and variances for each pixel. This function is not intended to + be called outside of stcal itself as it requires a lot of pre-allocation of + memory views to be passed in. Use the `stcal.ramp_fitting.ols_cas22.fit_ramps` + function instead. +""" +import cython +from cython.cimports.libc.math import INFINITY, NAN, fabs, fmaxf, isnan, log10, sqrt +from cython.cimports.libcpp import bool as cpp_bool +from cython.cimports.libcpp.list import list as cpp_list +from cython.cimports.libcpp.vector import vector +from cython.cimports.stcal.ramp_fitting._ols_cas22._fit import ( + JUMP_DET, + FixedOffsets, + Parameter, + PixelOffsets, + Variance, +) + +RampIndex = cython.struct( + start=cython.int, + end=cython.int, +) +RampQueue = cython.typedef(vector[RampIndex]) +RampFit = cython.struct( + slope=cython.float, + read_var=cython.float, + poisson_var=cython.float, +) +JumpFits = cython.struct( + jumps=vector[cython.int], + fits=vector[RampFit], + index=RampQueue, +) +Thresh = cython.struct( + intercept=cython.float, + constant=cython.float, +) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.ccall +def fit_ramps( + resultants: cython.float[:, :], + dq: cython.int[:, :], + read_noise: cython.float[:], + parameters: cython.float[:, :], + variances: cython.float[:, :], + t_bar: cython.float[:], + tau: cython.float[:], + n_reads: cython.int[:], + single_pixel: cython.float[:, :], + double_pixel: cython.float[:, :], + single_fixed: cython.float[:, :], + double_fixed: cython.float[:, :], + use_jump: cpp_bool, + intercept: cython.float, + constant: cython.float, + include_diagnostic: cpp_bool, +) -> cpp_list[JumpFits]: + """Fit ramps using the Casertano+22 algorithm. + This implementation uses the Cas22 algorithm to fit ramps, where + ramps are fit between bad resultants marked by dq flags for each pixel + which are not equal to zero. If use_jump is True, it additionally uses + jump detection to mark additional resultants for each pixel as bad if + a jump is suspected in them. + + Parameters + ---------- + resultants : float[n_resultants, n_pixel] + the resultants in electrons (Note that this can be based as any sort of + array, such as a numpy array. The memory view is just for efficiency in + cython) + dq : np.ndarry[n_resultants, n_pixel] + the dq array. dq != 0 implies bad pixel / CR. (Kept as a numpy array + so that it can be passed out without copying into new numpy array, will + be working on memory views of this array) + read_noise : float[n_pixel] + the read noise in electrons for each pixel (same note as the resultants) + parameters : float[n_pixel, 2] + The output array for the fit parameters. The first dimension is the + intercept, the second dimension is the slope. + variances : float[n_pixel, 3] + The output array for the fit variances. The first dimension is the + The first dimension is the read noise variance, the second dimension + is the poissson variance, and the third dimension is the total variance. + t_bar : float[n_resultants] + The average times for each resultant computed from the read pattern + tau : float[n_resultants] + The variance in the time for each resultant computed from the read pattern + n_reads : int[n_resultants] + The number of reads for each resultant computed from the read pattern + single_pixel : float[2, n_resultants - 1] + Pre-allocated array for the jump detection fixed values for a given pixel. + These will hold single difference values. + double_pixel : float[2, n_resultants - 2] + Pre-allocated array for the jump detection fixed values for a given pixel. + These will hold double difference values. + single_fixed : float[4, n_resultants - 1] + Pre-allocated array for the jump detection fixed values for all pixels. + These will hold single difference values. + double_fixed : float[4, n_resultants - 2] + Pre-allocated array for the jump detection fixed values for all pixels. + These will hold double difference values. + use_jump : bool + If True, use the jump detection algorithm to identify CRs. + If False, use the DQ array to identify CRs. + intercept : float + The intercept value for the threshold function. Default=5.5 + constant : float + The constant value for the threshold function. Default=1/3.0 + include_diagnostic : bool + If True, include the raw ramp fits in the output. Default=False + + Notes + ----- + The single_pixel, double_pixel, single_fixed, and double_fixed arrays + are passed in so that python can use numpy to pre-allocate the arrays + in python code. Surprisingly this is more efficient than using numpy + to allocate these arrays in cython code. This is because numpy requires + a back and forth jump between python and cython calls which induces a + lot of overhead. + + Returns + ------- + list of JumpFits (if include_diagnostic is True) + """ + n_resultants: cython.int = resultants.shape[0] + n_pixels: cython.int = resultants.shape[1] + + if use_jump: + # Pre-compute the values from the read pattern + _fill_fixed_values(single_fixed, double_fixed, t_bar, tau, n_reads, n_resultants) + + # Create a threshold struct + thresh: Thresh = Thresh(intercept, constant) + + # Create variable to old the diagnostic data + # Use list because this might grow very large which would require constant + # reallocation. We don't need random access, and this gets cast to a python + # list in the end. + ramp_fits: cpp_list[JumpFits] = cpp_list[JumpFits]() + + # Run the jump fitting algorithm for each pixel + fit: JumpFits + index: cython.int + for index in range(n_pixels): + # Fit all the ramps for the given pixel + fit = _fit_pixel( + parameters[index, :], + variances[index, :], + resultants[:, index], + dq[:, index], + read_noise[index], + t_bar, + tau, + n_reads, + n_resultants, + single_pixel, + double_pixel, + single_fixed, + double_fixed, + thresh, + use_jump, + include_diagnostic, + ) + + # Store diagnostic data if requested + if include_diagnostic: + ramp_fits.push_back(fit) + + # Cast memory views into numpy arrays for ease of use in python. + return ramp_fits + + +_slope = cython.declare(cython.int, Parameter.slope) +_read_var = cython.declare(cython.int, Variance.read_var) +_poisson_var = cython.declare(cython.int, Variance.poisson_var) +_total_var = cython.declare(cython.int, Variance.total_var) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +@cython.inline +@cython.cfunc +def _fit_pixel( + parameters: cython.float[:], + variances: cython.float[:], + resultants: cython.float[:], + dq: cython.int[:], + read_noise: cython.float, + t_bar: cython.float[:], + tau: cython.float[:], + n_reads: cython.int[:], + n_resultants: cython.int, + single_pixel: cython.float[:, :], + double_pixel: cython.float[:, :], + single_fixed: cython.float[:, :], + double_fixed: cython.float[:, :], + thresh: Thresh, + use_jump: cpp_bool, + include_diagnostic: cpp_bool, +) -> JumpFits: + """ + Compute all the ramps for a single pixel using the Casertano+22 algorithm + with jump detection. + + Parameters + ---------- + parameters : float[:] + 2 element array for the output parameters (slice of the total parameters array). + This will be modified in place, so the array it is a slice of will be modified + as a side effect. + variance : float[:] + 3 element array for the output variances (slice of the total variances array) + This will be modified in place, so the array it is a slice of will be modified + as a side effect. + resultants : float[:] + The resultants for the pixel + dq : int[:] + The dq flags for the pixel. This is a slice of the dq array. This is modified + in place, so the external dq flag array will be modified as a side-effect. + read_noise : float + The read noise for the pixel. + t_bar : float[:] + The average time for each resultant + tau : float[:] + The time variance for each resultant + n_reads : int[:] + The number of reads for each resultant + n_resultants : int + The number of resultants for the pixel + single_pixel : float[:, :] + A pre-allocated array for the jump detection fixed values for the + given pixel. This will be modified in place, it is passed in to avoid + re-allocating it for each pixel. + These will hold single difference values. + double_pixel : float[:, :] + A pre-allocated array for the jump detection fixed values for the + given pixel. This will be modified in place, it is passed in to avoid + re-allocating it for each pixel. These will hold double difference values. + single-fixed : float[:, :] + The jump detection pre-computed values for a given read_pattern. + These will hold single difference values. + double-fixed : float[:, :] + The jump detection pre-computed values for a given read_pattern. + These will hold double difference values. + thresh : Thresh + The threshold parameter struct for jump detection + use_jump : bool + Turn on or off jump detection. + include_diagnostic : bool + Turn on or off recording all the diaganostic information on the fit + + Returns + ------- + RampFits struct of all the fits for a single pixel + """ + # Find initial set of ramps + ramps: RampQueue = _init_ramps(dq, n_resultants) + + # Initialize algorithm + parameters[:] = 0 + variances[:] = 0 + + jumps: vector[cython.int] = vector[cython.int]() + fits: vector[RampFit] = vector[RampFit]() + index: RampQueue = RampQueue() + + # Declare variables for the loop + jump: Jump + ramp: RampIndex + ramp_fit: RampFit + weight: cython.float + total_weight: cython.float = 0 + + # Fill in the jump detection pre-compute values for a single pixel + if use_jump: + _fill_pixel_values( + single_pixel, double_pixel, single_fixed, double_fixed, resultants, read_noise, n_resultants + ) + + # Run while the Queue is non-empty + while not ramps.empty(): + # Remove top ramp of the stack to use + ramp = ramps.back() + ramps.pop_back() + + # Compute fit using the Casertano+22 algorithm + ramp_fit = _fit_ramp(resultants, t_bar, tau, n_reads, read_noise, ramp) + + # Run jump detection + jump = _jump_detection( + single_pixel, + double_pixel, + single_fixed, + double_fixed, + t_bar, + ramp_fit.slope, + ramp, + thresh, + use_jump, + ) + + if jump.detected: + # A jump was detected! + # => Split the ramp and record the jumps + # Note that we have to do this splitting and recording here because + # vectors cannot be modified in place, so we have to copy the vectors + # if they were updated in a separate function, which is expensive. + + # Update the dq flags + dq[jump.jump0] = JUMP_DET + dq[jump.jump1] = JUMP_DET + + # Record jump diagnostics + if include_diagnostic: + jumps.push_back(jump.jump0) + jumps.push_back(jump.jump1) + + # The two resultant indices need to be skipped, therefore + # the two possible new ramps are: + # RampIndex(ramp.start, jump0 - 1) + # RampIndex(jump1 + 1, ramp.end) + # This is because the RampIndex contains the index of the + # first and last resultants in the sub-ramp it describes. + if jump.jump0 > ramp.start: + # Note that when jump0 == ramp.start, we have detected a + # jump in the first resultant of the ramp. + + # Add ramp from start to right before jump0 + ramps.push_back(RampIndex(ramp.start, jump.jump0 - 1)) + + if jump.jump1 < ramp.end: + # Note that if jump1 == ramp.end, we have detected a + # jump in the last resultant of the ramp. + + # Add ramp from right after jump1 to end + ramps.push_back(RampIndex(jump.jump1 + 1, ramp.end)) + else: + # No jump was detected! + # => Record the fit. + + # Record the diagnostics + if include_diagnostic: + fits.push_back(ramp_fit) + index.push_back(ramp) + + # Start computing the averages using a lazy process + # Note we do not do anything in the NaN case for degenerate ramps + if not isnan(ramp_fit.slope): + # protect weight against the extremely unlikely case of a zero + # variance + weight = 0 if ramp_fit.read_var == 0 else 1 / ramp_fit.read_var + total_weight += weight + + parameters[_slope] += weight * ramp_fit.slope + variances[_read_var] += weight**2 * ramp_fit.read_var + variances[_poisson_var] += weight**2 * ramp_fit.poisson_var + + # Finish computing averages using the lazy process + parameters[_slope] /= total_weight if total_weight != 0 else 1 + variances[_read_var] /= total_weight**2 if total_weight != 0 else 1 + variances[_poisson_var] /= total_weight**2 if total_weight != 0 else 1 + + # Multiply poisson term by flux, (no negative fluxes) + variances[_poisson_var] *= max(parameters[_slope], 0) + variances[_total_var] = variances[_read_var] + variances[_poisson_var] + + return JumpFits(jumps, fits, index) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +@cython.inline +@cython.cfunc +def _fit_ramp( + resultants_: cython.float[:], + t_bar_: cython.float[:], + tau_: cython.float[:], + n_reads_: cython.int[:], + read_noise: cython.float, + ramp: RampIndex, +) -> RampFit: + """ + Fit a single ramp using Casertano+22 algorithm. + + Parameters + ---------- + resultants_ : float[:] + All of the resultants for the pixel + t_bar_ : float[:] + All the t_bar values + tau_ : float[:] + All the tau values + n_reads_ : int[:] + All the n_reads values + read_noise : float + The read noise for the pixel + ramp : RampIndex + Struct for start and end of ramp to fit + + Returns + ------- + RampFit + struct containing + - slope + - read_var + - poisson_var + """ + n_resultants: cython.int = ramp.end - ramp.start + 1 + + # Special case where there is no or one resultant, there is no fit and + # we bail out before any computations. + # Note that in this case, we cannot compute the slope or the variances + # because these computations require at least two resultants. Therefore, + # this case is degernate and we return NaNs for the values. + if n_resultants <= 1: + return RampFit(NAN, NAN, NAN) + + # Compute the fit + i: cython.int = 0 + j: cython.int = 0 + + # Setup data for fitting (work over subset of data) to make things cleaner + # Recall that the RampIndex contains the index of the first and last + # index of the ramp. Therefore, the Python slice needed to get all the + # data within the ramp is: + # ramp.start:ramp.end + 1 + resultants: cython.float[:] = resultants_[ramp.start : ramp.end + 1] + t_bar: cython.float[:] = t_bar_[ramp.start : ramp.end + 1] + tau: cython.float[:] = tau_[ramp.start : ramp.end + 1] + n_reads: cython.int[:] = n_reads_[ramp.start : ramp.end + 1] + + # Compute mid point time + end: cython.int = n_resultants - 1 + t_bar_mid: cython.float = (t_bar[0] + t_bar[end]) / 2 + + # Casertano+2022 Eq. 44 + # Note we've departed from Casertano+22 slightly; + # there s is just resultants[ramp.end]. But that doesn't seem good if, e.g., + # a CR in the first resultant has boosted the whole ramp high but there + # is no actual signal. + power: cython.float = fmaxf(resultants[end] - resultants[0], 0) + power = power / sqrt(read_noise**2 + power) + power = _get_power(power) + + # It's easy to use up a lot of dynamic range on something like + # (tbar - tbarmid) ** 10. Rescale these. + t_scale: cython.float = (t_bar[end] - t_bar[0]) / 2 + t_scale = 1 if t_scale == 0 else t_scale + + # Initialize the fit loop + # it is faster to generate a c++ vector than a numpy array + weights: vector[cython.float] = vector[float](n_resultants) + coeffs: vector[cython.float] = vector[float](n_resultants) + ramp_fit: RampFit = RampFit(0, 0, 0) + f0: cython.float = 0 + f1: cython.float = 0 + f2: cython.float = 0 + coeff: cython.float + + # Issue when tbar[] == tbarmid causes exception otherwise + with cython.cpow(True): + for i in range(n_resultants): + # Casertano+22, Eq. 45 + weights[i] = (((1 + power) * n_reads[i]) / (1 + power * n_reads[i])) * fabs( + (t_bar[i] - t_bar_mid) / t_scale + ) ** power + + # Casertano+22 Eq. 35 + f0 += weights[i] + f1 += weights[i] * t_bar[i] + f2 += weights[i] * t_bar[i] ** 2 + + # Casertano+22 Eq. 36 + det: cython.float = f2 * f0 - f1**2 + if det == 0: + return ramp_fit + + for i in range(n_resultants): + # Casertano+22 Eq. 37 + coeff = (f0 * t_bar[i] - f1) * weights[i] / det + coeffs[i] = coeff + + # Casertano+22 Eq. 38 + ramp_fit.slope += coeff * resultants[i] + + # Casertano+22 Eq. 39 + ramp_fit.read_var += coeff**2 * read_noise**2 / n_reads[i] + + # Casertano+22 Eq 40 + # Note that this is an inversion of the indexing from the equation; + # however, commutivity of addition results in the same answer. This + # makes it so that we don't have to loop over all the resultants twice. + ramp_fit.poisson_var += coeff**2 * tau[i] + for j in range(i): + ramp_fit.poisson_var += 2 * coeff * coeffs[j] * t_bar[j] + + return ramp_fit + + +# Casertano+2022, Table 2 +_P_TABLE = cython.declare( + cython.float[6][2], + [ + [-INFINITY, 5, 10, 20, 50, 100], + [0, 0.4, 1, 3, 6, 10], + ], +) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.inline +@cython.cfunc +@cython.exceptval(check=False) +def _get_power(signal: cython.float) -> cython.float: + """ + Return the power from Casertano+22, Table 2. + + Parameters + ---------- + signal: float + signal from the resultants + + Returns + ------- + signal power from Table 2 + """ + i: cython.int + for i in range(6): + if signal < _P_TABLE[0][i]: + return _P_TABLE[1][i - 1] + + return _P_TABLE[1][i] + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.inline +@cython.ccall +def _init_ramps(dq: cython.int[:], n_resultants: cython.int) -> RampQueue: + """ + Create the initial ramp "queue" for each pixel + if dq[index_resultant, index_pixel] == 0, then the resultant is in a ramp + otherwise, the resultant is not in a ramp. + + Parameters + ---------- + dq : int[n_resultants] + DQ array + n_resultants : int + Number of resultants + + Returns + ------- + RampQueue + vector of RampIndex objects + - vector with entry for each ramp found (last entry is last ramp found) + - RampIndex with start and end indices of the ramp in the resultants + """ + ramps: RampQueue = RampQueue() + + # Note: if start/end are -1, then no value has been assigned + # ramp.start == -1 means we have not started a ramp + # dq[index_resultant, index_pixel] == 0 means resultant is in ramp + ramp: RampIndex = RampIndex(-1, -1) + index_resultant: cython.int + for index_resultant in range(n_resultants): + # Checking for start of ramp + if ramp.start == -1: + if dq[index_resultant] == 0: + # This resultant is in the ramp + # => We have found the start of a ramp! + ramp.start = index_resultant + + # This resultant cannot be the start of a ramp + # => Checking for end of ramp + elif dq[index_resultant] != 0: + # This pixel is not in the ramp + # => index_resultant - 1 is the end of the ramp + ramp.end = index_resultant - 1 + + # Add completed ramp to the queue and reset ramp + ramps.push_back(ramp) + ramp = RampIndex(-1, -1) + + # Handle case where last resultant is in ramp (so no end has been set) + if ramp.start != -1 and ramp.end == -1: + # Last resultant is end of the ramp => set then add to stack + ramp.end = n_resultants - 1 + ramps.push_back(ramp) + + return ramps + + +# Note everything below this comment is to support jump detection. + + +Jump = cython.struct( + detected=cpp_bool, + jump0=cython.int, + jump1=cython.int, +) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.inline +@cython.cfunc +def _jump_detection( + single_pixel: cython.float[:, :], + double_pixel: cython.float[:, :], + single_fixed: cython.float[:, :], + double_fixed: cython.float[:, :], + t_bar: cython.float[:], + slope: cython.float, + ramp: RampIndex, + thresh: Thresh, + use_jump: cpp_bool, +) -> Jump: + """ + Run jump detection on a single ramp fit. + + Parameters + ---------- + single_pixel : float[:, :] + The pre-computed fixed values for a given pixel + These will hold single difference values. + double_pixel : float[:, :] + The pre-computed fixed values for a given pixel + These will hold double difference values. + single_fixed : float[:, :] + The pre-computed fixed values for a given read_pattern + These will hold single difference values. + double_fixed : float[:, :] + The pre-computed fixed values for a given read_pattern + These will hold double difference values. + t_bar : float[:, :] + The average time for each resultant + slope : float + The computed slope for the ramp + ramp : RampIndex + Struct for start and end of ramp to fit + thresh : Thresh + Threshold parameters struct + use_jump : bool + Turn on or off jump detection. + + Returns + ------- + Jump: struct + - detected: bool + True if a jump was detected + - jump0: int + Index of the first resultant of the jump + - jump1: int + Index of the second resultant of the jump + """ + jump: cython.int + + # Run jump detection if enabled + if use_jump: + stat: Stat = _fit_statistic( + single_pixel, double_pixel, single_fixed, double_fixed, t_bar, slope, ramp + ) + # Note that when a "ramp" is a single point, _fit_statistic returns + # a NaN for max_stat. Note that NaN > anything is always false so the + # result drops through as desired. + if stat.max_stat > _threshold(thresh, slope): + # Compute jump point to create two new ramps + # This jump point corresponds to the index of the largest + # statistic: + # argmax = argmax(stats) + # These statistics are indexed relative to the ramp's range. + # Therefore, we need to add the start index of the ramp to the + # result. + jump = stat.arg_max + ramp.start + + # Note that because the resultants are averages of reads, but + # jumps occur in individual reads, it is possible that the + # jump is averaged down by the resultant with the actual jump + # causing the computed jump to be off by one index. + # In the idealized case this is when the jump occurs near + # the start of the resultant with the jump. In this case, + # the statistic for the resultant will be maximized at + # index - 1 rather than index. This means that we have to + # remove argmax(stats) + 1 as it is also a possible jump. + # This case is difficult to distinguish from the case where + # argmax(stats) does correspond to the jump resultant. + # Therefore, we just remove both possible resultants from + # consideration. + return Jump(True, jump, jump + 1) + + return Jump(False, -1, -1) + + +@cython.inline +@cython.cfunc +@cython.exceptval(check=False) +def _threshold(thresh: Thresh, slope: cython.float) -> cython.float: + """ + Compute jump threshold. + + Parameters + ---------- + thresh : Thresh + threshold parameters struct + slope : float + slope of the ramp in question + + Returns + ------- + intercept - constant * log10(slope) + """ + slope = slope if slope > 1 else 1 + slope = slope if slope < 1e4 else 1e4 + + return thresh.intercept - thresh.constant * log10(slope) + + +Stat = cython.struct(arg_max=cython.int, max_stat=cython.float) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.inline +@cython.cfunc +def _fit_statistic( + single_pixel: cython.float[:, :], + double_pixel: cython.float[:, :], + single_fixed: cython.float[:, :], + double_fixed: cython.float[:, :], + t_bar: cython.float[:], + slope: cython.float, + ramp: RampIndex, +) -> Stat: + """ + Compute the maximum index and its value over all fit statistics for a given + ramp. Each index's stat is the max of the single and double difference + statistics: + all_stats = . + + Parameters + ---------- + single_pixel : float[:, :] + The pre-computed fixed values for a given pixel + These will hold single difference values. + double_pixel : float[:, :] + The pre-computed fixed values for a given pixel + These will hold double difference values. + single_fixed : float[:, :] + The pre-computed fixed values for a given read_pattern + These will hold single difference values. + double_fixed : float[:, :] + The pre-computed fixed values for a given read_pattern + These will hold double difference values. + t_bar : float[:, :] + The average time for each resultant + slope : float + The computed slope for the ramp + ramp : RampIndex + Struct for start and end of ramp to fit + + Returns + ------- + argmax(all_stats), max(all_stats) + """ + # Note that a ramp consisting of a single point is degenerate and has no + # fit statistic so we bail out here + if ramp.start == ramp.end: + return Stat(0, NAN) + + # Start computing fit statistics + correct: cython.float = _correction(t_bar, ramp, slope) + + # We are computing single and double differences of using the ramp's resultants. + # Each of these computations requires two points meaning that there are + # start - end - 1 possible differences. However, we cannot compute a double + # difference for the last point as there is no point after it. Therefore, + # We use this point's single difference as our initial guess for the fit + # statistic. Note that the fit statistic can technically be negative so + # this makes it much easier to compute a "lazy" max. + index: cython.int = ramp.end - 1 + stat: Stat = Stat( + ramp.end - ramp.start - 1, + _statistic( + single_pixel[_local_slope, index], + single_pixel[_var_read_noise, index], + single_fixed[_t_bar_diff_sqr, index], + single_fixed[_var_slope_val, index], + slope, + correct, + ), + ) + + # Compute the rest of the fit statistics + max_stat: cython.float + single_stat: cython.float + double_stat: cython.float + arg_max: cython.int + for arg_max, index in enumerate(range(ramp.start, ramp.end - 1)): + # Compute max of single and double difference statistics + single_stat = _statistic( + single_pixel[_local_slope, index], + single_pixel[_var_read_noise, index], + single_fixed[_t_bar_diff_sqr, index], + single_fixed[_var_slope_val, index], + slope, + correct, + ) + double_stat = _statistic( + double_pixel[_local_slope, index], + double_pixel[_var_read_noise, index], + double_fixed[_t_bar_diff_sqr, index], + double_fixed[_var_slope_val, index], + slope, + correct, + ) + max_stat = fmaxf(single_stat, double_stat) + + # If this is larger than the current max, update the max + if max_stat > stat.max_stat: + stat = Stat(arg_max, max_stat) + + return stat + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +@cython.inline +@cython.cfunc +@cython.exceptval(check=False) +def _statistic( + local_slope: cython.float, + var_read_noise: cython.float, + t_bar_diff_sqr: cython.float, + var_slope_val: cython.float, + slope: cython.float, + correct: cython.float, +) -> cython.float: + """ + Compute a single fit statistic + delta / sqrt(var + correct). + + where: + delta = _local_slope - slope + var = (var_read_noise + slope * var_slope_val) / t_bar_diff_sqr + + pre-computed: + local_slope = (resultant[i + j] - resultant[i]) / (t_bar[i + j] - t_bar[i]) + var_read_noise = read_noise ** 2 * (1/n_reads[i + j] + 1/n_reads[i]) + var_slope_coeff = tau[i + j] + tau[i] - 2 * min(t_bar[i + j], t_bar[i]) + t_bar_diff_sqr = (t_bar[i + j] - t_bar[i]) ** 2 + + Parameters + ---------- + local_slope : float + The local slope the statistic is computed for + var_read_noise: float + The read noise variance for _local_slope + t_bar_diff_sqr : float + The square difference for the t_bar corresponding to _local_slope + var_slope_val : float + The slope variance coefficient for _local_slope + slope : float + The computed slope for the ramp + correct : float + The correction factor needed + + Returns + ------- + Create a single instance of the stastic for the given parameters + """ + delta: cython.float = local_slope - slope + var: cython.float = (var_read_noise + slope * var_slope_val) / t_bar_diff_sqr + + return delta / sqrt(var + correct) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +@cython.inline +@cython.cfunc +@cython.exceptval(check=False) +def _correction(t_bar: cython.float[:], ramp: RampIndex, slope: cython.float) -> cython.float: + """ + Compute the correction factor for the variance used by a statistic. + + - slope / (t_bar[end] - t_bar[start]) + + Parameters + ---------- + t_bar : float[:] + The computed t_bar values for the ramp + ramp : RampIndex + Struct for start and end indices resultants for the ramp + slope : float + The computed slope for the ramp + """ + diff: cython.float = t_bar[ramp.end] - t_bar[ramp.start] + + return -slope / diff + + +_t_bar_diff = cython.declare(cython.int, FixedOffsets.t_bar_diff) +_t_bar_diff_sqr = cython.declare(cython.int, FixedOffsets.t_bar_diff_sqr) +_read_recip = cython.declare(cython.int, FixedOffsets.read_recip) +_var_slope_val = cython.declare(cython.int, FixedOffsets.var_slope_val) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +@cython.inline +@cython.ccall +def _fill_fixed_values( + single_fixed: cython.float[:, :], + double_fixed: cython.float[:, :], + t_bar: cython.float[:], + tau: cython.float[:], + n_reads: cython.int[:], + n_resultants: cython.int, +) -> cython.void: + """ + Pre-compute all the values needed for jump detection which only depend on + the read pattern. + + Parameters + ---------- + single_fixed : float[:, :] + A pre-allocated memoryview to store the pre-computed values in, its faster + to allocate outside this function. These will hold single difference values. + double_fixed : float[:, :] + A pre-allocated memoryview to store the pre-computed values in, its faster + to allocate outside this function. These will hold double difference values. + t_bar : float[:] + The average time for each resultant + tau : float[:] + The time variance for each resultant + n_reads : int[:] + The number of reads for each resultant + n_resultants : int + The number of resultants for the read pattern + + Returns + ------- + [ + , + , + ** 2, + ** 2, + <(1/n_reads[i+1] + 1/n_reads[i])>, + <(1/n_reads[i+2] + 1/n_reads[i])>, + <(tau[i] + tau[i+1] - 2 * min(t_bar[i], t_bar[i+1]))>, + <(tau[i] + tau[i+2] - 2 * min(t_bar[i], t_bar[i+2]))>, + ] + """ + # Coerce division to be using floats + num: cython.float = 1 + + i: cython.int + for i in range(n_resultants - 1): + single_fixed[_t_bar_diff, i] = t_bar[i + 1] - t_bar[i] + single_fixed[_t_bar_diff_sqr, i] = single_fixed[_t_bar_diff, i] ** 2 + single_fixed[_read_recip, i] = (num / n_reads[i + 1]) + (num / n_reads[i]) + single_fixed[_var_slope_val, i] = tau[i + 1] + tau[i] - 2 * min(t_bar[i + 1], t_bar[i]) + + if i < n_resultants - 2: + double_fixed[_t_bar_diff, i] = t_bar[i + 2] - t_bar[i] + double_fixed[_t_bar_diff_sqr, i] = double_fixed[_t_bar_diff, i] ** 2 + double_fixed[_read_recip, i] = (num / n_reads[i + 2]) + (num / n_reads[i]) + double_fixed[_var_slope_val, i] = tau[i + 2] + tau[i] - 2 * min(t_bar[i + 2], t_bar[i]) + + +_local_slope = cython.declare(cython.int, PixelOffsets.local_slope) +_var_read_noise = cython.declare(cython.int, PixelOffsets.var_read_noise) + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +@cython.inline +@cython.ccall +def _fill_pixel_values( + single_pixel: cython.float[:, :], + double_pixel: cython.float[:, :], + single_fixed: cython.float[:, :], + double_fixed: cython.float[:, :], + resultants: cython.float[:], + read_noise: cython.float, + n_resultants: cython.int, +) -> cython.void: + """ + Pre-compute all the values needed for jump detection which only depend on + the a specific pixel (independent of the given ramp for a pixel). + + Parameters + ---------- + single_pixel : float[:, :] + A pre-allocated memoryview to store the pre-computed values in, its faster + to allocate outside this function. These will hold single difference values. + double_pixel : float[:, :] + A pre-allocated memoryview to store the pre-computed values in, its faster + to allocate outside this function. These will hold double difference values. + single_fixed : float[:, :] + The pre-computed fixed values for the read_pattern + These will hold single difference values. + double_fixed : float[:, :] + The pre-computed fixed values for the read_pattern + These will hold double difference values. + resultants : float[:] + The resultants for the pixel in question. + read_noise : float + The read noise for the pixel + n_resultants : int + The number of resultants for the read_pattern + + Returns + ------- + [ + <(resultants[i+1] - resultants[i])> / <(t_bar[i+1] - t_bar[i])>, + <(resultants[i+2] - resultants[i])> / <(t_bar[i+2] - t_bar[i])>, + read_noise**2 * <(1/n_reads[i+1] + 1/n_reads[i])>, + read_noise**2 * <(1/n_reads[i+2] + 1/n_reads[i])>, + ] + """ + read_noise_sqr: cython.float = read_noise**2 + + i: cython.int + for i in range(n_resultants - 1): + single_pixel[_local_slope, i] = (resultants[i + 1] - resultants[i]) / single_fixed[_t_bar_diff, i] + single_pixel[_var_read_noise, i] = read_noise_sqr * single_fixed[_read_recip, i] + + if i < n_resultants - 2: + double_pixel[_local_slope, i] = (resultants[i + 2] - resultants[i]) / double_fixed[_t_bar_diff, i] + double_pixel[_var_read_noise, i] = read_noise_sqr * double_fixed[_read_recip, i] diff --git a/src/stcal/ramp_fitting/ols_cas22.py b/src/stcal/ramp_fitting/ols_cas22.py new file mode 100644 index 000000000..6e652f938 --- /dev/null +++ b/src/stcal/ramp_fitting/ols_cas22.py @@ -0,0 +1,232 @@ +"""Ramp fitting routines. + +The simulator need not actually fit any ramps, but we would like to do a good +job simulating the noise induced by ramp fitting. That requires computing the +covariance matrix coming out of ramp fitting. But that's actually a big part +of the work of ramp fitting. + +There are a few different proposed ramp fitting algorithms, differing in their +weights. The final derived covariances are all somewhat similarly difficult +to compute, however, since we ultimately end up needing to compute + +.. math:: (A^T C^{-1} A)^{-1} + +for the "optimal" case, or + +.. math:: (A^T W^{-1} A)^{-1} A^T W^{-1} C W^{-1} A (A^T W^{-1} A)^{-1} + +for some alternative weighting. + +We start trying the "optimal" case below. + +For the "optimal" case, a challenge is that we don't want to compute +:math:`C^{-1}` for every pixel individually. Fortunately, we only +need :math:`(A^T C^{-1} A)^{-1}` (which is only a 2x2 matrix) for variances, +and only :math:`(A^T C^{-1} A)^{-1} A^T C^{-1}` for ramp fitting, which is 2xn. +Both of these matrices are effectively single parameter families, depending +after rescaling by the read noise only on the ratio of the read noise and flux. + +So the routines in these packages construct these different matrices, store +them, and interpolate between them for different different fluxes and ratios. +""" +from enum import Enum +from typing import NamedTuple + +import numpy as np +from astropy import units as u + +from ._ols_cas22 import FixedOffsets, Parameter, PixelOffsets, Variance +from ._ols_cas22 import fit_ramps as _fit_ramps + +__all__ = ["fit_ramps", "Parameter", "Variance", "DefaultThreshold", "RampFitOutputs"] + + +class DefaultThreshold(Enum): + INTERCEPT = np.float32(5.5) + CONSTANT = np.float32(1 / 3) + + +class RampFitOutputs(NamedTuple): + """ + Simple tuple wrapper for outputs from the ramp fitting algorithm + This clarifies the meaning of the outputs via naming them something + descriptive. + + Attributes + ---------- + parameters: np.ndarray[n_pixel, 2] + the slope and intercept for each pixel's ramp fit. see Parameter enum + for indexing indicating slope/intercept in the second dimension. + variances: np.ndarray[n_pixel, 3] + the read, poisson, and total variances for each pixel's ramp fit. + see Variance enum for indexing indicating read/poisson/total in the + second dimension. + dq: np.ndarray[n_resultants, n_pixel] + the dq array, with additional flags set for jumps detected by the + jump detection algorithm. + fits: list of RampFits + the raw ramp fit outputs, these are all structs which will get mapped to + python dictionaries. + """ + + parameters: np.ndarray + variances: np.ndarray + dq: np.ndarray + + +def fit_ramps( + resultants, + dq, + read_noise, + read_time, + read_pattern, + use_jump=False, + *, + threshold_intercept=DefaultThreshold.INTERCEPT.value, + threshold_constant=DefaultThreshold.CONSTANT.value, + include_diagnostic=False, +): + """Fit ramps following Casertano+2022, including averaging partial ramps. + + Ramps are broken where dq != 0, and fits are performed on each sub-ramp. + Resultants containing multiple ramps have their ramp fits averaged using + inverse variance weights based on the variance in the individual slope fits + due to read noise. + + Parameters + ---------- + resultants : np.ndarry[nresultants, ...] + the resultants in electrons + dq : np.ndarry[nresultants, ...] + the dq array. dq != 0 implies bad pixel / CR. + read_noise : float + the read noise in electrons + read_time : float + Read time. For Roman data this is the FRAME_TIME keyword. + read_pattern : list[list[int]] + The read pattern prescription. If None, use `ma_table`. + One of `ma_table` or `read_pattern` must be defined. + use_jump : bool + If True, use the jump detection algorithm to identify CRs. + If False, use the DQ array to identify CRs. + threshold_intercept : float (optional, keyword-only) + Override the intercept parameter for threshold for the jump detection + algorithm. + theshold_constant : float (optional, keyword-only) + Override the constant parameter for threshold for the jump detection + algorithm. + + Returns + ------- + RampFitOutputs + parameters: np.ndarray[n_pixel, 2] + the slope and intercept for each pixel's ramp fit. see Parameter enum + for indexing indicating slope/intercept in the second dimension. + variances: np.ndarray[n_pixel, 3] + the read, poisson, and total variances for each pixel's ramp fit. + see Variance enum for indexing indicating read/poisson/total in the + second dimension. + dq: np.ndarray[n_resultants, n_pixel] + the dq array, with additional flags set for jumps detected by the + jump detection algorithm. + fits: always None, this is a hold over which can contain the diagnostic + fit information from the jump detection algorithm. + """ + # Trickery to avoid having to specify the defaults for the threshold + # parameters outside the cython code. + resultants_unit = getattr(resultants, "unit", None) + if resultants_unit is not None: + resultants = resultants.to(u.electron).value + + resultants = np.array(resultants).astype(np.float32) + + dq = np.array(dq).astype(np.int32) + if np.ndim(read_noise) <= 1: + read_noise = read_noise * np.ones(resultants.shape[1:]) + read_noise = np.array(read_noise).astype(np.float32) + + # Raise error if input data is inconsistent + n_resultants = resultants.shape[0] + if n_resultants != len(read_pattern): + msg = ( + f"The read pattern length {len(read_pattern)} does " + f"not match number of resultants {n_resultants}" + ) + raise RuntimeError(msg) + + orig_shape = resultants.shape + if len(resultants.shape) == 1: + # single ramp. + resultants = resultants.reshape((*orig_shape, 1)) + dq = dq.reshape((*orig_shape, 1)) + read_noise = read_noise.reshape(orig_shape[1:] + (1,)) + + # Pre-allocate the output arrays + n_pixels = np.prod(resultants.shape[1:]) + parameters = np.empty((n_pixels, Parameter.n_param), dtype=np.float32) + variances = np.empty((n_pixels, Variance.n_var), dtype=np.float32) + + # Pre-allocate the working memory arrays + # This prevents bouncing to and from cython for this allocation, which + # is slower than just doing it all in python to start. + t_bar, tau, n_reads = _create_metadata(read_pattern, read_time) + if use_jump: + single_pixel = np.empty((PixelOffsets.n_pixel_offsets, n_resultants - 1), dtype=np.float32) + double_pixel = np.empty((PixelOffsets.n_pixel_offsets, n_resultants - 2), dtype=np.float32) + single_fixed = np.empty((FixedOffsets.n_fixed_offsets, n_resultants - 1), dtype=np.float32) + double_fixed = np.empty((FixedOffsets.n_fixed_offsets, n_resultants - 2), dtype=np.float32) + else: + single_pixel = np.empty((0, 0), dtype=np.float32) + double_pixel = np.empty((0, 0), dtype=np.float32) + single_fixed = np.empty((0, 0), dtype=np.float32) + double_fixed = np.empty((0, 0), dtype=np.float32) + + _fit_ramps( + resultants.reshape(resultants.shape[0], -1), + dq.reshape(resultants.shape[0], -1), + read_noise.reshape(-1), + parameters, + variances, + t_bar, + tau, + n_reads, + single_pixel, + double_pixel, + single_fixed, + double_fixed, + use_jump, + threshold_intercept, + threshold_constant, + include_diagnostic, + ) + + parameters = parameters.reshape(orig_shape[1:] + (2,)) + variances = variances.reshape(orig_shape[1:] + (3,)) + dq = dq.reshape(orig_shape) + + if resultants.shape != orig_shape: + parameters = parameters[0] + variances = variances[0] + + if resultants_unit is not None: + parameters = parameters * resultants_unit + + # return ols_cas22.RampFitOutputs(output.fits, parameters, variances, dq) + return RampFitOutputs(parameters, variances, dq) + + +def _create_metadata(read_pattern, read_time): + n_resultants = len(read_pattern) + + t_bar = np.empty(n_resultants, dtype=np.float32) + tau = np.empty(n_resultants, dtype=np.float32) + n_reads = np.empty(n_resultants, dtype=np.int32) + + for i, resultant in enumerate(read_pattern): + n_read = len(resultant) + + n_reads[i] = n_read + t_bar[i] = read_time * np.mean(resultant) + tau[i] = np.sum((2 * (n_read - np.arange(n_read)) - 1) * resultant) * read_time / n_read**2 + + return t_bar, tau, n_reads diff --git a/src/stcal/ramp_fitting/ols_cas22/__init__.py b/src/stcal/ramp_fitting/ols_cas22/__init__.py deleted file mode 100644 index 3d30b0adb..000000000 --- a/src/stcal/ramp_fitting/ols_cas22/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from ._fit import Parameter, RampFitOutputs, Variance, fit_ramps -from ._jump import JUMP_DET - -__all__ = ["fit_ramps", "RampFitOutputs", "Parameter", "Variance", "Diff", "JUMP_DET"] diff --git a/src/stcal/ramp_fitting/ols_cas22/_fit.pyx b/src/stcal/ramp_fitting/ols_cas22/_fit.pyx deleted file mode 100644 index dbe3c5366..000000000 --- a/src/stcal/ramp_fitting/ols_cas22/_fit.pyx +++ /dev/null @@ -1,236 +0,0 @@ -# cython: language_level=3str - -""" -External interface module for the Casertano+22 ramp fitting algorithm with jump detection. - This module is intended to contain everything needed by external code. - -Enums ------ -Parameter : - Enumerate the index for the output parameters array. - -Variance : - Enumerate the index for the output variances array. - -Classes -------- -RampFitOutputs : NamedTuple - Simple tuple wrapper for outputs from the ramp fitting algorithm - This clarifies the meaning of the outputs via naming them something - descriptive. - -(Public) Functions ------------------- -fit_ramps : function - Fit ramps using the Castenario+22 algorithm to a set of pixels accounting - for jumps (if use_jump is True) and bad pixels (via the dq array). This - is the primary externally callable function. -""" -from __future__ import annotations - -import numpy as np - -cimport numpy as cnp -from cython cimport boundscheck, wraparound -from libcpp cimport bool -from libcpp.list cimport list as cpp_list - -from stcal.ramp_fitting.ols_cas22._jump cimport ( - JumpFits, - Thresh, - fill_fixed_values, - fit_jumps, - n_fixed_offsets, - n_pixel_offsets, -) -from stcal.ramp_fitting.ols_cas22._ramp cimport ReadPattern, from_read_pattern - -from typing import NamedTuple - -# Initialize numpy for cython use in this module -cnp.import_array() - - -cpdef enum Parameter: - intercept - slope - n_param - - -cpdef enum Variance: - read_var - poisson_var - total_var - n_var - - -class RampFitOutputs(NamedTuple): - """ - Simple tuple wrapper for outputs from the ramp fitting algorithm - This clarifies the meaning of the outputs via naming them something - descriptive. - - Attributes - ---------- - parameters: np.ndarray[n_pixel, 2] - the slope and intercept for each pixel's ramp fit. see Parameter enum - for indexing indicating slope/intercept in the second dimension. - variances: np.ndarray[n_pixel, 3] - the read, poisson, and total variances for each pixel's ramp fit. - see Variance enum for indexing indicating read/poisson/total in the - second dimension. - dq: np.ndarray[n_resultants, n_pixel] - the dq array, with additional flags set for jumps detected by the - jump detection algorithm. - fits: list of RampFits - the raw ramp fit outputs, these are all structs which will get mapped to - python dictionaries. - """ - parameters: np.ndarray - variances: np.ndarray - dq: np.ndarray - fits: list | None = None - - -@boundscheck(False) -@wraparound(False) -def fit_ramps(float[:, :] resultants, - cnp.ndarray[int, ndim=2] dq, - float[:] read_noise, - float read_time, - list[list[int]] read_pattern, - bool use_jump=False, - float intercept=5.5, - float constant=1/3, - bool include_diagnostic=False): - """Fit ramps using the Casertano+22 algorithm. - This implementation uses the Cas22 algorithm to fit ramps, where - ramps are fit between bad resultants marked by dq flags for each pixel - which are not equal to zero. If use_jump is True, it additionally uses - jump detection to mark additional resultants for each pixel as bad if - a jump is suspected in them. - - Parameters - ---------- - resultants : float[n_resultants, n_pixel] - the resultants in electrons (Note that this can be based as any sort of - array, such as a numpy array. The memory view is just for efficiency in - cython) - dq : np.ndarry[n_resultants, n_pixel] - the dq array. dq != 0 implies bad pixel / CR. (Kept as a numpy array - so that it can be passed out without copying into new numpy array, will - be working on memory views of this array) - read_noise : float[n_pixel] - the read noise in electrons for each pixel (same note as the resultants) - read_time : float - Time to perform a readout. For Roman data, this is FRAME_TIME. - read_pattern : list[list[int]] - the read pattern for the image - use_jump : bool - If True, use the jump detection algorithm to identify CRs. - If False, use the DQ array to identify CRs. - intercept : float - The intercept value for the threshold function. Default=5.5 - constant : float - The constant value for the threshold function. Default=1/3.0 - include_diagnostic : bool - If True, include the raw ramp fits in the output. Default=False - - Returns - ------- - A RampFitOutputs tuple - """ - cdef int n_pixels, n_resultants - n_resultants = resultants.shape[0] - n_pixels = resultants.shape[1] - - # Raise error if input data is inconsistent - if n_resultants != len(read_pattern): - raise RuntimeError(f'The read pattern length {len(read_pattern)} does not ' - f'match number of resultants {n_resultants}') - - # Compute the main metadata from the read pattern and cast it to memory views - cdef ReadPattern metadata = from_read_pattern(read_pattern, read_time, n_resultants) - cdef float[:] t_bar = metadata.t_bar - cdef float[:] tau = metadata.tau - cdef int[:] n_reads = metadata.n_reads - - # Setup pre-compute arrays for jump detection - cdef float[:, :] fixed - cdef float[:, :] pixel - if use_jump: - # Initialize arrays for the jump detection pre-computed values - fixed = np.empty((n_fixed_offsets, n_resultants - 1), dtype=np.float32) - pixel = np.empty((n_pixel_offsets, n_resultants - 1), dtype=np.float32) - - # Pre-compute the values from the read pattern - fixed = fill_fixed_values(fixed, t_bar, tau, n_reads, n_resultants) - else: - # "Initialize" the arrays when not using jump detection, they need to be - # initialized because they do get passed around, but they don't need - # to actually have any entries - fixed = np.empty((0, 0), dtype=np.float32) - pixel = np.empty((0, 0), dtype=np.float32) - - # Create a threshold struct - cdef Thresh thresh = Thresh(intercept, constant) - - # Create variable to old the diagnostic data - # Use list because this might grow very large which would require constant - # reallocation. We don't need random access, and this gets cast to a python - # list in the end. - cdef cpp_list[JumpFits] ramp_fits - - # Initialize the output arrays. Note that the fit intercept is currently always - # zero, where as every variance is calculated and set. This means that the - # parameters need to be filled with zeros, where as the variances can just - # be allocated - cdef float[:, :] parameters = np.zeros((n_pixels, Parameter.n_param), dtype=np.float32) - cdef float[:, :] variances = np.empty((n_pixels, Variance.n_var), dtype=np.float32) - - # Cast the enum values into integers for indexing (otherwise compiler complains) - # These will be optimized out - cdef int slope = Parameter.slope - cdef int read_var = Variance.read_var - cdef int poisson_var = Variance.poisson_var - cdef int total_var = Variance.total_var - - # Pull memory view of dq for speed of access later - # changes to this array will backpropagate to the original numpy array - cdef int[:, :] dq_ = dq - - # Run the jump fitting algorithm for each pixel - cdef JumpFits fit - cdef int index - for index in range(n_pixels): - # Fit all the ramps for the given pixel - fit = fit_jumps(resultants[:, index], - dq_[:, index], - read_noise[index], - t_bar, - tau, - n_reads, - n_resultants, - fixed, - pixel, - thresh, - use_jump, - include_diagnostic) - - # Extract the output fit's parameters - parameters[index, slope] = fit.average.slope - - # Extract the output fit's variances - variances[index, read_var] = fit.average.read_var - variances[index, poisson_var] = fit.average.poisson_var - variances[index, total_var] = fit.average.read_var + fit.average.poisson_var - - # Store diagnostic data if requested - if include_diagnostic: - ramp_fits.push_back(fit) - - # Cast memory views into numpy arrays for ease of use in python. - return RampFitOutputs(np.array(parameters, dtype=np.float32), - np.array(variances, dtype=np.float32), - dq, - ramp_fits if include_diagnostic else None) diff --git a/src/stcal/ramp_fitting/ols_cas22/_jump.pxd b/src/stcal/ramp_fitting/ols_cas22/_jump.pxd deleted file mode 100644 index 8693e7919..000000000 --- a/src/stcal/ramp_fitting/ols_cas22/_jump.pxd +++ /dev/null @@ -1,62 +0,0 @@ -# cython: language_level=3str - -from libcpp cimport bool -from libcpp.vector cimport vector - -from stcal.ramp_fitting.ols_cas22._ramp cimport RampFit, RampQueue - - -cpdef enum FixedOffsets: - single_t_bar_diff - double_t_bar_diff - single_t_bar_diff_sqr - double_t_bar_diff_sqr - single_read_recip - double_read_recip - single_var_slope_val - double_var_slope_val - n_fixed_offsets - - -cpdef enum PixelOffsets: - single_local_slope - double_local_slope - single_var_read_noise - double_var_read_noise - n_pixel_offsets - - -cpdef enum: - JUMP_DET = 4 - -cdef struct Thresh: - float intercept - float constant - - -cdef struct JumpFits: - RampFit average - vector[int] jumps - vector[RampFit] fits - RampQueue index - - -cpdef float[:, :] fill_fixed_values(float[:, :] fixed, - float[:] t_bar, - float[:] tau, - int[:] n_reads, - int n_resultants) - - -cdef JumpFits fit_jumps(float[:] resultants, - int[:] dq, - float read_noise, - float[:] t_bar, - float[:] tau, - int[:] n_reads, - int n_resultants, - float[:, :] fixed, - float[:, :] pixel, - Thresh thresh, - bool use_jump, - bool include_diagnostic) diff --git a/src/stcal/ramp_fitting/ols_cas22/_jump.pyx b/src/stcal/ramp_fitting/ols_cas22/_jump.pyx deleted file mode 100644 index 808482f32..000000000 --- a/src/stcal/ramp_fitting/ols_cas22/_jump.pyx +++ /dev/null @@ -1,595 +0,0 @@ -# cython: language_level=3str - - -""" -This module contains all the functions needed to execute jump detection for the - Castentano+22 ramp fitting algorithm - - The _ramp module contains the actual ramp fitting algorithm, this module - contains a driver for the algorithm and detection of jumps/splitting ramps. - -Structs -------- -Thresh : struct - intercept - constant * log10(slope) - - intercept : float - The intercept of the jump threshold - - constant : float - The constant of the jump threshold - -JumpFits : struct - All the data on a given pixel's ramp fit with (or without) jump detection - - average : RampFit - The average of all the ramps fit for the pixel - - jumps : vector[int] - The indices of the resultants which were detected as jumps - - fits : vector[RampFit] - All of the fits for each ramp fit for the pixel - - index : RampQueue - The RampIndex representations corresponding to each fit in fits - -Enums ------ -FixedOffsets : enum - Enumerate the different pieces of information computed for jump detection - which only depend on the read pattern. - -PixelOffsets : enum - Enumerate the different pieces of information computed for jump detection - which only depend on the given pixel (independent of specific ramp). - -JUMP_DET : value - A the fixed value for the jump detection dq flag. - -(Public) Functions ------------------- -fill_fixed_values : function - Pre-compute all the values needed for jump detection for a given read_pattern, - this is independent of the pixel involved. - -fit_jumps : function - Compute all the ramps for a single pixel using the Casertano+22 algorithm - with jump detection. This is a driver for the ramp fit algorithm in general - meaning it automatically handles splitting ramps across dq flags in addition - to splitting across detected jumps (if jump detection is turned on). -""" - -from cython cimport boundscheck, cdivision, wraparound -from libc.math cimport NAN, fmaxf, isnan, log10, sqrt -from libcpp cimport bool - -from stcal.ramp_fitting.ols_cas22._jump cimport JUMP_DET, FixedOffsets, JumpFits, PixelOffsets, Thresh -from stcal.ramp_fitting.ols_cas22._ramp cimport RampFit, RampIndex, RampQueue, fit_ramp, init_ramps - - -@boundscheck(False) -@wraparound(False) -@cdivision(True) -cpdef inline float[:, :] fill_fixed_values(float[:, :] fixed, - float[:] t_bar, - float[:] tau, - int[:] n_reads, - int n_resultants): - """ - Pre-compute all the values needed for jump detection which only depend on - the read pattern. - - Parameters - ---------- - fixed : float[:, :] - A pre-allocated memoryview to store the pre-computed values in, its faster - to allocate outside this function. - t_bar : float[:] - The average time for each resultant - tau : float[:] - The time variance for each resultant - n_reads : int[:] - The number of reads for each resultant - n_resultants : int - The number of resultants for the read pattern - - Returns - ------- - [ - , - , - ** 2, - ** 2, - <(1/n_reads[i+1] + 1/n_reads[i])>, - <(1/n_reads[i+2] + 1/n_reads[i])>, - <(tau[i] + tau[i+1] - 2 * min(t_bar[i], t_bar[i+1]))>, - <(tau[i] + tau[i+2] - 2 * min(t_bar[i], t_bar[i+2]))>, - ] - """ - # Cast the enum values into integers for indexing (otherwise compiler complains) - # These will be optimized out - cdef int single_t_bar_diff = FixedOffsets.single_t_bar_diff - cdef int double_t_bar_diff = FixedOffsets.double_t_bar_diff - cdef int single_t_bar_diff_sqr = FixedOffsets.single_t_bar_diff_sqr - cdef int double_t_bar_diff_sqr = FixedOffsets.double_t_bar_diff_sqr - cdef int single_read_recip = FixedOffsets.single_read_recip - cdef int double_read_recip = FixedOffsets.double_read_recip - cdef int single_var_slope_val = FixedOffsets.single_var_slope_val - cdef int double_var_slope_val = FixedOffsets.double_var_slope_val - - # Coerce division to be using floats - cdef float num = 1 - - cdef int i - for i in range(n_resultants - 1): - fixed[single_t_bar_diff, i] = t_bar[i + 1] - t_bar[i] - fixed[single_t_bar_diff_sqr, i] = fixed[single_t_bar_diff, i] ** 2 - fixed[single_read_recip, i] = (num / n_reads[i + 1]) + (num / n_reads[i]) - fixed[single_var_slope_val, i] = tau[i + 1] + tau[i] - 2 * min(t_bar[i + 1], t_bar[i]) - - if i < n_resultants - 2: - fixed[double_t_bar_diff, i] = t_bar[i + 2] - t_bar[i] - fixed[double_t_bar_diff_sqr, i] = fixed[double_t_bar_diff, i] ** 2 - fixed[double_read_recip, i] = (num / n_reads[i + 2]) + (num / n_reads[i]) - fixed[double_var_slope_val, i] = tau[i + 2] + tau[i] - 2 * min(t_bar[i + 2], t_bar[i]) - else: - # Last double difference is undefined - fixed[double_t_bar_diff, i] = NAN - fixed[double_t_bar_diff_sqr, i] = NAN - fixed[double_read_recip, i] = NAN - fixed[double_var_slope_val, i] = NAN - - return fixed - - -@boundscheck(False) -@wraparound(False) -@cdivision(True) -cpdef inline float[:, :] _fill_pixel_values(float[:, :] pixel, - float[:] resultants, - float[:, :] fixed, - float read_noise, - int n_resultants): - """ - Pre-compute all the values needed for jump detection which only depend on - the a specific pixel (independent of the given ramp for a pixel). - - Parameters - ---------- - pixel : float[:, :] - A pre-allocated memoryview to store the pre-computed values in, its faster - to allocate outside this function. - resultants : float[:] - The resultants for the pixel in question. - fixed : float[:, :] - The pre-computed fixed values for the read_pattern - read_noise : float - The read noise for the pixel - n_resultants : int - The number of resultants for the read_pattern - - Returns - ------- - [ - <(resultants[i+1] - resultants[i])> / <(t_bar[i+1] - t_bar[i])>, - <(resultants[i+2] - resultants[i])> / <(t_bar[i+2] - t_bar[i])>, - read_noise**2 * <(1/n_reads[i+1] + 1/n_reads[i])>, - read_noise**2 * <(1/n_reads[i+2] + 1/n_reads[i])>, - ] - """ - cdef int single_t_bar_diff = FixedOffsets.single_t_bar_diff - cdef int double_t_bar_diff = FixedOffsets.double_t_bar_diff - cdef int single_read_recip = FixedOffsets.single_read_recip - cdef int double_read_recip = FixedOffsets.double_read_recip - - cdef int single_slope = PixelOffsets.single_local_slope - cdef int double_slope = PixelOffsets.double_local_slope - cdef int single_var = PixelOffsets.single_var_read_noise - cdef int double_var = PixelOffsets.double_var_read_noise - - cdef float read_noise_sqr = read_noise ** 2 - - cdef int i - for i in range(n_resultants - 1): - pixel[single_slope, i] = (resultants[i + 1] - resultants[i]) / fixed[single_t_bar_diff, i] - pixel[single_var, i] = read_noise_sqr * fixed[single_read_recip, i] - - if i < n_resultants - 2: - pixel[double_slope, i] = (resultants[i + 2] - resultants[i]) / fixed[double_t_bar_diff, i] - pixel[double_var, i] = read_noise_sqr * fixed[double_read_recip, i] - else: - # The last double difference is undefined - pixel[double_slope, i] = NAN - pixel[double_var, i] = NAN - - return pixel - - -cdef inline float _threshold(Thresh thresh, float slope): - """ - Compute jump threshold - - Parameters - ---------- - thresh : Thresh - threshold parameters struct - slope : float - slope of the ramp in question - - Returns - ------- - intercept - constant * log10(slope) - """ - slope = slope if slope > 1 else 1 - slope = slope if slope < 1e4 else 1e4 - - return thresh.intercept - thresh.constant * log10(slope) - - -@boundscheck(False) -@wraparound(False) -@cdivision(True) -cdef inline float _correction(float[:] t_bar, RampIndex ramp, float slope): - """ - Compute the correction factor for the variance used by a statistic - - - slope / (t_bar[end] - t_bar[start]) - - Parameters - ---------- - t_bar : float[:] - The computed t_bar values for the ramp - ramp : RampIndex - Struct for start and end indices resultants for the ramp - slope : float - The computed slope for the ramp - """ - - cdef float diff = t_bar[ramp.end] - t_bar[ramp.start] - - return - slope / diff - - -@boundscheck(False) -@wraparound(False) -@cdivision(True) -cdef inline float _statstic(float local_slope, - float var_read_noise, - float t_bar_diff_sqr, - float var_slope_coeff, - float slope, - float correct): - """ - Compute a single fit statistic - delta / sqrt(var + correct) - - where: - delta = local_slope - slope - var = (var_read_noise + slope * var_slope_coeff) / t_bar_diff_sqr - - pre-computed: - local_slope = (resultant[i + j] - resultant[i]) / (t_bar[i + j] - t_bar[i]) - var_read_noise = read_noise ** 2 * (1/n_reads[i + j] + 1/n_reads[i]) - var_slope_coeff = tau[i + j] + tau[i] - 2 * min(t_bar[i + j], t_bar[i]) - t_bar_diff_sqr = (t_bar[i + j] - t_bar[i]) ** 2 - - Parameters - ---------- - local_slope : float - The local slope the statistic is computed for - float : var_read_noise - The read noise variance for local_slope - t_bar_diff_sqr : float - The square difference for the t_bar corresponding to local_slope - var_slope_coeff : float - The slope variance coefficient for local_slope - slope : float - The computed slope for the ramp - correct : float - The correction factor needed - - Returns - ------- - Create a single instance of the stastic for the given parameters - """ - - cdef float delta = local_slope - slope - cdef float var = (var_read_noise + slope * var_slope_coeff) / t_bar_diff_sqr - - return delta / sqrt(var + correct) - - -@boundscheck(False) -@wraparound(False) -cdef inline (int, float) _fit_statistic(float[:, :] pixel, - float[:, :] fixed, - float[:] t_bar, - float slope, - RampIndex ramp): - """ - Compute the maximum index and its value over all fit statistics for a given - ramp. Each index's stat is the max of the single and double difference - statistics: - all_stats = - - Parameters - ---------- - pixel : float[:, :] - The pre-computed fixed values for a given pixel - fixed : float[:, :] - The pre-computed fixed values for a given read_pattern - t_bar : float[:, :] - The average time for each resultant - slope : float - The computed slope for the ramp - ramp : RampIndex - Struct for start and end of ramp to fit - - Returns - ------- - argmax(all_stats), max(all_stats) - """ - # Cast the enum values into integers for indexing (otherwise compiler complains) - # These will be optimized out - cdef int single_local_slope = PixelOffsets.single_local_slope - cdef int double_local_slope = PixelOffsets.double_local_slope - cdef int single_var_read_noise = PixelOffsets.single_var_read_noise - cdef int double_var_read_noise = PixelOffsets.double_var_read_noise - - cdef int single_t_bar_diff_sqr = FixedOffsets.single_t_bar_diff_sqr - cdef int double_t_bar_diff_sqr = FixedOffsets.double_t_bar_diff_sqr - cdef int single_var_slope_val = FixedOffsets.single_var_slope_val - cdef int double_var_slope_val = FixedOffsets.double_var_slope_val - - # Note that a ramp consisting of a single point is degenerate and has no - # fit statistic so we bail out here - if ramp.start == ramp.end: - return 0, NAN - - # Start computing fit statistics - cdef float correct = _correction(t_bar, ramp, slope) - - # We are computing single and double differences of using the ramp's resultants. - # Each of these computations requires two points meaning that there are - # start - end - 1 possible differences. However, we cannot compute a double - # difference for the last point as there is no point after it. Therefore, - # We use this point's single difference as our initial guess for the fit - # statistic. Note that the fit statistic can technically be negative so - # this makes it much easier to compute a "lazy" max. - cdef int index = ramp.end - 1 - cdef int argmax = ramp.end - ramp.start - 1 - cdef float max_stat = _statstic(pixel[single_local_slope, index], - pixel[single_var_read_noise, index], - fixed[single_t_bar_diff_sqr, index], - fixed[single_var_slope_val, index], - slope, - correct) - - # Compute the rest of the fit statistics - cdef float stat, stat1, stat2 - cdef int stat_index - for stat_index, index in enumerate(range(ramp.start, ramp.end - 1)): - # Compute max of single and double difference statistics - stat1 = _statstic(pixel[single_local_slope, index], - pixel[single_var_read_noise, index], - fixed[single_t_bar_diff_sqr, index], - fixed[single_var_slope_val, index], - slope, - correct) - stat2 = _statstic(pixel[double_local_slope, index], - pixel[double_var_read_noise, index], - fixed[double_t_bar_diff_sqr, index], - fixed[double_var_slope_val, index], - slope, - correct) - stat = fmaxf(stat1, stat2) - - # If this is larger than the current max, update the max - if stat > max_stat: - max_stat = stat - argmax = stat_index - - return argmax, max_stat - - -@boundscheck(False) -@wraparound(False) -@cdivision(True) -cdef inline JumpFits fit_jumps(float[:] resultants, - int[:] dq, - float read_noise, - float[:] t_bar, - float[:] tau, - int[:] n_reads, - int n_resultants, - float[:, :] fixed, - float[:, :] pixel, - Thresh thresh, - bool use_jump, - bool include_diagnostic): - """ - Compute all the ramps for a single pixel using the Casertano+22 algorithm - with jump detection. - - Parameters - ---------- - resultants : float[:] - The resultants for the pixel - dq : int[:] - The dq flags for the pixel. This is modified in place, so the external - dq flag array will be modified as a side-effect. - read_noise : float - The read noise for the pixel. - ramps : RampQueue - RampQueue for initial ramps to fit for the pixel - multiple ramps are possible due to dq flags - t_bar : float[:] - The average time for each resultant - tau : float[:] - The time variance for each resultant - n_reads : int[:] - The number of reads for each resultant - n_resultants : int - The number of resultants for the pixel - fixed : float[:, :] - The jump detection pre-computed values for a given read_pattern - pixel : float[:, :] - A pre-allocated array for the jump detection fixed values for the - given pixel. This will be modified in place, it is passed in to avoid - re-allocating it for each pixel. - thresh : Thresh - The threshold parameter struct for jump detection - use_jump : bool - Turn on or off jump detection. - include_diagnostic : bool - Turn on or off recording all the diaganostic information on the fit - - Returns - ------- - RampFits struct of all the fits for a single pixel - """ - # Find initial set of ramps - cdef RampQueue ramps = init_ramps(dq, n_resultants) - - # Initialize algorithm - cdef JumpFits ramp_fits - cdef RampIndex ramp - cdef RampFit ramp_fit - - ramp_fits.average.slope = 0 - ramp_fits.average.read_var = 0 - ramp_fits.average.poisson_var = 0 - - cdef int argmax, jump0, jump1 - cdef float max_stat - cdef float weight, total_weight = 0 - - # Fill in the jump detection pre-compute values for a single pixel - if use_jump: - pixel = _fill_pixel_values(pixel, resultants, fixed, read_noise, n_resultants) - - # Run while the Queue is non-empty - while not ramps.empty(): - # Remove top ramp of the stack to use - ramp = ramps.back() - ramps.pop_back() - - # Compute fit using the Casertano+22 algorithm - ramp_fit = fit_ramp(resultants, - t_bar, - tau, - n_reads, - read_noise, - ramp) - - # Run jump detection if enabled - if use_jump: - argmax, max_stat = _fit_statistic(pixel, - fixed, - t_bar, - ramp_fit.slope, - ramp) - - # Note that when a "ramp" is a single point, _fit_statistic returns - # a NaN for max_stat. Note that NaN > anything is always false so the - # result drops through as desired. - if max_stat > _threshold(thresh, ramp_fit.slope): - # Compute jump point to create two new ramps - # This jump point corresponds to the index of the largest - # statistic: - # argmax = argmax(stats) - # These statistics are indexed relative to the - # ramp's range. Therefore, we need to add the start index - # of the ramp to the result. - # - jump0 = argmax + ramp.start - - # Note that because the resultants are averages of reads, but - # jumps occur in individual reads, it is possible that the - # jump is averaged down by the resultant with the actual jump - # causing the computed jump to be off by one index. - # In the idealized case this is when the jump occurs near - # the start of the resultant with the jump. In this case, - # the statistic for the resultant will be maximized at - # index - 1 rather than index. This means that we have to - # remove argmax(stats) + 1 as it is also a possible jump. - # This case is difficult to distinguish from the case where - # argmax(stats) does correspond to the jump resultant. - # Therefore, we just remove both possible resultants from - # consideration. - jump1 = jump0 + 1 - - # Update the dq flags - dq[jump0] = JUMP_DET - dq[jump1] = JUMP_DET - - # Record jump diagnostics - if include_diagnostic: - ramp_fits.jumps.push_back(jump0) - ramp_fits.jumps.push_back(jump1) - - # The two resultant indices need to be skipped, therefore - # the two - # possible new ramps are: - # RampIndex(ramp.start, jump0 - 1) - # RampIndex(jump1 + 1, ramp.end) - # This is because the RampIndex contains the index of the - # first and last resulants in the sub-ramp it describes. - # Note: The algorithm works via working over the sub-ramps - # backward in time. Therefore, since we are using a stack, - # we need to add the ramps in the time order they were - # observed in. This results in the last observation ramp - # being the top of the stack; meaning that, - # it will be the next ramp handled. - - if jump0 > ramp.start: - # Note that when jump0 == ramp.start, we have detected a - # jump in the first resultant of the ramp. This means - # there is no sub-ramp before jump0. - # Also, note that this will produce bad results as - # the ramp indexing will go out of bounds. So it is - # important that we exclude it. - # Note that jump0 < ramp.start is not possible because - # the argmax is always >= 0 - ramps.push_back(RampIndex(ramp.start, jump0 - 1)) - - if jump1 < ramp.end: - # Note that if jump1 == ramp.end, we have detected a - # jump in the last resultant of the ramp. This means - # there is no sub-ramp after jump1. - # Also, note that this will produce bad results as - # the ramp indexing will go out of bounds. So it is - # important that we exclude it. - # Note that jump1 > ramp.end is technically possible - # however in those potential cases it will draw on - # resultants which are not considered part of the ramp - # under consideration. Therefore, we have to exclude all - # of those values. - ramps.push_back(RampIndex(jump1 + 1, ramp.end)) - - # Skip recording the ramp as it has a detected jump - continue - - # Start recording the the fit (no jump detected) - - # Record the diagnositcs - if include_diagnostic: - ramp_fits.fits.push_back(ramp_fit) - ramp_fits.index.push_back(ramp) - - # Start computing the averages using a lazy process - # Note we do not do anything in the NaN case for degenerate ramps - if not isnan(ramp_fit.slope): - # protect weight against the extremely unlikely case of a zero - # variance - weight = 0 if ramp_fit.read_var == 0 else 1 / ramp_fit.read_var - total_weight += weight - - ramp_fits.average.slope += weight * ramp_fit.slope - ramp_fits.average.read_var += weight**2 * ramp_fit.read_var - ramp_fits.average.poisson_var += weight**2 * ramp_fit.poisson_var - - # Finish computing averages using the lazy process - ramp_fits.average.slope /= total_weight if total_weight != 0 else 1 - ramp_fits.average.read_var /= total_weight**2 if total_weight != 0 else 1 - ramp_fits.average.poisson_var /= total_weight**2 if total_weight != 0 else 1 - - # Multiply poisson term by flux, (no negative fluxes) - ramp_fits.average.poisson_var *= max(ramp_fits.average.slope, 0) - - return ramp_fits diff --git a/src/stcal/ramp_fitting/ols_cas22/_ramp.pxd b/src/stcal/ramp_fitting/ols_cas22/_ramp.pxd deleted file mode 100644 index de31cd6ce..000000000 --- a/src/stcal/ramp_fitting/ols_cas22/_ramp.pxd +++ /dev/null @@ -1,40 +0,0 @@ -# cython: language_level=3str - -from libcpp.vector cimport vector - - -cdef struct RampIndex: - int start - int end - - -cdef struct RampFit: - float slope - float read_var - float poisson_var - - -ctypedef vector[RampIndex] RampQueue - - -cdef class ReadPattern: - cdef float[::1] t_bar - cdef float[::1] tau - cdef int[::1] n_reads - - -cpdef RampQueue init_ramps(int[:] dq, - int n_resultants) - - -cpdef ReadPattern from_read_pattern(list[list[int]] read_pattern, - float read_time, - int n_resultants) - - -cdef RampFit fit_ramp(float[:] resultants_, - float[:] t_bar_, - float[:] tau_, - int[:] n_reads, - float read_noise, - RampIndex ramp) diff --git a/src/stcal/ramp_fitting/ols_cas22/_ramp.pyx b/src/stcal/ramp_fitting/ols_cas22/_ramp.pyx deleted file mode 100644 index cf9b93364..000000000 --- a/src/stcal/ramp_fitting/ols_cas22/_ramp.pyx +++ /dev/null @@ -1,357 +0,0 @@ -# cython: language_level=3str - -""" -This module contains all the functions needed to execute the Casertano+22 ramp - fitting algorithm on its own without jump detection. - - The _jump module contains a driver function which calls the `fit_ramp` function - from this module iteratively. This evvetively handles dq flags and detected - jumps simultaneously. - -Structs -------- -RampIndex : struct - - start : int - Index of the first resultant in the ramp - - end : int - Index of the last resultant in the ramp (so indexing of ramp requires end + 1) - -RampFit : struct - - slope : float - The slope fit to the ramp - - read_var : float - The read noise variance for the fit - - poisson_var : float - The poisson variance for the fit - -RampQueue : vector[RampIndex] - Vector of RampIndex objects (convenience typedef) - -Classes -------- -ReadPattern : - Container class for all the metadata derived from the read pattern, this - is just a temporary object to allow us to return multiple memory views from - a single function. - -(Public) Functions ------------------- -init_ramps : function - Create the initial ramp "queue" for each pixel in order to handle any initial - "dq" flags passed in from outside. - -from_read_pattern : function - Derive the input data from the the read pattern - This is faster than using __init__ or __cinit__ to construct the object with - these calls. - -fit_ramps : function - Implementation of running the Casertano+22 algorithm on a (sub)set of resultants - listed for a single pixel -""" -import numpy as np - -cimport numpy as cnp -from cython cimport boundscheck, cdivision, cpow, wraparound -from libc.math cimport INFINITY, NAN, fabs, fmaxf, sqrt -from libcpp.vector cimport vector - -from stcal.ramp_fitting.ols_cas22._ramp cimport RampFit, RampIndex, RampQueue, ReadPattern - -# Initialize numpy for cython use in this module -cnp.import_array() - - -cdef class ReadPattern: - """ - Class to contain the read pattern derived metadata - This exists only to allow us to output multiple memory views at the same time - from the same cython function. This is needed because neither structs nor unions - can contain memory views. - - In the case of this code memory views are the fastest "safe" array data structure. - This class will immediately be unpacked into raw memory views, so that we avoid - any further overhead of switching between python and cython. - - Attributes: - ---------- - t_bar : np.ndarray[float_t, ndim=1] - The mean time of each resultant - tau : np.ndarray[float_t, ndim=1] - The variance in time of each resultant - n_reads : np.ndarray[cnp.int32_t, ndim=1] - The number of reads in each resultant - """ - - def _to_dict(ReadPattern self): - """ - This is a private method to convert the ReadPattern object to a dictionary, - so that attributes can be directly accessed in python. Note that this - is needed because class attributes cannot be accessed on cython classes - directly in python. Instead they need to be accessed or set using a - python compatible method. This method is a pure puthon method bound - to to the cython class and should not be used by any cython code, and - only exists for testing purposes. - """ - return dict(t_bar=np.array(self.t_bar, dtype=np.float32), - tau=np.array(self.tau, dtype=np.float32), - n_reads=np.array(self.n_reads, dtype=np.int32)) - - -@boundscheck(False) -@wraparound(False) -@cdivision(True) -cpdef ReadPattern from_read_pattern(list[list[int]] read_pattern, float read_time, int n_resultants): - """ - Derive the input data from the the read pattern - This is faster than using __init__ or __cinit__ to construct the object with - these calls. - - Parameters - ---------- - read pattern: list[list[int]] - read pattern for the image - read_time : float - Time to perform a readout. - n_resultants : int - Number of resultants in the image - - Returns - ------- - ReadPattern - Contains: - - t_bar - - tau - - n_reads - """ - - cdef ReadPattern data = ReadPattern() - data.t_bar = np.empty(n_resultants, dtype=np.float32) - data.tau = np.empty(n_resultants, dtype=np.float32) - data.n_reads = np.empty(n_resultants, dtype=np.int32) - - cdef int index, n_reads - cdef list[int] resultant - for index, resultant in enumerate(read_pattern): - n_reads = len(resultant) - - data.n_reads[index] = n_reads - data.t_bar[index] = read_time * np.mean(resultant) - data.tau[index] = (np.sum((2 * (n_reads - np.arange(n_reads)) - 1) * resultant) * - read_time / n_reads**2) - - return data - - -@boundscheck(False) -@wraparound(False) -cpdef inline RampQueue init_ramps(int[:] dq, int n_resultants): - """ - Create the initial ramp "queue" for each pixel - if dq[index_resultant, index_pixel] == 0, then the resultant is in a ramp - otherwise, the resultant is not in a ramp - - Parameters - ---------- - dq : int[n_resultants] - DQ array - n_resultants : int - Number of resultants - - Returns - ------- - RampQueue - vector of RampIndex objects - - vector with entry for each ramp found (last entry is last ramp found) - - RampIndex with start and end indices of the ramp in the resultants - """ - cdef RampQueue ramps = RampQueue() - - # Note: if start/end are -1, then no value has been assigned - # ramp.start == -1 means we have not started a ramp - # dq[index_resultant, index_pixel] == 0 means resultant is in ramp - cdef RampIndex ramp = RampIndex(-1, -1) - cdef int index_resultant - for index_resultant in range(n_resultants): - if ramp.start == -1: - # Looking for the start of a ramp - if dq[index_resultant] == 0: - # We have found the start of a ramp! - ramp.start = index_resultant - else: - # This is not the start of the ramp yet - continue - else: - # Looking for the end of a ramp - if dq[index_resultant] == 0: - # This pixel is in the ramp do nothing - continue - else: - # This pixel is not in the ramp - # => index_resultant - 1 is the end of the ramp - ramp.end = index_resultant - 1 - - # Add completed ramp to stack and reset ramp - ramps.push_back(ramp) - ramp = RampIndex(-1, -1) - - # Handle case where last resultant is in ramp (so no end has been set) - if ramp.start != -1 and ramp.end == -1: - # Last resultant is end of the ramp => set then add to stack - ramp.end = n_resultants - 1 - ramps.push_back(ramp) - - return ramps - -# Keeps the static type checker/highlighter happy this has no actual effect -ctypedef float[6] _row - -# Casertano+2022, Table 2 -cdef _row[2] _PTABLE = [[-INFINITY, 5, 10, 20, 50, 100], - [0, 0.4, 1, 3, 6, 10]] - - -@boundscheck(False) -@wraparound(False) -cdef inline float _get_power(float signal): - """ - Return the power from Casertano+22, Table 2 - - Parameters - ---------- - signal: float - signal from the resultants - - Returns - ------- - signal power from Table 2 - """ - cdef int i - for i in range(6): - if signal < _PTABLE[0][i]: - return _PTABLE[1][i - 1] - - return _PTABLE[1][i] - - -@boundscheck(False) -@wraparound(False) -@cdivision(True) -cdef inline RampFit fit_ramp(float[:] resultants_, - float[:] t_bar_, - float[:] tau_, - int[:] n_reads_, - float read_noise, - RampIndex ramp): - """ - Fit a single ramp using Casertano+22 algorithm. - - Parameters - ---------- - resultants_ : float[:] - All of the resultants for the pixel - t_bar_ : float[:] - All the t_bar values - tau_ : float[:] - All the tau values - n_reads_ : int[:] - All the n_reads values - read_noise : float - The read noise for the pixel - ramp : RampIndex - Struct for start and end of ramp to fit - - Returns - ------- - RampFit - struct containing - - slope - - read_var - - poisson_var - """ - cdef int n_resultants = ramp.end - ramp.start + 1 - - # Special case where there is no or one resultant, there is no fit and - # we bail out before any computations. - # Note that in this case, we cannot compute the slope or the variances - # because these computations require at least two resultants. Therefore, - # this case is degernate and we return NaNs for the values. - if n_resultants <= 1: - return RampFit(NAN, NAN, NAN) - - # Compute the fit - cdef int i = 0, j = 0 - - # Setup data for fitting (work over subset of data) to make things cleaner - # Recall that the RampIndex contains the index of the first and last - # index of the ramp. Therefore, the Python slice needed to get all the - # data within the ramp is: - # ramp.start:ramp.end + 1 - cdef float[:] resultants = resultants_[ramp.start:ramp.end + 1] - cdef float[:] t_bar = t_bar_[ramp.start:ramp.end + 1] - cdef float[:] tau = tau_[ramp.start:ramp.end + 1] - cdef int[:] n_reads = n_reads_[ramp.start:ramp.end + 1] - - # Compute mid point time - cdef int end = n_resultants - 1 - cdef float t_bar_mid = (t_bar[0] + t_bar[end]) / 2 - - # Casertano+2022 Eq. 44 - # Note we've departed from Casertano+22 slightly; - # there s is just resultants[ramp.end]. But that doesn't seem good if, e.g., - # a CR in the first resultant has boosted the whole ramp high but there - # is no actual signal. - cdef float power = fmaxf(resultants[end] - resultants[0], 0) - power = power / sqrt(read_noise**2 + power) - power = _get_power(power) - - # It's easy to use up a lot of dynamic range on something like - # (tbar - tbarmid) ** 10. Rescale these. - cdef float t_scale = (t_bar[end] - t_bar[0]) / 2 - t_scale = 1 if t_scale == 0 else t_scale - - # Initialize the fit loop - # it is faster to generate a c++ vector than a numpy array - cdef vector[float] weights = vector[float](n_resultants) - cdef vector[float] coeffs = vector[float](n_resultants) - cdef RampFit ramp_fit = RampFit(0, 0, 0) - cdef float f0 = 0, f1 = 0, f2 = 0 - cdef float coeff - - # Issue when tbar[] == tbarmid causes exception otherwise - with cpow(True): - for i in range(n_resultants): - # Casertano+22, Eq. 45 - weights[i] = ((((1 + power) * n_reads[i]) / (1 + power * n_reads[i])) * - fabs((t_bar[i] - t_bar_mid) / t_scale) ** power) - - # Casertano+22 Eq. 35 - f0 += weights[i] - f1 += weights[i] * t_bar[i] - f2 += weights[i] * t_bar[i]**2 - - # Casertano+22 Eq. 36 - cdef float det = f2 * f0 - f1 ** 2 - if det == 0: - return ramp_fit - - for i in range(n_resultants): - # Casertano+22 Eq. 37 - coeff = (f0 * t_bar[i] - f1) * weights[i] / det - coeffs[i] = coeff - - # Casertano+22 Eq. 38 - ramp_fit.slope += coeff * resultants[i] - - # Casertano+22 Eq. 39 - ramp_fit.read_var += (coeff ** 2 * read_noise ** 2 / n_reads[i]) - - # Casertano+22 Eq 40 - # Note that this is an inversion of the indexing from the equation; - # however, commutivity of addition results in the same answer. This - # makes it so that we don't have to loop over all the resultants twice. - ramp_fit.poisson_var += coeff ** 2 * tau[i] - for j in range(i): - ramp_fit.poisson_var += (2 * coeff * coeffs[j] * t_bar[j]) - - return ramp_fit diff --git a/src/stcal/ramp_fitting/ols_cas22_fit.py b/src/stcal/ramp_fitting/ols_cas22_fit.py index 9203686ea..05e03e8f5 100644 --- a/src/stcal/ramp_fitting/ols_cas22_fit.py +++ b/src/stcal/ramp_fitting/ols_cas22_fit.py @@ -1,143 +1,13 @@ -"""Ramp fitting routines. +"""This module exists to keep the interfaces the same as before a refactoring.""" +import warnings -The simulator need not actually fit any ramps, but we would like to do a good -job simulating the noise induced by ramp fitting. That requires computing the -covariance matrix coming out of ramp fitting. But that's actually a big part -of the work of ramp fitting. +from .ols_cas22 import fit_ramps as fit_ramps_casertano -There are a few different proposed ramp fitting algorithms, differing in their -weights. The final derived covariances are all somewhat similarly difficult -to compute, however, since we ultimately end up needing to compute +__all__ = ["fit_ramps_casertano"] -.. math:: (A^T C^{-1} A)^{-1} - -for the "optimal" case, or - -.. math:: (A^T W^{-1} A)^{-1} A^T W^{-1} C W^{-1} A (A^T W^{-1} A)^{-1} - -for some alternative weighting. - -We start trying the "optimal" case below. - -For the "optimal" case, a challenge is that we don't want to compute -:math:`C^{-1}` for every pixel individually. Fortunately, we only -need :math:`(A^T C^{-1} A)^{-1}` (which is only a 2x2 matrix) for variances, -and only :math:`(A^T C^{-1} A)^{-1} A^T C^{-1}` for ramp fitting, which is 2xn. -Both of these matrices are effectively single parameter families, depending -after rescaling by the read noise only on the ratio of the read noise and flux. - -So the routines in these packages construct these different matrices, store -them, and interpolate between them for different different fluxes and ratios. -""" -import numpy as np -from astropy import units as u - -from . import ols_cas22 - - -def fit_ramps_casertano( - resultants, - dq, - read_noise, - read_time, - read_pattern, - use_jump=False, - *, - threshold_intercept=None, - threshold_constant=None, -): - """Fit ramps following Casertano+2022, including averaging partial ramps. - - Ramps are broken where dq != 0, and fits are performed on each sub-ramp. - Resultants containing multiple ramps have their ramp fits averaged using - inverse variance weights based on the variance in the individual slope fits - due to read noise. - - Parameters - ---------- - resultants : np.ndarry[nresultants, ...] - the resultants in electrons - dq : np.ndarry[nresultants, ...] - the dq array. dq != 0 implies bad pixel / CR. - read_noise : float - the read noise in electrons - read_time : float - Read time. For Roman data this is the FRAME_TIME keyword. - read_pattern : list[list[int]] - The read pattern prescription. If None, use `ma_table`. - One of `ma_table` or `read_pattern` must be defined. - use_jump : bool - If True, use the jump detection algorithm to identify CRs. - If False, use the DQ array to identify CRs. - threshold_intercept : float (optional, keyword-only) - Override the intercept parameter for threshold for the jump detection - algorithm. - theshold_constant : float (optional, keyword-only) - Override the constant parameter for threshold for the jump detection - algorithm. - - Returns - ------- - RampFitOutputs - parameters: np.ndarray[n_pixel, 2] - the slope and intercept for each pixel's ramp fit. see Parameter enum - for indexing indicating slope/intercept in the second dimension. - variances: np.ndarray[n_pixel, 3] - the read, poisson, and total variances for each pixel's ramp fit. - see Variance enum for indexing indicating read/poisson/total in the - second dimension. - dq: np.ndarray[n_resultants, n_pixel] - the dq array, with additional flags set for jumps detected by the - jump detection algorithm. - fits: always None, this is a hold over which can contain the diagnostic - fit information from the jump detection algorithm. - """ - # Trickery to avoid having to specify the defaults for the threshold - # parameters outside the cython code. - kwargs = {} - if threshold_intercept is not None: - kwargs["intercept"] = threshold_intercept - if threshold_constant is not None: - kwargs["constant"] = threshold_constant - - resultants_unit = getattr(resultants, "unit", None) - if resultants_unit is not None: - resultants = resultants.to(u.electron).value - - resultants = np.array(resultants).astype(np.float32) - - dq = np.array(dq).astype(np.int32) - if np.ndim(read_noise) <= 1: - read_noise = read_noise * np.ones(resultants.shape[1:]) - read_noise = np.array(read_noise).astype(np.float32) - - orig_shape = resultants.shape - if len(resultants.shape) == 1: - # single ramp. - resultants = resultants.reshape((*orig_shape, 1)) - dq = dq.reshape((*orig_shape, 1)) - read_noise = read_noise.reshape(orig_shape[1:] + (1,)) - - output = ols_cas22.fit_ramps( - resultants.reshape(resultants.shape[0], -1), - dq.reshape(resultants.shape[0], -1), - read_noise.reshape(-1), - read_time, - read_pattern, - use_jump, - **kwargs, - ) - - parameters = output.parameters.reshape(orig_shape[1:] + (2,)) - variances = output.variances.reshape(orig_shape[1:] + (3,)) - dq = output.dq.reshape(orig_shape) - - if resultants.shape != orig_shape: - parameters = parameters[0] - variances = variances[0] - - if resultants_unit is not None: - parameters = parameters * resultants_unit - - # return ols_cas22.RampFitOutputs(output.fits, parameters, variances, dq) - return ols_cas22.RampFitOutputs(parameters, variances, dq) +warnings.warn( + "The module stcal.ramp_fitting.ols_cas22_fit is deprecated. " + "Please use stcal.ramp_fitting.ols_cas22 instead.", + DeprecationWarning, + stacklevel=2, +) diff --git a/tests/test_jump_cas22.py b/tests/test_jump_cas22.py index 18c19c965..b2e6e695c 100644 --- a/tests/test_jump_cas22.py +++ b/tests/test_jump_cas22.py @@ -1,15 +1,17 @@ +from typing import NamedTuple + import numpy as np import pytest from numpy.testing import assert_allclose -from stcal.ramp_fitting.ols_cas22 import JUMP_DET, Parameter, Variance, fit_ramps -from stcal.ramp_fitting.ols_cas22._jump import ( - FixedOffsets, - PixelOffsets, +from stcal.ramp_fitting._ols_cas22 import FixedOffsets, Parameter, PixelOffsets, Variance, fit_ramps +from stcal.ramp_fitting._ols_cas22._fit import ( + JUMP_DET, + _fill_fixed_values, _fill_pixel_values, - fill_fixed_values, + _init_ramps, ) -from stcal.ramp_fitting.ols_cas22._ramp import from_read_pattern, init_ramps +from stcal.ramp_fitting.ols_cas22 import DefaultThreshold, _create_metadata # Purposefully set a fixed seed so that the tests in this module are deterministic RNG = np.random.default_rng(619) @@ -36,7 +38,7 @@ GOOD_PROB = 0.7 -def test_init_ramps(): +def test__init_ramps(): """ Test turning dq flags into initial ramp splits Note that because `init_ramps` itself returns a stack, which does not have @@ -56,7 +58,7 @@ def test_init_ramps(): ) n_resultants, n_pixels = dq.shape - ramps = [init_ramps(dq[:, index], n_resultants) for index in range(n_pixels)] + ramps = [_init_ramps(dq[:, index], n_resultants) for index in range(n_pixels)] assert len(ramps) == dq.shape[1] == 16 @@ -111,13 +113,15 @@ def read_pattern(): ] -def test_from_read_pattern(read_pattern): +def test__create_metadata(read_pattern): """Test turning read_pattern into the time data""" - metadata = from_read_pattern(read_pattern, READ_TIME, len(read_pattern))._to_dict() # noqa: SLF001 - t_bar = metadata["t_bar"] - tau = metadata["tau"] - n_reads = metadata["n_reads"] + n_resultants = len(read_pattern) + t_bar, tau, n_reads = _create_metadata(read_pattern, READ_TIME) + + assert t_bar.shape == (n_resultants,) + assert tau.shape == (n_resultants,) + assert n_reads.shape == (n_resultants,) # Check that the data is correct assert_allclose(t_bar, [7.6, 15.2, 21.279999, 41.040001, 60.799999, 88.159996]) @@ -142,9 +146,8 @@ def ramp_data(read_pattern): metadata : dict The metadata computed from the read pattern """ - data = from_read_pattern(read_pattern, READ_TIME, len(read_pattern))._to_dict() # noqa: SLF001 - return data["t_bar"], data["tau"], data["n_reads"], read_pattern + return *_create_metadata(read_pattern, READ_TIME), read_pattern def test_fill_fixed_values(ramp_data): @@ -152,43 +155,23 @@ def test_fill_fixed_values(ramp_data): t_bar, tau, n_reads, _ = ramp_data n_resultants = len(t_bar) - fixed = np.empty((FixedOffsets.n_fixed_offsets, n_resultants - 1), dtype=np.float32) - fixed = fill_fixed_values(fixed, t_bar, tau, n_reads, n_resultants) - - # Sanity check that the shape of fixed is correct - assert fixed.shape == (2 * 4, n_resultants - 1) - - # Split into the different types of data - t_bar_diffs = fixed[FixedOffsets.single_t_bar_diff : FixedOffsets.double_t_bar_diff + 1, :] - t_bar_diff_sqrs = fixed[FixedOffsets.single_t_bar_diff_sqr : FixedOffsets.double_t_bar_diff_sqr + 1, :] - read_recip = fixed[FixedOffsets.single_read_recip : FixedOffsets.double_read_recip + 1, :] - var_slope_vals = fixed[FixedOffsets.single_var_slope_val : FixedOffsets.double_var_slope_val + 1, :] - - # Sanity check that these are all the right shape - assert t_bar_diffs.shape == (2, n_resultants - 1) - assert t_bar_diff_sqrs.shape == (2, n_resultants - 1) - assert read_recip.shape == (2, n_resultants - 1) - assert var_slope_vals.shape == (2, n_resultants - 1) + single_fixed = np.empty((FixedOffsets.n_fixed_offsets, n_resultants - 1), dtype=np.float32) + double_fixed = np.empty((FixedOffsets.n_fixed_offsets, n_resultants - 2), dtype=np.float32) + _fill_fixed_values(single_fixed, double_fixed, t_bar, tau, n_reads, n_resultants) # Check the computed data # These are computed using loop in cython, here we check against numpy # Single diffs - assert np.all(t_bar_diffs[0] == t_bar[1:] - t_bar[:-1]) - assert np.all(t_bar_diff_sqrs[0] == (t_bar[1:] - t_bar[:-1]) ** 2) - assert np.all(read_recip[0] == np.float32(1 / n_reads[1:]) + np.float32(1 / n_reads[:-1])) - assert np.all(var_slope_vals[0] == (tau[1:] + tau[:-1] - 2 * np.minimum(t_bar[1:], t_bar[:-1]))) + assert np.all(single_fixed[0, :] == t_bar[1:] - t_bar[:-1]) + assert np.all(single_fixed[1, :] == (t_bar[1:] - t_bar[:-1]) ** 2) + assert np.all(single_fixed[2, :] == np.float32(1 / n_reads[1:]) + np.float32(1 / n_reads[:-1])) + assert np.all(single_fixed[3, :] == (tau[1:] + tau[:-1] - 2 * np.minimum(t_bar[1:], t_bar[:-1]))) # Double diffs - assert np.all(t_bar_diffs[1, :-1] == t_bar[2:] - t_bar[:-2]) - assert np.all(t_bar_diff_sqrs[1, :-1] == (t_bar[2:] - t_bar[:-2]) ** 2) - assert np.all(read_recip[1, :-1] == np.float32(1 / n_reads[2:]) + np.float32(1 / n_reads[:-2])) - assert np.all(var_slope_vals[1, :-1] == (tau[2:] + tau[:-2] - 2 * np.minimum(t_bar[2:], t_bar[:-2]))) - - # Last double diff should be NaN - assert np.isnan(t_bar_diffs[1, -1]) - assert np.isnan(t_bar_diff_sqrs[1, -1]) - assert np.isnan(read_recip[1, -1]) - assert np.isnan(var_slope_vals[1, -1]) + assert np.all(double_fixed[0, :] == t_bar[2:] - t_bar[:-2]) + assert np.all(double_fixed[1, :] == (t_bar[2:] - t_bar[:-2]) ** 2) + assert np.all(double_fixed[2, :] == np.float32(1 / n_reads[2:]) + np.float32(1 / n_reads[:-2])) + assert np.all(double_fixed[3, :] == (tau[2:] + tau[:-2] - 2 * np.minimum(t_bar[2:], t_bar[:-2]))) def _generate_resultants(read_pattern, n_pixels=1): @@ -251,53 +234,42 @@ def pixel_data(ramp_data): t_bar, tau, n_reads, read_pattern = ramp_data n_resultants = len(t_bar) - fixed = np.empty((FixedOffsets.n_fixed_offsets, n_resultants - 1), dtype=np.float32) - fixed = fill_fixed_values(fixed, t_bar, tau, n_reads, n_resultants) + single_fixed = np.empty((FixedOffsets.n_fixed_offsets, n_resultants - 1), dtype=np.float32) + double_fixed = np.empty((FixedOffsets.n_fixed_offsets, n_resultants - 2), dtype=np.float32) + _fill_fixed_values(single_fixed, double_fixed, t_bar, tau, n_reads, n_resultants) resultants = _generate_resultants(read_pattern) - return resultants, t_bar, tau, n_reads, fixed + return resultants, t_bar, n_reads, single_fixed, double_fixed def test__fill_pixel_values(pixel_data): """Test computing the initial pixel data""" - resultants, t_bar, tau, n_reads, fixed = pixel_data + resultants, t_bar, n_reads, single_fixed, double_fixed = pixel_data n_resultants = len(t_bar) - pixel = np.empty((PixelOffsets.n_pixel_offsets, n_resultants - 1), dtype=np.float32) - pixel = _fill_pixel_values(pixel, resultants, fixed, READ_NOISE, n_resultants) - - # Sanity check that the shape of pixel is correct - assert pixel.shape == (2 * 2, n_resultants - 1) - - # Split into the different types of data - local_slopes = pixel[PixelOffsets.single_local_slope : PixelOffsets.double_local_slope + 1, :] - var_read_noise = pixel[PixelOffsets.single_var_read_noise : PixelOffsets.double_var_read_noise + 1, :] - - # Sanity check that these are all the right shape - assert local_slopes.shape == (2, n_resultants - 1) - assert var_read_noise.shape == (2, n_resultants - 1) + single_pixel = np.empty((PixelOffsets.n_pixel_offsets, n_resultants - 1), dtype=np.float32) + double_pixel = np.empty((PixelOffsets.n_pixel_offsets, n_resultants - 2), dtype=np.float32) + _fill_pixel_values( + single_pixel, double_pixel, single_fixed, double_fixed, resultants, READ_NOISE, n_resultants + ) # Check the computed data # These are computed using loop in cython, here we check against numpy # Single diffs - assert np.all(local_slopes[0] == (resultants[1:] - resultants[:-1]) / (t_bar[1:] - t_bar[:-1])) + assert np.all(single_pixel[0, :] == (resultants[1:] - resultants[:-1]) / (t_bar[1:] - t_bar[:-1])) assert np.all( - var_read_noise[0] + single_pixel[1, :] == np.float32(READ_NOISE**2) * (np.float32(1 / n_reads[1:]) + np.float32(1 / n_reads[:-1])) ) # Double diffs - assert np.all(local_slopes[1, :-1] == (resultants[2:] - resultants[:-2]) / (t_bar[2:] - t_bar[:-2])) + assert np.all(double_pixel[0, :] == (resultants[2:] - resultants[:-2]) / (t_bar[2:] - t_bar[:-2])) assert np.all( - var_read_noise[1, :-1] + double_pixel[1, :] == np.float32(READ_NOISE**2) * (np.float32(1 / n_reads[2:]) + np.float32(1 / n_reads[:-2])) ) - # Last double diff should be NaN - assert np.isnan(local_slopes[1, -1]) - assert np.isnan(var_read_noise[1, -1]) - @pytest.fixture(scope="module") def detector_data(read_pattern): @@ -320,9 +292,49 @@ def detector_data(read_pattern): return resultants, read_noise, read_pattern +class AllocatedData(NamedTuple): + parameters: np.ndarray + variances: np.ndarray + t_bar: np.ndarray + tau: np.ndarray + n_reads: np.ndarray + single_pixel: np.ndarray + double_pixel: np.ndarray + single_fixed: np.ndarray + double_fixed: np.ndarray + + +@pytest.fixture() +def allocated_data(read_pattern): + """ + Allocate the working data for the tests + """ + n_resultants = len(read_pattern) + + # Initialize the output arrays + parameters = np.empty((N_PIXELS, Parameter.n_param), dtype=np.float32) + variances = np.empty((N_PIXELS, Variance.n_var), dtype=np.float32) + + # Initialize scratch storage + single_pixel = np.empty((PixelOffsets.n_pixel_offsets, n_resultants - 1), dtype=np.float32) + double_pixel = np.empty((PixelOffsets.n_pixel_offsets, n_resultants - 2), dtype=np.float32) + single_fixed = np.empty((FixedOffsets.n_fixed_offsets, n_resultants - 1), dtype=np.float32) + double_fixed = np.empty((FixedOffsets.n_fixed_offsets, n_resultants - 2), dtype=np.float32) + + return AllocatedData( + parameters, + variances, + *_create_metadata(read_pattern, READ_TIME), + single_pixel, + double_pixel, + single_fixed, + double_fixed, + ) + + @pytest.mark.parametrize("use_jump", [True, False]) @pytest.mark.parametrize("use_dq", [True, False]) -def test_fit_ramps(detector_data, use_jump, use_dq): +def test_fit_ramps(detector_data, allocated_data, use_jump, use_dq): """ Test fitting ramps Since no jumps are simulated in the data, jump detection shouldn't pick @@ -346,13 +358,43 @@ def test_fit_ramps(detector_data, use_jump, use_dq): if not use_dq: assert okay.all() + # Mirror what python supporting code does + if use_jump: + single_pixel = allocated_data.single_pixel + double_pixel = allocated_data.double_pixel + single_fixed = allocated_data.single_fixed + double_fixed = allocated_data.double_fixed + else: + single_pixel = np.empty((0, 0), dtype=np.float32) + double_pixel = np.empty((0, 0), dtype=np.float32) + single_fixed = np.empty((0, 0), dtype=np.float32) + double_fixed = np.empty((0, 0), dtype=np.float32) + output = fit_ramps( - resultants, dq, read_noise, READ_TIME, read_pattern, use_jump=use_jump, include_diagnostic=True + resultants, + dq, + read_noise, + *allocated_data[:-4], + single_pixel, + double_pixel, + single_fixed, + double_fixed, + use_jump, + DefaultThreshold.INTERCEPT.value, + DefaultThreshold.CONSTANT.value, + True, ) - assert len(output.fits) == N_PIXELS # sanity check that a fit is output for each pixel + assert len(output) == N_PIXELS # sanity check that a fit is output for each pixel + + # Check that the intercept is always zero + assert np.all(allocated_data.parameters[:, Parameter.intercept] == 0) + + slopes = allocated_data.parameters[:, Parameter.slope] + read_vars = allocated_data.variances[:, Variance.read_var] + poisson_vars = allocated_data.variances[:, Variance.poisson_var] chi2 = 0 - for fit, use in zip(output.fits, okay): + for fit, slope, read_var, poisson_var, use in zip(output, slopes, read_vars, poisson_vars, okay): if not use_dq and not use_jump: ##### The not use_jump makes this NOT test for false positives ##### # Check that the data generated does not generate any false positives @@ -365,14 +407,14 @@ def test_fit_ramps(detector_data, use_jump, use_dq): if use: # Add okay ramps to chi2 - total_var = fit["average"]["read_var"] + fit["average"]["poisson_var"] + total_var = read_var + poisson_var if total_var != 0: - chi2 += (fit["average"]["slope"] - FLUX) ** 2 / total_var + chi2 += (slope - FLUX) ** 2 / total_var else: # Check no slope fit for bad ramps - assert fit["average"]["slope"] == 0 - assert fit["average"]["read_var"] == 0 - assert fit["average"]["poisson_var"] == 0 + assert slope == 0 + assert read_var == 0 + assert poisson_var == 0 assert use_dq # sanity check that this branch is only encountered when use_dq = True @@ -380,29 +422,6 @@ def test_fit_ramps(detector_data, use_jump, use_dq): assert np.abs(chi2 - 1) < CHI2_TOL -@pytest.mark.parametrize("use_jump", [True, False]) -def test_fit_ramps_array_outputs(detector_data, use_jump): - """ - Test that the array outputs line up with the dictionary output - """ - resultants, read_noise, read_pattern = detector_data - dq = np.zeros(resultants.shape, dtype=np.int32) - - output = fit_ramps( - resultants, dq, read_noise, READ_TIME, read_pattern, use_jump=use_jump, include_diagnostic=True - ) - - for fit, par, var in zip(output.fits, output.parameters, output.variances): - assert par[Parameter.intercept] == 0 - assert par[Parameter.slope] == fit["average"]["slope"] - - assert var[Variance.read_var] == fit["average"]["read_var"] - assert var[Variance.poisson_var] == fit["average"]["poisson_var"] - assert var[Variance.total_var] == np.float32( - fit["average"]["read_var"] + fit["average"]["poisson_var"] - ) - - @pytest.fixture(scope="module") def jump_data(detector_data): """ @@ -453,7 +472,7 @@ def jump_data(detector_data): return resultants, read_noise, read_pattern, jump_reads, jump_resultants -def test_find_jumps(jump_data): +def test_find_jumps(jump_data, allocated_data): """ Full unit tests to demonstrate that we can detect jumps in any read (except the first one) and that we correctly remove these reads from the fit to recover @@ -463,16 +482,32 @@ def test_find_jumps(jump_data): dq = np.zeros(resultants.shape, dtype=np.int32) output = fit_ramps( - resultants, dq, read_noise, READ_TIME, read_pattern, use_jump=True, include_diagnostic=True + resultants, + dq, + read_noise, + *allocated_data, + True, + DefaultThreshold.INTERCEPT.value, + DefaultThreshold.CONSTANT.value, + True, ) - assert len(output.fits) == len(jump_reads) # sanity check that a fit/jump is set for every pixel + assert len(output) == len(jump_reads) # sanity check that a fit/jump is set for every pixel + + # Check that the intercept is always zero + assert np.all(allocated_data.parameters[:, Parameter.intercept] == 0) + + slopes = allocated_data.parameters[:, Parameter.slope] + read_vars = allocated_data.variances[:, Variance.read_var] + poisson_vars = allocated_data.variances[:, Variance.poisson_var] chi2 = 0 incorrect_too_few = 0 incorrect_too_many = 0 incorrect_does_not_capture = 0 incorrect_other = 0 - for fit, jump_index, resultant_index in zip(output.fits, jump_reads, jump_resultants): + for fit, slope, read_var, poisson_var, jump_index, resultant_index in zip( + output, slopes, read_vars, poisson_vars, jump_reads, jump_resultants + ): # Check that the jumps are detected correctly if jump_index == 0: # There is no way to detect a jump if it is in the very first read @@ -525,30 +560,57 @@ def test_find_jumps(jump_data): # assert set(ramp_indices).union(fit['jumps']) == set(range(len(read_pattern))) # Compute the chi2 for the fit and add it to a running "total chi2" - total_var = fit["average"]["read_var"] + fit["average"]["poisson_var"] - chi2 += (fit["average"]["slope"] - FLUX) ** 2 / total_var + total_var = read_var + poisson_var + chi2 += (slope - FLUX) ** 2 / total_var # Check that the average chi2 is ~1. chi2 /= N_PIXELS - incorrect_too_few - incorrect_too_many - incorrect_does_not_capture - incorrect_other assert np.abs(chi2 - 1) < CHI2_TOL -def test_override_default_threshold(jump_data): +def test_override_default_threshold(jump_data, allocated_data): """This tests that we can override the default jump detection threshold constants""" resultants, read_noise, read_pattern, jump_reads, jump_resultants = jump_data dq = np.zeros(resultants.shape, dtype=np.int32) - standard = fit_ramps(resultants, dq, read_noise, READ_TIME, read_pattern, use_jump=True) - override = fit_ramps( - resultants, dq, read_noise, READ_TIME, read_pattern, use_jump=True, intercept=0, constant=0 + # Initialize the output arrays + standard_parameters = np.empty((N_PIXELS, Parameter.n_param), dtype=np.float32) + standard_variances = np.empty((N_PIXELS, Variance.n_var), dtype=np.float32) + fit_ramps( + resultants, + dq, + read_noise, + standard_parameters, + standard_variances, + *allocated_data[2:], + True, + DefaultThreshold.INTERCEPT.value, + DefaultThreshold.CONSTANT.value, + False, + ) + + override_parameters = np.empty((N_PIXELS, Parameter.n_param), dtype=np.float32) + override_variances = np.empty((N_PIXELS, Variance.n_var), dtype=np.float32) + fit_ramps( + resultants, + dq, + read_noise, + override_parameters, + override_variances, + *allocated_data[2:], + True, + 0, + 0, + False, ) # All this is intended to do is show that with all other things being equal passing non-default # threshold parameters changes the results. - assert (standard.parameters != override.parameters).any() + assert (standard_parameters != override_parameters).any() + assert (standard_variances != override_variances).any() -def test_jump_dq_set(jump_data): +def test_jump_dq_set(jump_data, allocated_data): # Check the DQ flag value to start assert 2**2 == JUMP_DET @@ -556,10 +618,17 @@ def test_jump_dq_set(jump_data): dq = np.zeros(resultants.shape, dtype=np.int32) output = fit_ramps( - resultants, dq, read_noise, READ_TIME, read_pattern, use_jump=True, include_diagnostic=True + resultants, + dq, + read_noise, + *allocated_data, + True, + DefaultThreshold.INTERCEPT.value, + DefaultThreshold.CONSTANT.value, + True, ) - for fit, pixel_dq in zip(output.fits, output.dq.transpose()): + for fit, pixel_dq in zip(output, dq.transpose()): # Check that all jumps found get marked assert (pixel_dq[fit["jumps"]] == JUMP_DET).all() diff --git a/tests/test_ramp_fitting_cas22.py b/tests/test_ramp_fitting_cas22.py index e6266fb90..123a69c1d 100644 --- a/tests/test_ramp_fitting_cas22.py +++ b/tests/test_ramp_fitting_cas22.py @@ -5,7 +5,7 @@ import numpy as np import pytest -from stcal.ramp_fitting import ols_cas22_fit as ramp +from stcal.ramp_fitting import ols_cas22 as ramp # Purposefully set a fixed seed so that the tests in this module are deterministic RNG = np.random.default_rng(42) @@ -40,7 +40,7 @@ def test_simulated_ramps(use_unit, use_dq): bad = RNG.uniform(size=resultants.shape) > 0.7 dq |= bad - output = ramp.fit_ramps_casertano( + output = ramp.fit_ramps( resultants, dq, read_noise, ROMAN_READ_TIME, read_pattern, threshold_constant=0, threshold_intercept=0 ) # set the threshold parameters # to demo the interface. This