diff --git a/python/lsst/obs/pfs/h4Linearity/__init__.py b/python/lsst/obs/pfs/h4Linearity/__init__.py
new file mode 100644
index 00000000..ee6d4ebf
--- /dev/null
+++ b/python/lsst/obs/pfs/h4Linearity/__init__.py
@@ -0,0 +1,86 @@
+"""h4Linearity — per-pixel nonlinearity correction for H4 IR-detector ramps.
+
+This package fits and applies per-pixel polynomial nonlinearity
+corrections to up-the-ramp H4 reads. It is consumed by ``PfsIsrTask``
+on every NIR visit/detector.
+
+Pipeline path (the single API call the pipeline makes):
+
+    correction = loadFits(<calib-product>)          # or fit(...) offline
+    linearized = apply(correction, Ramp(reads=cube, validMask=...))
+
+``correction`` is a :class:`LinearityCorrection` (model coefficients,
+per-pixel valid range, fit-time bad-pixel mask, fit diagnostics).
+``linearized`` is a :class:`LinearizedRamp` (cumulative-linear cube
+plus a merged bad-pixel mask).
+
+For partial-ramp re-anchoring the pipeline also uses :func:`applyFrame`
+on a single cumulative read. CR / ASIC-glitch detection on the
+linearized cube lives in the sibling :mod:`cr` module.
+
+Public entry points:
+
+- :func:`apply`, :func:`applyFrame` — linearize a ramp / a frame.
+- :func:`fit` — fit a correction from training ramps.
+- :func:`loadFits`, :func:`saveFits`, :func:`isH4LinearityFile` —
+  read / write / sniff the on-disk FITS calibration product.
+- :class:`Ramp`, :class:`LinearizedRamp`, :class:`LinearityCorrection`,
+  :class:`Diagnostics` — I/O dataclasses.
+- :class:`Model`, :class:`PolynomialModel` — model protocol + the
+  built-in Chebyshev implementation.
+- Bad-pixel bit-flag constants (``MASKED_BY_INPUT``, ``BELOW_VALID_RANGE``
+  etc.) for decoding the masks returned by :func:`apply`.
+"""
+
+from __future__ import annotations
+
+from . import cr
+from .apply import apply, applyFrame
+from .fit import fit
+from .io import isH4LinearityFile, loadFits, saveFits
+from .models import Model, PolynomialModel
+from .types import (
+    ABOVE_VALID_RANGE,
+    ASIC_GLITCH,
+    BELOW_VALID_RANGE,
+    BORDER_PIX,
+    DEAD,
+    FIT_FAILED,
+    HIGH_FIT_RESIDUAL,
+    INSUFFICIENT_POINTS,
+    MASKED_BY_INPUT,
+    NON_MONOTONIC,
+    UNCLASSIFIED,
+    UNSTABLE,
+    Diagnostics,
+    LinearityCorrection,
+    LinearizedRamp,
+    Ramp,
+)
+
+__all__ = [  # star-import surface; the ``cr`` submodule is not listed
+    "Ramp",  # I/O dataclasses (from .types)
+    "LinearizedRamp",
+    "Diagnostics",
+    "LinearityCorrection",
+    "Model",  # model protocol and implementation (from .models)
+    "PolynomialModel",
+    "fit",  # fitting / application entry points (from .fit, .apply)
+    "apply",
+    "applyFrame",
+    "saveFits",  # on-disk calibration product helpers (from .io)
+    "loadFits",
+    "isH4LinearityFile",
+    "MASKED_BY_INPUT",  # bad-pixel bit-flag constants (from .types)
+    "INSUFFICIENT_POINTS",
+    "FIT_FAILED",
+    "NON_MONOTONIC",
+    "BORDER_PIX",
+    "ABOVE_VALID_RANGE",
+    "BELOW_VALID_RANGE",
+    "UNCLASSIFIED",
+    "UNSTABLE",
+    "ASIC_GLITCH",
+    "HIGH_FIT_RESIDUAL",
+    "DEAD",
+]
diff --git a/python/lsst/obs/pfs/h4Linearity/apply.py b/python/lsst/obs/pfs/h4Linearity/apply.py
new file mode 100644
index 00000000..7ce9b454
--- /dev/null
+++ b/python/lsst/obs/pfs/h4Linearity/apply.py
@@ -0,0 +1,232 @@
+"""Apply a fitted LinearityCorrection to a new ramp or single cumulative frame.
+
+This module is the **production pipeline entry point** for nonlinearity
+correction on H4 ramps. ``apply(correction, ramp)`` is the single call
+PfsIsrTask makes per visit/detector after dark subtraction and before
+CR/glitch detection; ``applyFrame`` is the per-frame variant used when
+re-anchoring a partial range.
+"""
+
+from __future__ import annotations
+
+import numpy as np
+
+from .types import (
+    ABOVE_VALID_RANGE,
+    BELOW_VALID_RANGE,
+    LinearityCorrection,
+    LinearizedRamp,
+    Ramp,
+)
+
+
+def apply(correction: LinearityCorrection, ramp: Ramp) -> LinearizedRamp:
+    """Linearize a full ramp using a fitted per-pixel correction.
+
+    Maps each cumulative-ADU value ``m`` through the per-pixel model
+    ``t = model.evaluate(coefficients, x)`` where ``x`` is ``m`` rescaled
+    to ``[-1, 1]`` over the per-pixel fit range ``[fitMin, fitMax]``
+    (Chebyshev domain). The cube is processed in chunks of reads with
+    vectorized numpy ops; no Python-level pixel loop.
+
+    Pixel-by-pixel handling:
+
+    - **Valid pixels** (``correction.badPixelMask == 0`` *and*
+      ``ramp.validMask == 0``): the linearized estimate is returned.
+    - **Bad pixels** (any bit set in ``correction.badPixelMask`` OR
+      ``ramp.validMask``): the input value ``m`` is passed through
+      unchanged. The pixel stays flagged in the returned
+      ``badPixelMask`` (which preserves whatever bits the caller's
+      ``validMask`` carried in plus the fit-time bits). The runtime
+      range check is *skipped* on these pixels — per the
+      "first-reason-wins" rule, a pixel that fit() already flagged
+      should not also pick up ``ABOVE_VALID_RANGE`` from a
+      stale-default ``fitMax`` value.
+    - **Out-of-range** (otherwise-good pixel with any read below
+      ``fitMin`` or above ``fitMax``): the linearization formula is
+      still evaluated (Chebyshev extrapolation), but the pixel's flag
+      in the returned ``badPixelMask`` is OR'd with
+      ``BELOW_VALID_RANGE`` / ``ABOVE_VALID_RANGE``.
+
+    The returned ramp is float32 cumulative-ADU values, same shape as
+    ``ramp.reads``. The returned ``badPixelMask`` is uint16: fit-time
+    bits (preserved from the calib) | caller's ``validMask`` bits |
+    freshly computed range bits.
+
+    Parameters
+    ----------
+    correction : LinearityCorrection
+        Fitted correction from :func:`fit` or :func:`loadFits`. Must
+        share ``(H, W)`` with ``ramp.reads``.
+    ramp : Ramp
+        Input ramp. ``reads`` must be 3-D ``(H, W, N)`` cumulative ADU
+        with the time axis last. ``validMask`` is optional ``(H, W)`` —
+        0 means valid; any nonzero bit marks a pixel that should be
+        passed through. **Mutated in place** when ``reads`` is already
+        float32: the returned ``cumulativeLinear`` then aliases
+        ``ramp.reads``, which ends up holding the linearized values
+        (bad pixels keep their input values). Other dtypes are converted
+        to a fresh float32 copy and left untouched. The production
+        caller (``PfsIsrTask.makeNirExposure``) drops the ``Ramp`` right
+        away; code that keeps it should copy ``ramp.reads`` first.
+
+    Returns
+    -------
+    LinearizedRamp
+        ``cumulativeLinear`` ``(H, W, N)`` float32 and ``badPixelMask``
+        ``(H, W)`` uint16 (the merged fit-time + input + range bits).
+
+    Raises
+    ------
+    ValueError
+        If ``ramp.reads`` is not 3-D, has zero reads, or its ``(H, W)``
+        does not match ``correction.coefficients.shape[1:]``.
+    """
+    if ramp.reads.ndim != 3:
+        raise ValueError(
+            f"ramp.reads must be 3-D (H, W, N); got {ramp.reads.shape}"
+        )
+    if ramp.reads.shape[-1] == 0:
+        raise ValueError("ramp has zero reads")
+    if ramp.reads.shape[:-1] != correction.coefficients.shape[1:]:
+        raise ValueError(
+            f"ramp H,W = {ramp.reads.shape[:-1]} does not match "
+            f"correction H,W = {correction.coefficients.shape[1:]}"
+        )
+
+    # copy=False: when the input is already float32 (the production
+    # case after dark-subtraction), skip the wasted ~6.7 GB allocation
+    # of an identical-dtype copy.
+    m = ramp.reads.astype(np.float32, copy=False)  # (H, W, N)
+
+    # ---- Precompute everything we need from raw m, in order — then
+    # mutate m → x in place so the Chebyshev evaluation runs without an
+    # extra (H, W, N) buffer. The mutation is safe in production: the
+    # caller in ``PfsIsrTask.makeNirExposure`` reassigns its ``flux``
+    # name immediately after this call (``flux =
+    # linearizedRamp.cumulativeLinear``) so the mutated buffer is no
+    # longer needed.
+
+    # Merge fit-time bpm + caller-supplied internal mask. The
+    # in-memory internal mask is uint16; the persisted calib bpm is
+    # uint8 (fit-time bits only fit there). ``ramp.validMask`` is the
+    # in-bound internal mask — its bits ride through unchanged (the
+    # caller may have OR'd BORDER_PIX, MASKED_BY_INPUT, and/or any
+    # other internal bit before passing the ramp in).
+    bpm = correction.badPixelMask.astype(np.uint16)
+    if ramp.validMask is not None:
+        bpm |= ramp.validMask.astype(np.uint16, copy=False)
+    bad = bpm != 0
+    # Save raw m at bad-pixel positions sparsely (boolean-mask indexing
+    # already yields a copy) so we can restore them AFTER the in-place
+    # mutation. Typically ~1-2 % of pixels are bad → tens of MB.
+    mAtBad = m[bad, :] if bad.any() else None
+
+    # Range flags (computed from raw m before mutation), only on pixels
+    # with no pre-existing reason — a pixel fit() couldn't model (so
+    # ``fitMax == 0``) must not pick up ``ABOVE_VALID_RANGE`` from any
+    # positive ADU. NaN-ignoring max/min reductions give the same answer
+    # as a per-read comparison without an (H, W, N) bool temporary.
+    goodPixels = ~bad
+    bpm[(np.fmax.reduce(m, axis=-1) > correction.fitMax)
+        & goodPixels] |= ABOVE_VALID_RANGE
+    bpm[(np.fmin.reduce(m, axis=-1) < correction.fitMin)
+        & goodPixels] |= BELOW_VALID_RANGE
+
+    # Affine map + polynomial evaluate, CHUNKED along the time axis
+    # (axis=-1) and written back into ``m`` in place. The Chebyshev
+    # coefficients are converted to monomial form ONCE outside the
+    # loop; per-chunk evaluation then uses Horner with a single
+    # accumulator buffer. Direct in-place Clenshaw on the Chebyshev
+    # coefficients would need several extra cube-sized buffers — too
+    # memory-hungry for the production cube. Peak transient: the full
+    # ``m`` cube + monomial coefficients ``(order+1, H, W)`` (small) +
+    # one chunk-sized accumulator. For N=88, H=W=4096, chunkSize=8,
+    # order=4 that's 5.85 GB + 0.32 GB + 0.51 GB.
+    denom = correction.fitMax - correction.fitMin
+    denom = np.where(denom > 0, denom, 1.0)
+    monCoefs = correction.model.chebToMonomial(
+        correction.coefficients
+    ).astype(np.float32, copy=False)
+    N = m.shape[-1]
+    chunkSize = 8
+    for k0 in range(0, N, chunkSize):
+        k1 = min(k0 + chunkSize, N)
+        chunk = m[..., k0:k1]  # writable view into m
+        # m → x ∈ [-1, 1] for Chebyshev evaluation, in place.
+        chunk -= correction.fitMin[..., None]
+        chunk *= 2.0
+        chunk /= denom[..., None]
+        chunk -= 1.0
+        # Horner evaluate (allocates ONE chunk-sized buffer internally).
+        # Write the result back into the same chunk slice of m: from here,
+        # m[..., k0:k1] holds linearized values, not Chebyshev x.
+        m[..., k0:k1] = correction.model.evaluateMonomial(monCoefs, chunk)
+    del monCoefs
+
+    # Bad-pixel pass-through: restore the raw m values at bad positions
+    # (m now holds linearized values everywhere except where we restore).
+    if mAtBad is not None:
+        m[bad, :] = mAtBad
+        del mAtBad
+
+    return LinearizedRamp(
+        cumulativeLinear=m.astype(np.float32, copy=False),
+        badPixelMask=bpm,
+    )
+
+
+def applyFrame(
+    correction: LinearityCorrection, m: np.ndarray
+) -> tuple[np.ndarray, np.ndarray]:
+    """Linearize one cumulative-ADU frame with a fitted correction.
+
+    Per-frame counterpart of :func:`apply`, used by PfsIsrTask to
+    re-anchor a partial ramp at its first kept read. The frame is
+    rescaled to the per-pixel Chebyshev domain and evaluated with
+    ``correction.model.evaluate``. No merged bad-pixel mask is built
+    here — only a boolean out-of-range map that callers can OR into
+    whatever mask they maintain.
+
+    Parameters
+    ----------
+    correction : LinearityCorrection
+        Supplies the model, coefficients, fit range and bad-pixel mask.
+    m : np.ndarray
+        Cumulative-ADU frame of shape ``(H, W)``; converted to float32.
+
+    Returns
+    -------
+    t : np.ndarray
+        ``(H, W)`` float32 linearized frame. Pixels with any bit set in
+        ``correction.badPixelMask`` carry the input value unchanged.
+    oor : np.ndarray
+        ``(H, W)`` bool, True where ``m`` lies below ``fitMin`` or
+        above ``fitMax`` (the model was extrapolated there).
+
+    Raises
+    ------
+    ValueError
+        If ``m.shape != correction.coefficients.shape[1:]``.
+    """
+    frame = np.asarray(m, dtype=np.float32)
+    expectedShape = correction.coefficients.shape[1:]
+    if frame.shape != expectedShape:
+        raise ValueError(
+            f"m shape {frame.shape} does not match correction "
+            f"H,W = {expectedShape}"
+        )
+
+    lower, upper = correction.fitMin, correction.fitMax
+    span = upper - lower
+    span = np.where(span > 0, span, 1.0)
+    x = 2.0 * (frame - lower) / span - 1.0
+
+    linear = correction.model.evaluate(correction.coefficients, x)
+    outOfRange = (frame < lower) | (frame > upper)
+
+    passThrough = correction.badPixelMask != 0
+    if passThrough.any():
+        linear[passThrough] = frame[passThrough]
+
+    return linear.astype(np.float32, copy=False), outOfRange
diff --git a/python/lsst/obs/pfs/h4Linearity/cr.py b/python/lsst/obs/pfs/h4Linearity/cr.py
new file mode 100644
index 00000000..c0667909
--- /dev/null
+++ b/python/lsst/obs/pfs/h4Linearity/cr.py
@@ -0,0 +1,731 @@
+"""Post-linearization CR + ASIC-glitch detection and repair on H4 ramps.
+
+Iteratively detect single-read positive rate outliers (cosmic rays) and
+matched up/down delta pairs (ASIC digital glitches) in a linearized
+cumulative ramp. Repair them by replacing the affected deltas with the
+per-pixel UTR rate and re-cumsumming (pair-aware so post-glitch reads
+are preserved), and report per-delta CR / glitch masks.
+
+The pure-numpy algorithm is here; the wrapper that integrates with
+``PfsIsrTask`` lives in ``lsst.obs.pfs.isrTask``.
+"""
+from __future__ import annotations
+
+import time
+from dataclasses import dataclass, field
+from typing import Optional
+
+import numpy as np
+
+
+DEFAULT_SIGMA_FLOOR_ADU = 8.0  # default lower bound on per-pixel sigma (ADU)
+
+# Defaults for iterativeUtrDetectAndRepair.
+DEFAULT_ITER_N_SIGMA = 5.0  # default ``nSigma`` threshold multiplier
+DEFAULT_MAX_ITERATIONS = 5  # default ``maxIterations``
+
+
+@dataclass
+class IterativeRepairResult:
+    """Outcome of :func:`iterativeUtrDetectAndRepair`.
+
+    Attributes
+    ----------
+    nIterations : int
+        Number of iterations actually run.
+    crFlagMask : np.ndarray
+        Boolean ``(H, W, N-1)``; True at delta positions identified as
+        single-read CRs. The last axis is the delta axis: a flag at
+        ``[y, x, k]`` repaired the delta from read ``k`` to read
+        ``k+1`` at pixel ``(y, x)``.
+    glitchFlagMask : np.ndarray
+        Boolean ``(H, W, N-1)``; True at delta positions identified as
+        ASIC glitches. Glitches come in pairs (one positive, one negative)
+        so this mask is True at BOTH delta indices of each pair.
+    unclassifiedFlagMask : np.ndarray
+        Boolean ``(H, W, N-1)``; True at delta positions whose absolute
+        residual exceeded the detection threshold but did not fall into
+        the CR / glitch pair / boundary classes — negative-residual
+        outliers and CR candidates demoted by the cumulative-drop check.
+        Diagnostic only; not subtracted from ``rate``.
+    badPixelMask : np.ndarray
+        Boolean ``(H, W)``; True at pixels whose ramp is dominated by
+        RTS / telegraph-noise behaviour rather than a single CR event
+        (many 3σ delta excursions AND a significantly negative ``rate``).
+        Downstream consumers should OR this into the ``BAD`` mask plane
+        and ignore any CR/glitch flags at these pixels — their
+        classifications are unreliable.
+    rate : np.ndarray
+        ``(H, W)`` float32; the per-pixel UTR-weighted rate
+        (= optimal least-squares slope) computed on the
+        CR/glitch-corrected ramp. Equivalent to
+        :meth:`PfsIsrTask.calcUTRrates` applied to the reconstructed
+        cumulative flux ``read0 + cumsum(deltas)``.
+    sigma : np.ndarray
+        ``(H, W)`` float32; per-delta noise estimate used for thresholding.
+    nCRs : int
+        Count of delta positions flagged as CR.
+    nGlitchPairs : int
+        Count of distinct glitch pairs (= nGlitchFlags / 2).
+    nByIteration : list of tuple
+        Per-iteration ``(newCRs, newGlitchPairs)`` tuples for inspection.
+    iterationTimings : list of float
+        Wall-clock seconds per iteration. Useful for profiling on real
+        (4096^2) data.
+    """
+
+    nIterations: int  # declaration order is the positional __init__ order
+    crFlagMask: np.ndarray
+    glitchFlagMask: np.ndarray
+    unclassifiedFlagMask: np.ndarray
+    badPixelMask: np.ndarray
+    rate: np.ndarray
+    sigma: np.ndarray
+    nCRs: int
+    nGlitchPairs: int
+    nByIteration: list[tuple[int, int]]
+    iterationTimings: list[float] = field(default_factory=list)
+
+
+def _rampQR(deltas: np.ndarray) -> tuple:
+    """Per-pixel 25th/50th/75th percentile of ``deltas`` along the time axis.
+
+    Equivalent to ``np.percentile(deltas, [25, 50, 75], axis=-1)`` with
+    method='linear', but one ``ndarray.partition`` pass places all the
+    needed ranks at once instead of three separate percentile calls.
+
+    ``deltas`` is ``(H, W, N-1)`` with the time axis last. Partitioning
+    runs on a scratch copy, so the caller's ``deltas`` is not reordered.
+    Every returned array owns its memory: a percentile that lands on an
+    exact rank is copied out of the scratch buffer rather than returned
+    as a strided view, so the cube-sized scratch is freed on return.
+
+    The median is returned as float32 (the CR detector consumes it as a
+    per-pixel rate); ``p25``/``p75`` keep ``deltas``'s dtype for float
+    input. Raises ``ValueError`` if the time axis is empty.
+    """
+    N = deltas.shape[-1]
+    if N == 0:
+        raise ValueError("deltas has an empty time axis")
+
+    def _ranks(q):
+        r = (N - 1) * q
+        lo = int(np.floor(r))
+        hi = int(np.ceil(r))
+        return lo, hi, r - lo
+
+    lo25, hi25, f25 = _ranks(0.25)
+    lo50, hi50, f50 = _ranks(0.50)
+    lo75, hi75, f75 = _ranks(0.75)
+    kth = sorted({lo25, hi25, lo50, hi50, lo75, hi75})
+
+    def _interp(arr, lo, hi, frac):
+        """Linear interpolation between ranks ``lo`` and ``hi`` on axis=-1."""
+        if lo == hi:
+            # Copy: a bare ``arr[..., lo]`` view would keep ``scratch`` alive.
+            return arr[..., lo].copy()
+        return (1.0 - frac) * arr[..., lo] + frac * arr[..., hi]
+
+    # Partition a scratch copy so the caller's ``deltas`` keeps its time
+    # order; the copy is transient and the same size as ``deltas``.
+    scratch = deltas.copy()
+    scratch.partition(kth, axis=-1)
+
+    p25 = _interp(scratch, lo25, hi25, f25)
+    median = _interp(scratch, lo50, hi50, f50).astype(np.float32, copy=False)
+    p75 = _interp(scratch, lo75, hi75, f75)
+    return p25, median, p75
+
+
+def _utrRateSimple(cube: np.ndarray) -> np.ndarray:
+    """Return the per-pixel rate (ADU/read) as the median of read deltas.
+
+    ``cube`` holds cumulative ADU with shape ``(H, W, N)``, time axis
+    last. Successive reads are differenced along that axis and the
+    median of the ``N-1`` steps is returned per pixel as float32.
+
+    The median is preferred over a least-squares slope because it
+    tolerates a few CR/glitch hits in the ramp, whereas a single
+    outlier can drag an LSQ slope and destabilize the iterative
+    detection loop; on a clean ramp the two agree up to sampling noise.
+    """
+    perReadStep = np.diff(cube, axis=-1)
+    medianStep = np.median(perReadStep, axis=-1)
+    return medianStep.astype(np.float32, copy=False)
+
+
+def _detectAndRepairOnce(deltas, goodPixelMask, glitchActive,
+                         crAccum, glitchAccum, boundaryAccum, unclassAccum,
+                         sigmaFloorADU, nSigma, repair, correctGlitches,
+                         glitchAmplitudeMinADU=0.0,
+                         maxDropFraction=0.5,
+                         nDropSigma=3.0):
+    """Run one IQR-sigma detection/repair iteration on a delta cube.
+
+    Modifies ``deltas`` and all four accumulators in place: ``crAccum``
+    collects CRs, ``glitchAccum`` interior glitch pairs, ``boundaryAccum``
+    end glitches (lone flagged delta at index 0 or N-2), ``unclassAccum``
+    flagged deltas in none of those classes. Accepts any ``(H', W', Nd)``
+    shape (time axis last). ``goodPixelMask`` restricts detection;
+    ``glitchActive`` enables pair/boundary classification per pixel.
+    ``maxDropFraction`` / ``nDropSigma`` tune the CR cumulative-drop
+    check: non-finite ``maxDropFraction`` skips it entirely, non-finite
+    ``nDropSigma`` leaves only the amplitude-relative criterion.
+
+    With ``repair``, flagged CR and boundary deltas (plus interior pairs
+    when ``correctGlitches``) are reset to the per-pixel rate. A positive
+    ``glitchAmplitudeMinADU`` makes a pair need one ``|residual|`` above it.
+
+    Returns ``(rate, sigma, newCR, newGlitchPairs)`` — counts are *new this
+    call* against ``crAccum`` / ``glitchAccum`` at entry.
+    """
+    # IQR percentiles + median in a single partition pass (see _rampQR).
+    # Done BEFORE converting deltas to residual so we can still read the
+    # raw delta values.
+    p25, rate, p75 = _rampQR(deltas)
+
+    iqrSigma = 0.741 * (p75 - p25)
+    sigma = np.maximum(iqrSigma, sigmaFloorADU).astype(np.float32, copy=False)
+    threshold = (nSigma * sigma).astype(np.float32, copy=False)
+
+    # Convert deltas → residual IN PLACE — ``residual`` aliases the
+    # deltas buffer; the raw delta values can only be recovered by
+    # adding ``rate[..., None]`` back. Done in place to avoid a second
+    # (H, W, N-1) cube — ~5.85 GB at 4096²×88.
+    deltas -= rate[..., None]
+    residual = deltas
+
+    # Pre-filter to narrow detection work to "candidate" pixels — those
+    # with at least one delta exceeding threshold. The full pair-detect
+    # bool/float expressions are then evaluated on the gathered subset
+    # ((nCand, N-1) instead of (H, W, N-1)), avoiding ~10 GB of transient
+    # float buffers and ~30 s of CPU on a 4096²×88 ramp. ``rate``,
+    # ``sigma``, and ``threshold`` are still per-pixel from _rampQR, so
+    # the detection criterion is bit-identical to the full-cube version —
+    # only the *where* changes, not the *what*.
+
+    # Slice-wise candidate scan: build the (H, W) any-delta-exceeds-
+    # threshold mask without materializing the full (H, W, N-1) flagged
+    # cube (saves ~1.5 GB bool + ~5.85 GB float abs transient).
+    cand2D = np.zeros(residual.shape[:-1], dtype=bool)
+    absSlice = np.empty(residual.shape[:-1], dtype=residual.dtype)
+    for k in range(residual.shape[-1]):
+        np.abs(residual[..., k], out=absSlice)
+        cand2D |= absSlice > threshold
+    cand2D &= goodPixelMask
+    del absSlice
+
+    candYs, candXs = np.where(cand2D)
+    nCand = candYs.size
+
+    newCR = 0
+    newGlitch = 0
+
+    if nCand > 0:
+        # Gather candidate data into compact (nCand, N-1) arrays. These
+        # are O(nCand) in size — for the production dark, ~30 K candidates
+        # out of 16.7 M pixels. Gathered along axes 0/1 (spatial) so the
+        # last axis (time) stays contiguous per candidate.
+        residC = residual[candYs, candXs, :]                  # (nCand, N-1)
+        threshC = threshold[candYs, candXs]                   # (nCand,)
+        gActiveC = glitchActive[candYs, candXs]               # (nCand,)
+        flaggedC = np.abs(residC) > threshC[:, None]          # (nCand, N-1)
+
+        # Pair criterion: at least ONE adjacent delta crosses threshold,
+        # the two residuals have opposite signs, and they cancel within
+        # threshold. The cancellation test is what discriminates glitch
+        # from CR — a real CR leaves residual[k+1] ~ 0, so the sum
+        # |resid[k] + resid[k+1]| ~ resid[k] is too large to pass.
+
+        # Opposite-sign via bool XOR (avoids a float product temp).
+        negPrev = residC[:, :-1] < 0
+        negNext = residC[:, 1:] < 0
+        oppositeSign = negPrev != negNext
+        del negPrev, negNext
+
+        # Cancellation: |residual[:-1] + residual[1:]| < threshold.
+        # abs in place on the sum.
+        sumResid = residC[:, :-1] + residC[:, 1:]
+        np.abs(sumResid, out=sumResid)
+        cancels = sumResid < threshC[:, None]
+        del sumResid
+
+        flaggedEither = flaggedC[:, :-1] | flaggedC[:, 1:]
+
+        pairMatch = oppositeSign & cancels & flaggedEither & gActiveC[:, None]
+        del oppositeSign, cancels, flaggedEither
+
+        if glitchAmplitudeMinADU > 0.0:
+            # Extra floor: at least one delta in the pair clears the
+            # specified amplitude. Suppresses faint-end glitch
+            # classification.
+            absT = np.abs(residC[:, :-1])
+            bigEnough = absT > glitchAmplitudeMinADU
+            del absT
+            absT = np.abs(residC[:, 1:])
+            bigEnough |= absT > glitchAmplitudeMinADU
+            del absT
+            pairMatch &= bigEnough
+            del bigEnough
+
+        isPairC = np.zeros_like(flaggedC)
+        isPairC[:, :-1] |= pairMatch
+        isPairC[:, 1:] |= pairMatch
+        del pairMatch
+
+        # Boundary ("end") glitch heuristic: a flagged delta at index 0
+        # or N-2 can never pair because one of its neighbors lies off the
+        # ramp. It is classified as an ASIC glitch (so it is not
+        # misclassified as a CR) but tracked separately from interior
+        # pairs — a lone end glitch has no partner to cancel against, so
+        # it must always be corrected.
+        isBoundaryC = np.zeros_like(flaggedC)
+        isBoundaryC[:, 0] = flaggedC[:, 0] & ~isPairC[:, 0] & gActiveC
+        isBoundaryC[:, -1] = flaggedC[:, -1] & ~isPairC[:, -1] & gActiveC
+
+        isCRC = flaggedC & ~isPairC & ~isBoundaryC & (residC > 0)
+
+        # Cumulative-drop check: a real cosmic ray deposits charge that
+        # stays for the rest of the ramp, so the per-pixel running
+        # cumulative residual from the CR delta onward should hover near
+        # zero (modulo random-walk noise). A transient up-spike (RTS,
+        # decaying glitch, etc.) leaks back and drives the cumulative
+        # residual significantly negative. For each candidate CR delta k
+        # with amplitude ``A = residC[k] > 0``, compute
+        # ``minDropAfter[k] = min over m > k of sum_{j=k+1..m} residC[j]``.
+        # We REJECT only when BOTH:
+        #
+        #   (a) the drop exceeds a fraction of the CR amplitude
+        #       (``minDropAfter < -maxDropFraction * A``), and
+        #   (b) the drop exceeds the noise-driven cumulative
+        #       random-walk excursion
+        #       (``minDropAfter < -nDropSigma * sigma * sqrt(Nd-1-k)``).
+        #
+        # Both criteria together: real persistent CRs survive because
+        # their cumulative drift is noise-limited (small in sigma units);
+        # genuine transients fail both because their cumulative drops
+        # back ~A which is both >> 0.5*A and >> a few-sigma random walk.
+        # (Note: not to be confused with H4RG image persistence — the
+        # latent-charge phenomenon — which is a different effect.) CRs
+        # at the very last delta have no "after" to check and are
+        # accepted. Skip the check entirely when ``maxDropFraction`` is
+        # infinite.
+        if isCRC.any() and np.isfinite(maxDropFraction):
+            forwardCS = np.cumsum(residC, axis=-1)  # (nCand, Nd)
+            # runningMinFwd[i, m] = min over j >= m of forwardCS[i, j]
+            runningMinFwd = np.minimum.accumulate(
+                forwardCS[:, ::-1], axis=-1
+            )[:, ::-1]
+            minDropAfter = np.zeros_like(forwardCS)
+            minDropAfter[:, :-1] = runningMinFwd[:, 1:] - forwardCS[:, :-1]
+            del forwardCS, runningMinFwd
+            # Criterion (a): relative-to-amplitude.
+            relativeOk = minDropAfter > -maxDropFraction * residC
+            if np.isfinite(nDropSigma):
+                # Criterion (b): noise-aware. nAfter[k] = Nd-1-k counts
+                # the deltas after index k (Nd = number of deltas), the
+                # most a cumulative residual can span; its std ~ σ√nAfter.
+                nDeltasC = residC.shape[-1]
+                nAfter = np.arange(nDeltasC - 1, -1, -1, dtype=np.float32)
+                sigmaC = sigma[candYs, candXs].astype(np.float32, copy=False)
+                sigmaScale = sigmaC[:, None] * np.sqrt(nAfter)[None, :]
+                sigmaOk = minDropAfter > -nDropSigma * sigmaScale
+                dropOk = relativeOk | sigmaOk
+                del sigmaC, sigmaScale, sigmaOk
+            else:
+                dropOk = relativeOk
+            isCRC &= dropOk
+            del minDropAfter, relativeOk, dropOk
+
+        # Outliers above |residual| threshold that the classifier did not
+        # assign to any class (not CR, not glitch pair, not boundary).
+        # These include negative-residual flagged deltas (RTS bursts,
+        # persistence echoes, defects) and CR candidates demoted by the
+        # cumulative-drop check. They aren't physically interpretable as
+        # ramp signal, so the final rate excludes them.
+        isUnclassC = flaggedC & ~isPairC & ~isBoundaryC & ~isCRC
+        del flaggedC
+
+        # Read existing accumulator values at candidate positions; count
+        # new flags; scatter the OR'd values back. (The gathered copies
+        # are needed anyway to tell new flags from earlier ones.)
+        crAccumC = crAccum[candYs, candXs, :]
+        glitchAccumC = glitchAccum[candYs, candXs, :]
+        boundaryAccumC = boundaryAccum[candYs, candXs, :]
+        unclassAccumC = unclassAccum[candYs, candXs, :]
+        newCR = int((isCRC & ~crAccumC).sum())
+        newGlitch = int((isPairC & ~glitchAccumC).sum()) // 2
+        crAccum[candYs, candXs, :] = crAccumC | isCRC
+        glitchAccum[candYs, candXs, :] = glitchAccumC | isPairC
+        boundaryAccum[candYs, candXs, :] = boundaryAccumC | isBoundaryC
+        unclassAccum[candYs, candXs, :] = unclassAccumC | isUnclassC
+        del isCRC, isPairC, isBoundaryC, isUnclassC
+        del crAccumC, glitchAccumC, boundaryAccumC, unclassAccumC
+
+    if repair:
+        # In-iter repair: flagged residuals → 0 (they become ``rate``
+        # after add-back) so the next iteration sees a cleaner sample.
+        # CRs and end glitches are always repaired — an end glitch has no
+        # pair partner, so it cannot cancel itself out. Interior glitch
+        # pairs are repaired only when ``correctGlitches`` is set;
+        # otherwise the symmetric +A/-A pair is left in place to cancel
+        # in the mean UTR rate on its own.
+        allFlag = crAccum | boundaryAccum
+        if correctGlitches:
+            allFlag = allFlag | glitchAccum
+        if allFlag.any():
+            residual[allFlag] = 0.0
+        del allFlag
+    # Always restore delta space (even when nothing was flagged this
+    # iter — keeps the in/out contract: input and output both live in
+    # delta space).
+    residual += rate[..., None]
+
+    return rate, sigma, newCR, newGlitch
+
+
+def iterativeUtrDetectAndRepair(
+    deltas: np.ndarray,
+    *,
+    goodPixelMask: np.ndarray,
+    glitchPixelMask: Optional[np.ndarray] = None,
+    sigmaFloorADU: float = DEFAULT_SIGMA_FLOOR_ADU,
+    nSigma: float = DEFAULT_ITER_N_SIGMA,
+    maxIterations: int = DEFAULT_MAX_ITERATIONS,
+    repair: bool = True,
+    correctGlitches: bool = False,
+    glitchAmplitudeMinADU: float = 0.0,
+    maxDropFraction: float = 0.5,
+    nDropSigma: float = 3.0,
+    badPixelMinOutliers: int = 4,
+    badPixelOutlierSigma: float = 4.0,
+) -> IterativeRepairResult:
+    """Iterative CR + ASIC-glitch detection on a linearized delta cube.
+
+    Operates in delta-space throughout — no ``np.diff`` or ``np.cumsum``
+    happens inside this function. The caller is responsible for:
+
+      1. ``deltas = np.diff(flux, axis=-1)`` ONCE before this call, on
+         a ``(H, W, N)`` linearized cumulative ramp.
+      2. Cumulative reconstruction (``np.cumsum(axis=-1)``) ONCE after,
+         if needed.
+
+    ``result.rate`` is the UTR-weighted (least-squares slope) rate,
+    evaluated in delta space: closed-form delta weights are summed over
+    the un-flagged deltas and re-normalized, so every flagged delta is
+    excluded. Detection still uses the robust median + IQR
+    (computed in a single ``np.partition`` pass via :func:`_rampQR`), so
+    iter-1's threshold doesn't get inflated by contamination.
+
+    Each iteration:
+
+      1. Robust median rate + IQR sigma from current deltas (via _rampQR).
+      2. Flag deltas with ``|delta - rate| > nSigma * max(sigma,
+         sigmaFloorADU)``.
+      3. Classify each flagged delta:
+
+         - **Glitch pair (ASIC)**: at least one of two adjacent deltas
+           is flagged, the two residuals have opposite signs, and
+           ``|resid[k]+resid[k+1]| < nSigma*sigma`` (the recovery
+           cancels). Both deltas are marked.
+         - **CR (single read)**: a positive flagged delta whose neighbor
+           is not flagged as a pair partner.
+
+      4. Simple in-iter repair (flagged → rate) so the next iteration
+         sees a cleaner sample.
+      5. Stop when the rate change is small (or nothing was flagged).
+
+    The detection threshold is the noise floor: glitches/CRs much
+    larger than ``nSigma * sigma`` are reliably detected; those near or
+    below the noise are not (and shouldn't be — they're indistinguishable
+    from per-read noise).
+
+    Parameters
+    ----------
+    deltas : np.ndarray
+        ``(H, W, N-1)`` linearized delta cube with the time axis last;
+        ``deltas[y, x, k] = flux[y, x, k+1] - flux[y, x, k]`` for the
+        linearized cumulative cube. Modified in place if ``repair=True``:
+        on return, holds the FINAL repaired deltas.
+    goodPixelMask : np.ndarray
+        ``(H, W)`` bool. Pixels where this is False are skipped (no
+        flags raised).
+    glitchPixelMask : np.ndarray, optional
+        ``(H, W)`` bool. ASIC-glitch pair detection runs only where this
+        is True. Default ``None`` disables glitch detection entirely.
+        Production callers should build the mask from
+        ``PfsIsrTask.asicBadChannelMask(...)``.
+    sigmaFloorADU : float
+        Lower bound on per-pixel IQR sigma.
+    nSigma : float
+        Detection threshold in sigma units. Defaults to 5.
+    maxIterations : int
+        Hard cap on iteration count.
+    repair : bool
+        If False, only flag; deltas are not modified.
+    correctGlitches : bool
+        Controls in-cube repair of *interior* ASIC-glitch pairs only.
+        Default False — interior pairs are flagged (so a glitch
+        up-spike is not misclassified as a CR) but left in place in
+        the returned ``deltas``. Set True to also overwrite them during
+        repair. Either way glitch-flagged deltas are excluded from
+        ``result.rate``: the UTR weights are asymmetric across a pair,
+        so the +A/-A spikes do not cancel in the weighted rate. *End
+        glitches* (a lone flagged delta at the first or last delta,
+        with no pair partner) are always repaired regardless of this
+        flag: with no partner they cannot self-cancel.
+    glitchAmplitudeMinADU : float
+        Minimum |residual| amplitude (ADU) for ASIC-glitch pair
+        classification. 0 (default) uses only the CR threshold.
+    maxDropFraction : float
+        Cumulative-drop check for the CR classifier — amplitude
+        criterion. A real CR deposits charge that stays for the rest
+        of the ramp, so the per-pixel running cumulative residual from
+        the CR delta onward should hover near zero. A candidate CR
+        with amplitude ``A`` fails this criterion when the cumulative
+        residual drops by more than ``maxDropFraction * A`` at any
+        later read. Default 0.5. Set to ``float('inf')`` to disable
+        the check (alone). (This is unrelated to H4RG image
+        persistence, which is a different detector-level phenomenon.)
+    nDropSigma : float
+        Cumulative-drop check for the CR classifier — noise criterion.
+        The cumulative residual after the CR delta is a random walk
+        whose std grows as ``σ × √(N-1-k)``; even a true CR can drift
+        a couple of sigma negative just from noise. A candidate fails
+        this criterion when the cumulative drop exceeds
+        ``nDropSigma × σ × √(N-1-k)``. The CR is **rejected** only
+        when BOTH criteria fail (drop exceeds the amplitude fraction
+        AND exceeds the noise floor), which keeps marginal CRs whose
+        noise-driven cumulative drift would otherwise be misread as
+        decay. Default 3.0. Set to ``float('inf')`` to disable the
+        noise criterion (then the amplitude criterion alone gates
+        rejection).
+    badPixelMinOutliers : int
+        BAD-pixel gate (count criterion). A pixel is marked BAD in
+        ``result.badPixelMask`` when it has at least this many delta
+        residuals exceeding ``badPixelOutlierSigma × σ_IQR`` from the
+        per-pixel median. Counts are taken on the pristine input
+        deltas (before in-place repair). Default 4. Set to ``0`` to
+        disable the BAD-pixel pass entirely.
+    badPixelOutlierSigma : float
+        BAD-pixel gate (sigma criterion). Per-delta outlier threshold,
+        in units of ``σ_IQR``. Default 4.0 — for clean Gaussian noise
+        the expected outlier count per N=80 ramp is ~0.005, so the
+        combined ``count ≥ 4 at ≥ 4σ`` gate is essentially
+        false-positive-free; real H4 deltas have mild non-Gaussian
+        tails (shot/read-noise mixture + linearity residuals) that
+        ``σ_IQR`` underestimates by ~30 %, and 3σ at the count=4
+        threshold pulls in many merely-noisy pixels.
+
+    Returns
+    -------
+    IterativeRepairResult
+        ``rate`` is the per-pixel UTR-weighted rate over the un-flagged
+        deltas; CR, glitch, end-glitch and unclassified flags are all
+        excluded regardless of ``correctGlitches`` (fully-flagged pixels
+        fall back to the detection-pass rate). ``sigma`` is the IQR-based
+        per-pixel scatter from the last pass that processed the pixel.
+    """
+    if deltas.ndim != 3:
+        raise ValueError(
+            f"deltas must be 3-D (H, W, N-1); got {deltas.shape}"
+        )
+    H, W, nDeltas = deltas.shape
+    if nDeltas < 2:
+        raise ValueError(f"deltas must have at least 2 entries; got {nDeltas}.")
+
+    if glitchPixelMask is None:
+        # Off by default: glitch pair detection finds nothing.
+        glitchActive2D = np.zeros((H, W), dtype=bool)
+    else:
+        if glitchPixelMask.shape != (H, W):
+            raise ValueError(
+                f"glitchPixelMask shape {glitchPixelMask.shape} != deltas H,W ({H}, {W})."
+            )
+        glitchActive2D = np.asarray(glitchPixelMask, dtype=bool)
+
+    crFlagAccum = np.zeros((H, W, nDeltas), dtype=bool)
+    glitchFlagAccum = np.zeros((H, W, nDeltas), dtype=bool)
+    boundaryFlagAccum = np.zeros((H, W, nDeltas), dtype=bool)
+    # Outlier deltas flagged by threshold but not assigned a class
+    # (CR / glitch pair / end glitch). Tracked so the final ``rate``
+    # excludes them — leaving them in biases the mean toward the
+    # outlier amplitude even though we can't physically classify them.
+    unclassFlagAccum = np.zeros((H, W, nDeltas), dtype=bool)
+
+    # BAD-pixel pass (count criterion): tally delta excursions beyond
+    # ``badPixelOutlierSigma × σ`` on the PRISTINE input deltas. Must run
+    # before iter 1 because ``_detectAndRepairOnce`` mutates ``deltas``
+    # in place when ``repair=True``. A single ``_rampQR`` pass gives the
+    # per-pixel median + IQR-σ used to define an outlier; the count
+    # threshold itself is applied at the end to build ``badPixelMask``.
+    if badPixelMinOutliers > 0:
+        p25Init, p50Init, p75Init = _rampQR(deltas)
+        sigmaInit = np.maximum(
+            0.741 * (p75Init - p25Init).astype(np.float32, copy=False),
+            sigmaFloorADU,
+        )
+        threshInit = (badPixelOutlierSigma * sigmaInit).astype(
+            np.float32, copy=False
+        )
+        nLargeOutliers = np.zeros((H, W), dtype=np.int32)
+        absSlice = np.empty((H, W), dtype=deltas.dtype)
+        for k in range(nDeltas):
+            np.abs(deltas[..., k] - p50Init, out=absSlice)
+            nLargeOutliers += absSlice > threshInit
+        del absSlice, threshInit, sigmaInit, p25Init, p50Init, p75Init
+    else:
+        nLargeOutliers = None
+
+    nByIter = []
+    iterTimings = []
+    # Both lists gain one entry per pass actually run; len() is nIterations.
+    rateTolerance = 0.05  # ADU/read; loop stops when rate change < this
+
+    # ---- Iter 1: process the full delta cube. ----
+    tIter0 = time.time()
+    rateFull, sigmaFull, newCR, newGlitch = _detectAndRepairOnce(
+        deltas, goodPixelMask, glitchActive2D,
+        crFlagAccum, glitchFlagAccum, boundaryFlagAccum, unclassFlagAccum,
+        sigmaFloorADU, nSigma, repair, correctGlitches,
+        glitchAmplitudeMinADU=glitchAmplitudeMinADU,
+        maxDropFraction=maxDropFraction,
+        nDropSigma=nDropSigma,
+    )
+    iterTimings.append(time.time() - tIter0)
+    nByIter.append((newCR, newGlitch))
+
+    # ---- Iter 2..N: restrict to the subset of pixels that got any flag in
+    #      iter 1. Pixels with no iter-1 flag never see new flags in later
+    #      iterations because their deltas are unchanged. ----
+    activeMask = (crFlagAccum.any(axis=-1) | glitchFlagAccum.any(axis=-1)
+                  | boundaryFlagAccum.any(axis=-1)
+                  | unclassFlagAccum.any(axis=-1))
+    ratePrev = rateFull
+
+    for _ in range(1, maxIterations):
+        ys, xs = np.where(activeMask)
+        if ys.size == 0:
+            break
+
+        tIter0 = time.time()
+        # Subset views are copies (fancy indexing); modify, then write back.
+        # Subset shape is (1, nActive, N-1) — a single-row column-vector
+        # H'=1 keeps the (H', W', Nd) convention _detectAndRepairOnce
+        # expects.
+        deltasSub = deltas[ys, xs, :][np.newaxis, :, :].copy()
+        crAccumSub = crFlagAccum[ys, xs, :][np.newaxis, :, :].copy()
+        glitchAccumSub = glitchFlagAccum[ys, xs, :][np.newaxis, :, :].copy()
+        boundaryAccumSub = boundaryFlagAccum[ys, xs, :][np.newaxis, :, :].copy()
+        unclassAccumSub = unclassFlagAccum[ys, xs, :][np.newaxis, :, :].copy()
+        goodSub = goodPixelMask[ys, xs][np.newaxis, :]
+        glitchSub = glitchActive2D[ys, xs][np.newaxis, :]
+
+        rateSub, sigmaSub, newCR, newGlitch = _detectAndRepairOnce(
+            deltasSub, goodSub, glitchSub,
+            crAccumSub, glitchAccumSub, boundaryAccumSub, unclassAccumSub,
+            sigmaFloorADU, nSigma, repair, correctGlitches,
+            glitchAmplitudeMinADU=glitchAmplitudeMinADU,
+            maxDropFraction=maxDropFraction,
+            nDropSigma=nDropSigma,
+        )
+
+        deltas[ys, xs, :] = deltasSub[0, :, :]
+        crFlagAccum[ys, xs, :] = crAccumSub[0, :, :]
+        glitchFlagAccum[ys, xs, :] = glitchAccumSub[0, :, :]
+        boundaryFlagAccum[ys, xs, :] = boundaryAccumSub[0, :, :]
+        unclassFlagAccum[ys, xs, :] = unclassAccumSub[0, :, :]
+
+        rateFull = ratePrev.copy()
+        rateFull[ys, xs] = rateSub[0, :]
+        sigmaFull[ys, xs] = sigmaSub[0, :]
+
+        iterTimings.append(time.time() - tIter0)
+        nByIter.append((newCR, newGlitch))
+
+        # Converge when the per-pixel rate stops moving on the active set.
+        rateChange = float(np.max(np.abs(rateFull - ratePrev)))
+        if rateChange < rateTolerance:
+            break
+        ratePrev = rateFull
+
+    # result.rate is the per-pixel UTR-weighted rate — the optimal
+    # least-squares slope on the (CR-/glitch-corrected) ramp. Equivalent
+    # to ``PfsIsrTask.calcUTRrates`` applied to ``read0 + cumsum(deltas)``,
+    # derived directly in delta space via the closed-form delta weights
+    # ``u[j] = 6(j+1)(N-1-j) / (N(N-1)(N+1))``, j = 0..N-2 — these sum
+    # to 1 and reproduce the read-space UTR weights w[i] = (12i - 6(N-1))
+    # / (N(N²-1)) after the cumsum/diff change of variables.
+    #
+    # Flagged-delta handling: CR + interior glitch pairs + end glitches
+    # + unclassified outliers are all excluded from the rate.
+    # Unclassified deltas are above-threshold residuals the classifier
+    # could not assign (mostly negative outliers from RTS/persistence/
+    # defects + CR candidates demoted by the cumulative-drop check) —
+    # they are real ramp aberrations, not signal, and leaving them in
+    # pulls the slope toward the outlier amplitude. Glitch pairs are
+    # excluded regardless of ``correctGlitches`` because the UTR
+    # weights are asymmetric across the pair — the +A/-A cancellation
+    # that held for the unweighted mean does not extend here.
+    # ``correctGlitches`` now controls only the in-cube repair
+    # (whether the spikes survive in the returned ``deltas``), not
+    # the rate.
+    #
+    # Exclusion is implemented as weight re-normalization: rate = sum
+    # over unflagged k of u[k]·δ[k], divided by 1 − sum over flagged k
+    # of u[k]. Equivalently, fill flagged deltas with the rate itself
+    # and apply the closed-form UTR weights — i.e., the fixed point of
+    # ``calcUTRrates(read0 + cumsum(deltas_with_flagged_filled_by_rate))``.
+    # This makes ``result.rate`` agree exactly with calcUTRrates on the
+    # reconstructed-and-repaired ramp; the median ``rateFull`` is kept
+    # only as the all-flagged fallback. Accumulated slice-wise to avoid
+    # an ``(H, W, N-1)`` transient.
+    nReads = nDeltas + 1
+    ks = np.arange(nDeltas, dtype=np.float32)
+    utrW = np.float32(6.0) * (ks + np.float32(1.0)) * (
+        np.float32(nReads - 1) - ks
+    ) / np.float32(nReads * (nReads - 1) * (nReads + 1))
+    sumUnflaggedW = np.zeros(deltas.shape[:-1], dtype=np.float32)
+    sumFlaggedW = np.zeros(deltas.shape[:-1], dtype=np.float32)
+    for k in range(nDeltas):
+        flagK = (crFlagAccum[..., k] | boundaryFlagAccum[..., k]
+                 | glitchFlagAccum[..., k] | unclassFlagAccum[..., k])
+        # where(), not ``* ~flagK``: a flagged ±inf delta times 0 is NaN.
+        sumUnflaggedW += utrW[k] * np.where(flagK, 0.0, deltas[..., k])
+        sumFlaggedW += utrW[k] * flagK
+    denom = np.float32(1.0) - sumFlaggedW
+    with np.errstate(divide="ignore", invalid="ignore"):
+        rateFinal = (sumUnflaggedW / denom).astype(np.float32, copy=False)
+    allFlagged = denom < np.float32(1e-6)
+    if allFlagged.any():
+        rateFinal[allFlagged] = rateFull[allFlagged]
+    sigmaFinal = sigmaFull.astype(np.float32, copy=False)
+
+    # BAD-pixel pass: a pixel with ≥ badPixelMinOutliers delta
+    # excursions above ``badPixelOutlierSigma × σ_IQR`` (counted on the
+    # pristine input deltas before in-place repair) is RTS /
+    # telegraph-noise. Restricted to ``goodPixelMask`` so already-masked
+    # pixels don't appear in this output independently.
+    if nLargeOutliers is not None:
+        badPixelMask = (
+            (nLargeOutliers >= badPixelMinOutliers) & goodPixelMask
+        )
+    else:
+        badPixelMask = np.zeros((H, W), dtype=bool)
+
+    # glitchFlagMask reports every ASIC glitch — interior pairs and end
+    # glitches — for the ASIC_GLITCH mask plane. nGlitchPairs counts only
+    # true pairs (from glitchFlagAccum); end glitches are singletons.
+    return IterativeRepairResult(
+        nIterations=len(nByIter),
+        crFlagMask=crFlagAccum,
+        glitchFlagMask=glitchFlagAccum | boundaryFlagAccum,
+        unclassifiedFlagMask=unclassFlagAccum,
+        badPixelMask=badPixelMask,
+        rate=rateFinal,
+        sigma=sigmaFinal,
+        nCRs=int(crFlagAccum.sum()),
+        nGlitchPairs=int(glitchFlagAccum.sum()) // 2,
+        nByIteration=nByIter,
+        iterationTimings=iterTimings,
+    )
diff --git a/python/lsst/obs/pfs/h4Linearity/fit.py b/python/lsst/obs/pfs/h4Linearity/fit.py
new file mode 100644
index 00000000..f006d58e
--- /dev/null
+++ b/python/lsst/obs/pfs/h4Linearity/fit.py
@@ -0,0 +1,522 @@
+"""Top-level fit(): tile-iterate over (H, W) and delegate to model.fitBlock."""
+
+from __future__ import annotations
+
+import os
+from collections.abc import Sequence
+from concurrent.futures import Future, ThreadPoolExecutor, as_completed
+
+import numpy as np
+
+from .models import Model, PolynomialModel
+from .types import (
+    BORDER_PIX,
+    FIT_FAILED,
+    HIGH_FIT_RESIDUAL,
+    INSUFFICIENT_POINTS,
+    MASKED_BY_INPUT,
+    NON_MONOTONIC,
+    Diagnostics,
+    LinearityCorrection,
+    Ramp,
+)
+
+# Worker-count defaults used by ``_resolveWorkerCount`` when ``workers`` is
+# None; explicit values bypass both. The tests monkeypatch `os.cpu_count`
+# rather than these, so changing them only shifts the default behavior.
+_SMALL_FRAME_PIXEL_LIMIT = 1_000_000   # H*W below this → sequential default
+_DEFAULT_WORKER_CAP = 8                # auto-detected cpu_count is capped here
+
+# Executor constructor, called as ``_executorFactory(max_workers=...)``; tests
+# can monkeypatch this module attribute (e.g. "h4Linearity.fit._executorFactory";
+# NOTE(review): confirm the dotted import path) to record or replace it.
+_executorFactory = ThreadPoolExecutor
+
+
+def _resolveWorkerCount(workers: int | None, H: int, W: int) -> int:
+    """Return the number of worker threads a ``fit()`` call should use.
+
+    An explicit ``workers`` value is validated (must be >= 1) and handed
+    back unchanged. ``None`` selects the automatic policy: frames with
+    fewer than ``_SMALL_FRAME_PIXEL_LIMIT`` pixels run on one worker,
+    larger ones on ``min(os.cpu_count() or 1, _DEFAULT_WORKER_CAP)``.
+
+    Raises:
+        ValueError: if an explicit ``workers`` is less than 1.
+    """
+    if workers is not None:
+        if workers < 1:
+            raise ValueError(f"workers must be >= 1, got {workers}")
+        return workers
+    if H * W >= _SMALL_FRAME_PIXEL_LIMIT:
+        return min(os.cpu_count() or 1, _DEFAULT_WORKER_CAP)
+    return 1
+
+
+def fit(
+    ramps: Sequence[Ramp],
+    model: Model | None = None,
+    blockSize: tuple[int, int] = (128, 128),
+    workers: int | None = None,
+    conditionNumberLimit: float = 1e12,
+    deviationLimit: float | None = None,
+    deviationStart: float = 0.5,
+    nRefReads: int = 5,
+    saturationLevel: float | None = None,
+    lowFluxFraction: float = 0.5,
+    borderWidth: int = 4,
+    saturationKnee: float | None = 0.5,
+    badLinearityMedianMultiplier: float | None = 5.0,
+) -> LinearityCorrection:
+    """Fit a per-pixel nonlinearity correction from one or more ramps.
+
+    See ``docs/superpowers/specs/2026-04-16-relin-package-design.md`` for
+    the full algorithm description.
+
+    Parameters
+    ----------
+    ramps : sequence of Ramp
+        One or more ramps to fit jointly. All ramps must share the same
+        ``(H, W)`` frame shape.
+    model : Model, optional
+        Model to fit. Defaults to ``PolynomialModel(order=4)``.
+    blockSize : (int, int), optional
+        Tile size in pixels for the per-tile normal-equations fit.
+        Default is ``(128, 128)``, chosen by sweep on a 4096×4096×29
+        reference workload with the auto worker count. Smaller tiles
+        reduce peak memory; larger tiles reduce per-tile overhead.
+    workers : int or None, optional
+        Number of worker threads for the tile loop.
+
+        - ``1`` (explicit): sequential — no thread pool is constructed.
+        - ``N > 1`` (explicit): run the tile loop on a
+          ``ThreadPoolExecutor`` with ``max_workers=N``. No upper cap is
+          applied to explicit values.
+        - ``None`` (default): heuristic. If ``H * W < 1_000_000``, use
+          ``1`` worker (sequential). Otherwise use
+          ``min(os.cpu_count() or 1, 8)``. The cap at 8 applies only
+          to the auto-detected default.
+
+        Output is deterministic and worker-count-independent: every tile
+        writes to a disjoint slice of the preallocated output arrays
+        on the main thread, so the fit result is byte-identical
+        regardless of ``workers``.
+
+        Note on BLAS/LAPACK: numpy's linear-algebra routines may spawn
+        additional internal threads. On multi-core machines, combining
+        ``workers > 1`` with an uncontrolled BLAS thread count can lead
+        to oversubscription and diminishing returns. If threaded
+        speedup plateaus below expectations, try setting
+        ``OMP_NUM_THREADS=1`` and/or ``MKL_NUM_THREADS=1`` in the
+        process environment.
+    conditionNumberLimit : float, optional
+        Pixels whose normal-equations matrix has a condition number
+        above this threshold are flagged as ``FIT_FAILED`` and left
+        with zeroed coefficients. Default ``1e12``.
+    deviationLimit : float or None, optional
+        When set, reads where the per-read delta deviates from the
+        per-pixel reference rate by more than this fraction are excluded
+        from fitting. The reference rate is the median of the first
+        ``nRefReads`` deltas for each pixel. For each pixel, the first
+        read exceeding the threshold causes all subsequent reads to be
+        masked as well. This clips the fitting range to the linear
+        regime, excluding e.g. the saturated tail. Default ``None``
+        (disabled — all valid reads are used).
+    nRefReads : int, optional
+        Number of early reads to median when computing the per-pixel
+        reference rate for ``deviationLimit``. Default ``5``.
+    saturationLevel : float or None, optional
+        When set, reads where the cumulative signal ``m`` exceeds this
+        value are excluded from fitting. For each pixel, the first read
+        exceeding the threshold causes all subsequent reads to be masked.
+        Default ``None`` (disabled).
+    lowFluxFraction : float, optional
+        Pixels whose median delta over the first ``nRefReads`` reads is
+        below ``lowFluxFraction * globalMedianRate`` are masked entirely
+        (all reads invalidated). This rejects dead or very dim pixels
+        that would otherwise pass per-pixel deviation clipping.
+        Default ``0.5``.
+    borderWidth : int, optional
+        Number of border pixels to exclude on each edge of the frame.
+        These are hardware reference pixels and are flagged with
+        ``BORDER_PIX``. Default ``4``.
+    saturationKnee : float or None, optional
+        Per-pixel saturation knee. The first delta with
+        ``delta < saturationKnee * refDelta`` marks that read and all
+        later reads invalid. ``refDelta`` is the median of the first
+        ``nRefReads`` deltas; within that window a leave-one-out median
+        of the other deltas in the window is used so a saturating delta
+        cannot bias its own knee. Stacks with ``deviationLimit``. Default
+        ``0.5``; pass ``None`` to disable.
+    badLinearityMedianMultiplier : float or None, optional
+        Pixels whose ``residualRms`` exceeds
+        ``badLinearityMedianMultiplier × median(residualRms over
+        still-good pixels)`` are flagged ``HIGH_FIT_RESIDUAL`` and
+        passed through unchanged by :func:`apply`. Detector-relative —
+        no absolute DN cutoff. Default ``5.0``; pass ``None`` to disable.
+
+    Returns
+    -------
+    LinearityCorrection
+        Fitted coefficients, range bounds, bad-pixel mask, and
+        diagnostics.
+    """
+    if model is None:
+        model = PolynomialModel(order=4)
+
+    if len(ramps) == 0:
+        raise ValueError("fit() requires at least one ramp")
+
+    # Validate shapes.
+    H, W = ramps[0].reads.shape[1:]
+    for k, ramp in enumerate(ramps):
+        if ramp.reads.ndim != 3:
+            raise ValueError(
+                f"ramps[{k}].reads must be 3-D (N, H, W); got {ramp.reads.shape}"
+            )
+        if ramp.reads.shape[1:] != (H, W):
+            raise ValueError(
+                f"ramps[{k}].reads H,W = {ramp.reads.shape[1:]} "
+                f"does not match ramps[0] H,W = {(H, W)}"
+            )
+        if ramp.validMask is not None and ramp.validMask.shape != (H, W):
+            raise ValueError(
+                f"ramps[{k}].validMask shape {ramp.validMask.shape} != {(H, W)}"
+            )
+
+    effectiveWorkers = _resolveWorkerCount(workers, H, W)
+
+    # Per-ramp precomputation.
+    cumulatives: list[np.ndarray] = []
+    targets: list[np.ndarray] = []
+    rates: list[float] = []
+    for ramp in ramps:
+        m = ramp.reads.astype(np.float32)
+        cumulatives.append(m)
+        Nk = ramp.reads.shape[0]
+        if Nk < 3:
+            raise ValueError(
+                f"ramp must have at least 3 reads (incl. implicit read0); got {Nk}"
+            )
+        # Rate R: median of the second .npz delta (m[2] - m[1]) over allowed
+        # pixels. Skipping the first delta avoids any reset-frame transient.
+        refRead = m[2] - m[1]
+        if ramp.validMask is not None:
+            allowed = ramp.validMask == 0
+            if allowed.any():
+                rate = float(np.median(refRead[allowed]))
+            else:
+                rate = float(np.median(refRead))
+        else:
+            rate = float(np.median(refRead))
+        rates.append(rate)
+        # Target ramp matches the new reads convention: target[0] = 0.
+        targets.append(rate * np.arange(Nk, dtype=np.float32))
+
+    # Deviation-based clipping: for each ramp, mask reads where the measured
+    # cumulative deviates from the target by more than deviationLimit.
+    # Store per-ramp (Nk, H, W) validity arrays for use in tile assembly.
+    rampValidity: list[np.ndarray] = []
+    for k, ramp in enumerate(ramps):
+        Nk = ramp.reads.shape[0]
+        if ramp.validMask is not None:
+            v = np.broadcast_to(
+                (ramp.validMask == 0)[None], (Nk, H, W)
+            ).copy()
+        else:
+            v = np.ones((Nk, H, W), dtype=bool)
+
+        # Mask border pixels (skip if frame is too small).
+        if borderWidth > 0 and H > 2 * borderWidth and W > 2 * borderWidth:
+            v[:, :borderWidth, :] = False
+            v[:, -borderWidth:, :] = False
+            v[:, :, :borderWidth] = False
+            v[:, :, -borderWidth:] = False
+
+        # Per-read deltas (the original .npz deltas). Length Nk-1; deltas[i]
+        # corresponds to the transition from read i to read i+1.
+        m = cumulatives[k]
+        deltas = np.diff(m, axis=0)  # (Nk-1, H, W)
+        nRef = min(nRefReads, deltas.shape[0])
+        refDelta = np.median(deltas[:nRef], axis=0)  # (H, W)
+
+        # Reject low-flux pixels: mask all reads for pixels whose
+        # reference delta is below lowFluxFraction * global median rate.
+        lowFluxThreshold = lowFluxFraction * rates[k]
+        lowFlux = refDelta < lowFluxThreshold  # (H, W)
+        v[:, lowFlux] = False
+
+        # Per-pixel saturation knee. The first delta with
+        # ``delta < saturationKnee * refDelta`` marks that read and all
+        # later reads invalid. Acts on every delta from index 0, so it
+        # catches early-saturating pixels whose deltas collapse inside the
+        # always-valid prefix of ``deviationLimit``/``deviationStart``.
+        #
+        # Within the refDelta window (indices ``0..nRef-1``) each delta is
+        # judged against a leave-one-out median of the *other* deltas in
+        # the window, so a cluster of already-saturated early deltas
+        # cannot pull refDelta down to hide its own neighbours. Beyond
+        # the window the full-window refDelta applies.
+        if saturationKnee is not None:
+            nDelta = deltas.shape[0]
+            sat = np.empty((nDelta, H, W), dtype=bool)
+            with np.errstate(divide="ignore", invalid="ignore"):
+                if nRef >= 2:
+                    window = deltas[:nRef]
+                    for i in range(nRef):
+                        others = [j for j in range(nRef) if j != i]
+                        refI = np.median(window[others], axis=0)
+                        sat[i] = deltas[i] < (saturationKnee * refI)
+                else:
+                    # Degenerate case (nRef < 2): fall back to the inclusive
+                    # refDelta — LOO is undefined with fewer than 2 samples.
+                    sat[:nRef] = deltas[:nRef] < (saturationKnee * refDelta[None])
+                if nDelta > nRef:
+                    sat[nRef:] = deltas[nRef:] < (saturationKnee * refDelta[None])
+            sat = np.maximum.accumulate(sat, axis=0)
+            v[1:][sat] = False
+
+        if deviationLimit is not None:
+            nDelta = deltas.shape[0]  # = Nk - 1
+            startDelta = int(nDelta * deviationStart)
+            with np.errstate(divide="ignore", invalid="ignore"):
+                frac = np.abs(deltas - refDelta[None]) / np.abs(refDelta[None])
+            frac = np.where(np.isfinite(frac), frac, 0.0)
+            exceeds = frac > deviationLimit  # (Nk-1, H, W)
+            # Only apply from startDelta onward (early deltas are not
+            # near saturation and shouldn't trigger the limit).
+            exceeds[:startDelta] = False
+            exceeds = np.maximum.accumulate(exceeds, axis=0)
+            # delta i excessive → mask reads i+1 onward; the implicit
+            # zero read 0 is never excluded by this test.
+            v[1:][exceeds] = False
+
+        if saturationLevel is not None:
+            m = cumulatives[k]  # (Nk, H, W)
+            saturated = m > saturationLevel
+            saturated = np.maximum.accumulate(saturated, axis=0)
+            v[saturated] = False
+
+        rampValidity.append(v)
+
+    # Concatenated targets across ramps — used per tile.
+    tConcat = np.concatenate(targets)
+
+    # Preallocate full-frame outputs.
+    # The shape of coefficients is determined by the model:
+    # - PolynomialModel: (order+1, H, W)
+    # We discover the coefficient shape by running a 1x1 dummy block first.
+    coefShape = _peekCoefShape(model)
+    coefficients = np.zeros((coefShape, H, W), dtype=np.float32)
+    fitMin = np.zeros((H, W), dtype=np.float32)
+    fitMax = np.zeros((H, W), dtype=np.float32)
+    residualRms = np.zeros((H, W), dtype=np.float32)
+    maxAbsResidual = np.zeros((H, W), dtype=np.float32)
+    nPointsUsed = np.zeros((H, W), dtype=np.int32)
+    conditionNumber = np.zeros((H, W), dtype=np.float32)
+    monotonic = np.zeros((H, W), dtype=bool)
+    badPixelMask = np.zeros((H, W), dtype=np.uint16)
+
+    # Iterate over tiles. Tile-assembly (mTile, validTile) is identical
+    # for sequential and threaded paths; factor it into a closure so both
+    # paths call model.fitBlock with exactly the same inputs.
+    bH, bW = blockSize
+
+    def _assembleTile(
+        rowStart: int, rowEnd: int, colStart: int, colEnd: int
+    ) -> tuple[np.ndarray, np.ndarray]:
+        mSegments: list[np.ndarray] = []
+        validSegments: list[np.ndarray] = []
+        for k in range(len(ramps)):
+            mSegments.append(
+                cumulatives[k][:, rowStart:rowEnd, colStart:colEnd]
+            )
+            validSegments.append(
+                rampValidity[k][:, rowStart:rowEnd, colStart:colEnd]
+            )
+        mTile = np.concatenate(mSegments, axis=0)
+        validTile = np.concatenate(validSegments, axis=0)
+        return mTile, validTile
+
+    def _storeResult(
+        rowStart: int, rowEnd: int, colStart: int, colEnd: int, result
+    ) -> None:
+        coefficients[:, rowStart:rowEnd, colStart:colEnd] = result.coefficients
+        fitMin[rowStart:rowEnd, colStart:colEnd] = result.fitMin
+        fitMax[rowStart:rowEnd, colStart:colEnd] = result.fitMax
+        residualRms[rowStart:rowEnd, colStart:colEnd] = result.residualRms
+        maxAbsResidual[rowStart:rowEnd, colStart:colEnd] = result.maxAbsResidual
+        nPointsUsed[rowStart:rowEnd, colStart:colEnd] = result.nPointsUsed
+        conditionNumber[rowStart:rowEnd, colStart:colEnd] = result.conditionNumber
+        monotonic[rowStart:rowEnd, colStart:colEnd] = result.monotonic
+        badPixelMask[rowStart:rowEnd, colStart:colEnd] = result.badPixelMask
+
+    if effectiveWorkers == 1:
+        # Sequential fast path — no executor involvement.
+        for rowStart in range(0, H, bH):
+            rowEnd = min(rowStart + bH, H)
+            for colStart in range(0, W, bW):
+                colEnd = min(colStart + bW, W)
+                mTile, validTile = _assembleTile(
+                    rowStart, rowEnd, colStart, colEnd
+                )
+                result = model.fitBlock(
+                    m=mTile, t=tConcat, valid=validTile,
+                    conditionNumberLimit=conditionNumberLimit,
+                )
+                _storeResult(rowStart, rowEnd, colStart, colEnd, result)
+    else:
+        # Threaded path. Submit each tile as a future; consume completed
+        # futures on the main thread and stitch into disjoint output slices.
+        # Tile-assembly runs on the submitting thread so workers do pure
+        # compute on independent numpy arrays (no shared mutable state).
+        # Note: ThreadPoolExecutor.submit has no back-pressure, so all
+        # tiles' assembled (mTile, validTile) arrays coexist in memory
+        # until their futures complete. On 4096x4096 with blockSize=(128,
+        # 128), that is 1024 tiles of roughly tile-sized float32 plus a
+        # small bool array each — manageable on the reference workload
+        # but worth keeping in mind if tile size grows.
+        with _executorFactory(max_workers=effectiveWorkers) as executor:
+            futures: dict[Future, tuple[int, int, int, int]] = {}
+            for rowStart in range(0, H, bH):
+                rowEnd = min(rowStart + bH, H)
+                for colStart in range(0, W, bW):
+                    colEnd = min(colStart + bW, W)
+                    mTile, validTile = _assembleTile(
+                        rowStart, rowEnd, colStart, colEnd
+                    )
+                    fut = executor.submit(
+                        model.fitBlock,
+                        m=mTile, t=tConcat, valid=validTile,
+                        conditionNumberLimit=conditionNumberLimit,
+                    )
+                    futures[fut] = (rowStart, rowEnd, colStart, colEnd)
+
+            for fut in as_completed(futures):
+                rs, re, cs, ce = futures[fut]
+                try:
+                    result = fut.result()
+                except Exception as e:
+                    # Cancel any futures that haven't started; in-flight
+                    # tasks still run to completion but their results are
+                    # discarded when the `with` block shuts down.
+                    for other in futures:
+                        if other is not fut:
+                            other.cancel()
+                    raise RuntimeError(
+                        f"fitBlock failed at tile "
+                        f"[rows {rs}:{re}, cols {cs}:{ce}]"
+                    ) from e
+                _storeResult(rs, re, cs, ce, result)
+
+    # Border pixels are hardware reference pixels, not part of the
+    # linearity-fittable population. Assign ``BORDER_PIX`` directly (not
+    # OR) so the mask records only the reason for rejection — and not the
+    # ``INSUFFICIENT_POINTS`` bit ``fitBlock`` raises on the matching
+    # ``valid=False`` columns above. Skip if the frame is too small.
+    if borderWidth > 0 and H > 2 * borderWidth and W > 2 * borderWidth:
+        badPixelMask[:borderWidth, :] = BORDER_PIX
+        badPixelMask[-borderWidth:, :] = BORDER_PIX
+        badPixelMask[:, :borderWidth] = BORDER_PIX
+        badPixelMask[:, -borderWidth:] = BORDER_PIX
+
+    # Propagate caller-supplied input masks.
+    for ramp in ramps:
+        if ramp.validMask is not None:
+            inputBad = (ramp.validMask != 0)
+            badPixelMask[inputBad] |= MASKED_BY_INPUT
+
+    # Post-fit residual quality. Pixels whose fit completed but whose
+    # residualRms exceeds ``badLinearityMedianMultiplier × median(residualRms
+    # over still-good pixels)`` are flagged ``HIGH_FIT_RESIDUAL``. The
+    # reference median is detector-relative, so no absolute DN cutoff is
+    # required.
+    highResidualThreshold: float | None = None
+    if badLinearityMedianMultiplier is not None:
+        notYetBad = (badPixelMask == 0)
+        if notYetBad.any():
+            ref = float(np.median(residualRms[notYetBad]))
+            highResidualThreshold = badLinearityMedianMultiplier * ref
+            highRes = notYetBad & (residualRms > highResidualThreshold)
+            badPixelMask[highRes] |= HIGH_FIT_RESIDUAL
+
+    # Dataset-wide summary.
+    goodPixels = (badPixelMask == 0)
+    totalPixels = int(H * W)
+    summary: dict = {
+        "totalPixels": totalPixels,
+        "goodPixelFraction": float(goodPixels.sum()) / totalPixels,
+        "badPixelFraction_borderPix": float((badPixelMask & BORDER_PIX > 0).sum()) / totalPixels,
+        "badPixelFraction_maskedByInput": float((badPixelMask & MASKED_BY_INPUT > 0).sum()) / totalPixels,
+        "badPixelFraction_insufficientPoints":
+            float((badPixelMask & INSUFFICIENT_POINTS > 0).sum()) / totalPixels,
+        "badPixelFraction_fitFailed": float((badPixelMask & FIT_FAILED > 0).sum()) / totalPixels,
+        "badPixelFraction_nonMonotonic": float((badPixelMask & NON_MONOTONIC > 0).sum()) / totalPixels,
+        "badPixelFraction_highFitResidual":
+            float((badPixelMask & HIGH_FIT_RESIDUAL > 0).sum()) / totalPixels,
+        "modelName": model.modelName,
+        "nRamps": len(ramps),
+        "order": model.order,
+        "blockSize": f"{blockSize[0]}x{blockSize[1]}",
+        "borderWidth": borderWidth,
+        "conditionNumberLimit": conditionNumberLimit,
+        "nRefReads": nRefReads,
+        "lowFluxFraction": lowFluxFraction,
+    }
+    if deviationLimit is not None:
+        summary["deviationLimit"] = deviationLimit
+        summary["deviationStart"] = deviationStart
+    if saturationLevel is not None:
+        summary["saturationLevel"] = saturationLevel
+    if saturationKnee is not None:
+        summary["saturationKnee"] = saturationKnee
+    if badLinearityMedianMultiplier is not None:
+        summary["badLinearityMedianMultiplier"] = badLinearityMedianMultiplier
+        if highResidualThreshold is not None:
+            summary["highFitResidualThresholdDN"] = highResidualThreshold
+    if goodPixels.any():
+        goodRms = residualRms[goodPixels]
+        summary["residualRmsP50"] = float(np.percentile(goodRms, 50))
+        summary["residualRmsP95"] = float(np.percentile(goodRms, 95))
+        summary["residualRmsP99"] = float(np.percentile(goodRms, 99))
+    else:
+        summary["residualRmsP50"] = float("nan")
+        summary["residualRmsP95"] = float("nan")
+        summary["residualRmsP99"] = float("nan")
+
+    diagnostics = Diagnostics(
+        residualRms=residualRms,
+        maxAbsResidual=maxAbsResidual,
+        nPointsUsed=nPointsUsed,
+        monotonic=monotonic,
+        conditionNumber=conditionNumber,
+        summary=summary,
+    )
+
+    return LinearityCorrection(
+        model=model,
+        coefficients=coefficients,
+        fitMin=fitMin,
+        fitMax=fitMax,
+        badPixelMask=badPixelMask,
+        diagnostics=diagnostics,
+    )
+
+
+def _peekCoefShape(model: Model) -> int:
+    """Report how many coefficient planes ``model`` emits per pixel.
+
+    ``PolynomialModel`` answers analytically (``order + 1``); any other
+    model is probed by fitting a throwaway single-pixel ramp.
+    """
+    if not isinstance(model, PolynomialModel):
+        # Probe fit: eight evenly spaced samples of a perfectly linear
+        # ramp (signal == time) on a 1x1 tile, every sample marked valid.
+        nProbe = 8
+        tProbe = np.linspace(0.0, 1.0, nProbe, dtype=np.float32)
+        probe = model.fitBlock(
+            m=tProbe[:, None, None], t=tProbe.copy(),
+            valid=np.ones((nProbe, 1, 1), dtype=bool), conditionNumberLimit=1e12,
+        )
+        return int(probe.coefficients.shape[0])
+    return model.order + 1
diff --git a/python/lsst/obs/pfs/h4Linearity/io.py b/python/lsst/obs/pfs/h4Linearity/io.py
new file mode 100644
index 00000000..898c39b7
--- /dev/null
+++ b/python/lsst/obs/pfs/h4Linearity/io.py
@@ -0,0 +1,230 @@
+"""FITS persistence for LinearityCorrection objects."""
+
+from __future__ import annotations
+
+import datetime as _dt
+from importlib.metadata import PackageNotFoundError, version
+from pathlib import Path
+
+import numpy as np
+from astropy.io import fits
+
+from .models import MODEL_REGISTRY
+from .types import Diagnostics, LinearityCorrection
+
+
+def _packageVersion() -> str:
+    try:  # version() reads the installed distribution metadata for "h4Linearity"
+        return version("h4Linearity")
+    except PackageNotFoundError:  # no distribution of that name is installed
+        return "unknown"  # NOTE(review): confirm the dist name now that the code lives in lsst.obs.pfs
+
+
+def isH4LinearityFile(path: str | Path) -> bool:
+    """Report whether ``path`` is a FITS file in the h4Linearity layout.
+
+    Only the primary header is consulted: it must hold a string-valued
+    ``MODEL`` card naming a model registered in ``MODEL_REGISTRY``.
+    Files written by :func:`saveFits` always pass; legacy
+    ``nirLinearity.NirLinearity`` files (and anything else) fail.
+
+    This is a quiet predicate: a missing path, a file astropy cannot
+    open as FITS, a missing ``MODEL`` card, or an unregistered model
+    name all yield ``False`` rather than an exception.
+    """
+    candidate = Path(path)
+    if candidate.is_file():
+        try:
+            with fits.open(candidate) as hdus:
+                modelName = hdus[0].header.get("MODEL")
+        except (OSError, fits.verify.VerifyError):
+            return False
+        return isinstance(modelName, str) and modelName in MODEL_REGISTRY
+    return False
+
+
+def saveFits(path: str | Path, correction: LinearityCorrection) -> None:
+    """Write a ``LinearityCorrection`` to a FITS file.
+
+    Layout:
+
+    - **PRIMARY HDU** (header-only). Contains ``MODEL`` (model name —
+      used by :func:`isH4LinearityFile` to sniff the file format and by
+      :func:`loadFits` to look up the model class), ``FITDATE``
+      (ISO-8601 UTC), ``RELINVER`` (package version), and the scalar
+      ``correction.diagnostics.summary`` entries. Keys longer than 8
+      characters become HIERARCH cards (case-preserved); shorter keys
+      are uppercased by FITS, so the Python key rides in the card
+      comment. NumPy scalars are unwrapped; NaN/inf floats are omitted.
+    - **Model-specific HDUs**, contributed by ``model.toFitsHdus()``
+      (e.g. polynomial coefficients).
+    - **Standard image HDUs** (one per :class:`Diagnostics` array):
+      ``FITMIN``, ``FITMAX``, ``BPMASK``, ``RESRMS``, ``RESMAX``,
+      ``NPOINTS``, ``MONOTON``, ``CONDNUM``.
+
+    CHECKSUM/DATASUM are added to every image HDU. The file is
+    overwritten unconditionally.
+
+    Parameters
+    ----------
+    path : str or Path
+        Destination FITS file.
+    correction : LinearityCorrection
+        Object to persist.
+    """
+    path = Path(path)
+
+    # Build PRIMARY header.
+    primaryHeader = fits.Header()
+    primaryHeader["MODEL"] = (correction.model.modelName, "model form identifier")
+    primaryHeader["FITDATE"] = (
+        _dt.datetime.now(_dt.timezone.utc).isoformat(timespec="seconds"),
+        "ISO-8601 fit timestamp",
+    )
+    primaryHeader["RELINVER"] = (_packageVersion(), "h4Linearity package version")
+    # Scalar summary fields. Long keys become HIERARCH cards (case-preserved);
+    # short keys are uppercased by FITS and the original Python key is stored in
+    # the comment so ``loadFits`` can reconstruct the dict without collisions.
+    for key, value in correction.diagnostics.summary.items():
+        if isinstance(value, np.generic):
+            # NumPy scalars (np.float32, np.int64, ...) fail the isinstance
+            # test below; unwrap them so they are persisted as well.
+            value = value.item()
+        if not isinstance(value, (int, float, bool, str)):
+            continue
+        if isinstance(value, float) and not np.isfinite(value):
+            # astropy raises ValueError for NaN/inf header values, and ``fit``
+            # records NaN percentiles when no pixel is good. Skip the entry
+            # rather than abort the whole write.
+            continue
+        if len(key) > 8:
+            # HIERARCH card: keyword IS the Python key; no comment needed.
+            primaryHeader["HIERARCH " + key] = (value, "")
+        else:
+            # Short key: FITS uppercases it; store original in comment.
+            primaryHeader[key] = (value, key)
+    primary = fits.PrimaryHDU(header=primaryHeader)
+
+    # Model-specific HDUs.
+    modelHdus = list(correction.model.toFitsHdus(correction))
+
+    # Standard HDUs for the non-model-specific arrays.
+    diag = correction.diagnostics
+    fitMinHdu = fits.ImageHDU(data=correction.fitMin, name="FITMIN")
+    fitMaxHdu = fits.ImageHDU(data=correction.fitMax, name="FITMAX")
+    bpHdu = fits.ImageHDU(data=correction.badPixelMask, name="BPMASK")
+    # NOTE(review): HIGH_FIT_RESIDUAL (set by ``fit``) is not listed below — confirm its bit and add it.
+    bpHdu.header["COMMENT"] = "Bit flags: MASKED_BY_INPUT=0x01 INSUFFICIENT_POINTS=0x02"
+    bpHdu.header["COMMENT"] = "          FIT_FAILED=0x04 NON_MONOTONIC=0x08 BORDER_PIX=0x10"
+    bpHdu.header["COMMENT"] = "          BELOW_VALID_RANGE=0x20 ABOVE_VALID_RANGE=0x40"
+    resRmsHdu = fits.ImageHDU(data=diag.residualRms, name="RESRMS")
+    resMaxHdu = fits.ImageHDU(data=diag.maxAbsResidual, name="RESMAX")
+    nPtsHdu = fits.ImageHDU(data=diag.nPointsUsed, name="NPOINTS")
+    monoHdu = fits.ImageHDU(data=diag.monotonic.astype(np.uint8), name="MONOTON")
+    condHdu = fits.ImageHDU(data=diag.conditionNumber, name="CONDNUM")
+
+    hdul = fits.HDUList(
+        [primary, *modelHdus, fitMinHdu, fitMaxHdu, bpHdu,
+         resRmsHdu, resMaxHdu, nPtsHdu, monoHdu, condHdu]
+    )
+
+    # Add CHECKSUM/DATASUM to every image HDU.
+    for hdu in hdul[1:]:
+        hdu.add_checksum()
+
+    hdul.writeto(path, overwrite=True)
+
+
+def loadFits(path: str | Path) -> LinearityCorrection:
+    """Rebuild a ``LinearityCorrection`` from a :func:`saveFits` file.
+
+    The primary ``MODEL`` keyword selects the model class from
+    :data:`MODEL_REGISTRY`; that class rebuilds the model and its
+    coefficients from the file's HDUs. The standard arrays (``FITMIN`` /
+    ``FITMAX`` / ``BPMASK`` / ``RESRMS`` / ``RESMAX`` / ``NPOINTS`` /
+    ``MONOTON`` / ``CONDNUM``) are fetched by extension name and the
+    diagnostics-summary dict is recovered from primary-header cards.
+
+    Parameters
+    ----------
+    path : str or Path
+        FITS file produced by :func:`saveFits`.
+
+    Returns
+    -------
+    LinearityCorrection
+
+    Raises
+    ------
+    ValueError
+        If the file's ``MODEL`` value isn't in :data:`MODEL_REGISTRY`,
+        or a standard HDU is missing. (For a quiet predicate over an
+        arbitrary path, use :func:`isH4LinearityFile` first.)
+    """
+    path = Path(path)
+    with fits.open(path) as hdul:
+        header = hdul[0].header
+        modelName = header["MODEL"]
+        if modelName not in MODEL_REGISTRY:
+            raise ValueError(
+                f"Unknown model {modelName!r}; known: {sorted(MODEL_REGISTRY)}"
+            )
+
+        # Hand the model classmethod every HDU; it consumes what it needs.
+        modelClass = MODEL_REGISTRY[modelName]
+        model, coefficients = modelClass.fromFitsHdus(list(hdul))
+
+        # Model-independent extensions, looked up by name. MONOTON was
+        # written as uint8 by ``saveFits``, hence the cast back to bool.
+        fitMin = _arrayByName(hdul, "FITMIN")
+        fitMax = _arrayByName(hdul, "FITMAX")
+        badPixelMask = _arrayByName(hdul, "BPMASK").astype(np.uint16)
+        residualRms = _arrayByName(hdul, "RESRMS")
+        maxAbsResidual = _arrayByName(hdul, "RESMAX")
+        nPointsUsed = _arrayByName(hdul, "NPOINTS").astype(np.int32)
+        monotonic = _arrayByName(hdul, "MONOTON").astype(bool)
+        conditionNumber = _arrayByName(hdul, "CONDNUM")
+
+        # Rebuild the summary dict from the primary header (best effort:
+        # only scalar entries were written). Mixed-case keywords are
+        # HIERARCH cards whose keyword is the Python key itself. An
+        # all-uppercase keyword is a short key that FITS uppercased; its
+        # original spelling is kept in the card comment, and cards with
+        # an empty comment are ignored.
+        reserved = ("SIMPLE", "BITPIX", "EXTEND", "MODEL", "FITDATE", "RELINVER")
+        summary: dict = {}
+        for card in header.cards:
+            keyword = card.keyword
+            # Structural and provenance cards are not summary entries.
+            if keyword.startswith("NAXIS") or keyword in reserved:
+                continue
+            if keyword != keyword.upper():
+                # HIERARCH card — keyword IS the original Python key.
+                summary[keyword] = card.value
+            elif card.comment:
+                # Short key: the comment carries the original spelling.
+                summary[card.comment] = card.value
+
+    # Assemble the result from the pieces read above.
+    return LinearityCorrection(
+        model=model,
+        coefficients=coefficients,
+        fitMin=fitMin,
+        fitMax=fitMax,
+        badPixelMask=badPixelMask,
+        diagnostics=Diagnostics(
+            residualRms=residualRms,
+            maxAbsResidual=maxAbsResidual,
+            nPointsUsed=nPointsUsed,
+            monotonic=monotonic,
+            conditionNumber=conditionNumber,
+            summary=summary,
+        ),
+    )
+
+
+def _arrayByName(hdul: fits.HDUList, name: str) -> np.ndarray:
+    for hdu in hdul:  # the first HDU whose ``name`` matches wins
+        if getattr(hdu, "name", "") == name:
+            return np.array(hdu.data)  # copy: the result must not reference the (memmapped) file
+    raise ValueError(f"HDU {name!r} not found in FITS file")
diff --git a/python/lsst/obs/pfs/h4Linearity/isrPlots.py b/python/lsst/obs/pfs/h4Linearity/isrPlots.py
new file mode 100644
index 00000000..0b9cc8f9
--- /dev/null
+++ b/python/lsst/obs/pfs/h4Linearity/isrPlots.py
@@ -0,0 +1,1835 @@
+"""Plotting helpers for H4 ISR validation over arbitrary read spans.
+
+Currently covers:
+
+- Half-vs-half ISR validation (data shape produced by ``/tmp/run_halves_isr.py``).
+  Functions: `load`, `printSummary`, `plotAdditivityMap`, `plotRateComparisonMap`,
+  `plotAmpBias`, `plotResidualHist`, `randomPixels`.
+- Per-pixel ramp inspection across one or more read spans (single full ramp, both
+  halves overlaid, or any other split). Functions: `plotPixelRamp`, with data
+  collected by `validate.collectPixelRampData`.
+
+Pure numpy + matplotlib — no LSST stack imports, so this module can be loaded
+in any Python environment that has access to the saved arrays.
+
+Typical use from JupyterLab::
+
+    from lsst.obs.pfs.h4Linearity import isrPlots
+
+    data = isrPlots.load('/work/cloomis/outputs/halves_isr_v142109_n3.npz')
+
+    isrPlots.printSummary(data)
+    isrPlots.plotAdditivityMap(data, corner='top-left')
+    isrPlots.plotRateComparisonMap(data, rateMin=5.0)
+    isrPlots.plotAmpBias(data)
+
+Each plot function returns the ``Figure`` for further customization.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Optional, Union
+
+import numpy as np
+
+
+# H4 readout has 32 horizontal channels (slow direction), each 128 pixels wide.
+H4_AMP_WIDTH = 128  # pixels per channel along x; default ``ampWidth`` of `plotAmpBias`
+H4_N_AMPS = 32  # channel count; 32 * 128 = 4096 columns (assumes a 4096-wide frame)
+
+
+@dataclass
+class PixelRampData:
+    """Per-read cubes aligned for a per-pixel ramp inspection.
+
+    All cubes share shape ``(H, W, N)`` and are indexed by the same
+    ``readIndices`` along axis -1 (the time axis is last to match the
+    H4 ISR cube convention). ``readIndices[i]`` is the absolute read
+    index in the original ramp; ``cube*[y, x, i]`` is the cumulative
+    value at that read for pixel (y, x).
+
+    cubeRaw : pre-linearization, post-dark-subtraction cumulative (the
+        input to ``h4Linearity.apply``).
+    cubeLin : post-linearization cumulative (the output of
+        ``h4Linearity.apply``, after re-anchoring at firstRead).
+    cubeDark : dark-cube cumulative aligned with the same reads.
+    cubePreDark : pre-dark-subtraction cumulative = cubeRaw + cubeDark.
+        Useful for seeing the raw detector reads before any subtraction.
+
+    For ``firstRead=0`` (full ramp) and the default ``applyUTRWeights=True``,
+    ``cubeRaw[..., i]`` corresponds to absolute read ``i + 1`` of the
+    original ramp (the first delta is read 0 -> read 1). For
+    ``firstRead > 0`` the absolute read at index ``i`` is
+    ``firstRead + i + 1``.
+    """
+
+    cubeRaw: np.ndarray
+    cubeLin: np.ndarray
+    cubeDark: np.ndarray
+    cubePreDark: np.ndarray
+    readIndices: np.ndarray  # absolute read index of each slice along the last cube axis
+    firstRead: int  # NOTE(review): firstRead/lastRead presumably bound the inspected read span — confirm
+    lastRead: int
+    visit: int = -1  # placeholder default
+    cam: str = "?"  # placeholder default
+    fitMin: Optional[np.ndarray] = None  # NOTE(review): presumably LinearityCorrection.fitMin/fitMax — confirm
+    fitMax: Optional[np.ndarray] = None
+    # Post-CR-repair linearized cumulative. None when CR detection didn't run.
+    # Differs from cubeLin at pixels repaired by cr.iterativeUtrDetectAndRepair.
+    cubeCR: Optional[np.ndarray] = None
+    # Full ``exposure.mask.array`` after ISR (incl. CR + ASIC_GLITCH stamps).
+    mask: Optional[np.ndarray] = None
+    # Mask plane dictionary {name: bit_index} captured at construction time.
+    # Used by `randomPixels(..., plane="CR")` etc. to look up plane bits
+    # without importing afw at plot time.
+    maskPlaneDict: Optional[dict] = None
+    # Per-pixel UTR rate (ADU/read). From the iterative CR detector
+    # (post-repair) when doCR=True, else computed from cubeLin.
+    avgRate: Optional[np.ndarray] = None
+    # Per-delta flag arrays from the iterative CR detector, shape
+    # ``(H, W, N-1)`` bool. ``crFlagMask[y, x, k]`` True means the delta
+    # from read k to read k+1 was flagged as a CR at pixel (y, x). Used
+    # by `plotPixelRamp` to mark flagged reads along the top of each row.
+    crFlagMask: Optional[np.ndarray] = None
+    glitchFlagMask: Optional[np.ndarray] = None  # NOTE(review): presumably the glitch analogue of crFlagMask — confirm
+
+
+@dataclass
+class HalvesIsrData:
+    """Per-pixel arrays from a half-vs-half ISR run.
+
+    All arrays are ``(H, W)`` float32 unless noted. ``mask*`` are integer
+    bitfields from `lsst.afw.image.Mask`.
+    """
+
+    img1: np.ndarray  # NOTE(review): the 1 / 2 / F(ull) suffixes presumably denote the first half,
+    img2: np.ndarray  # the second half and the full ramp — confirm against the script that
+    imgF: np.ndarray  # writes the .npz.
+    rate1: np.ndarray
+    rate2: np.ndarray
+    rateFull: np.ndarray
+    addResid: np.ndarray
+    addResidRel: np.ndarray  # drawn by `plotAdditivityMap` with colorbar label "resid / |full|"
+    relDiff: np.ndarray  # 2*(rate1-rate2)/(rate1+rate2), per `plotRateComparisonMap`
+    avgRate: np.ndarray  # per-pixel rate compared against ``rateMin`` (ADU/read)
+    mask_first: np.ndarray
+    mask_second: np.ndarray
+    mask_full: np.ndarray
+    maskUnion: np.ndarray  # nonzero pixels are excluded by `printSummary` and the map / amp plots
+    visit: int
+    cam: str
+    midRead: int
+    nReads: int
+
+
+def load(path: str) -> HalvesIsrData:
+    """Read a halves-ISR validation ``.npz`` into a `HalvesIsrData`."""
+    arrayKeys = ('img1', 'img2', 'imgF', 'rate1', 'rate2', 'rateFull', 'addResid',
+                 'addResidRel', 'relDiff', 'avgRate', 'mask_first', 'mask_second',
+                 'mask_full', 'maskUnion')
+    with np.load(path, allow_pickle=False) as archive:
+        fields = {key: archive[key] for key in arrayKeys}
+        # Scalars come back as 0-d arrays; unwrap to Python ints/strings.
+        fields['visit'] = int(archive['visit'])
+        fields['cam'] = str(archive['cam'])
+        fields['midRead'] = int(archive['midRead'])
+        fields['nReads'] = int(archive['nReads'])
+    # Keyword construction, so the order above need not match the dataclass.
+    return HalvesIsrData(**fields)
+
+
+_RATE_BINS = (  # half-open (lo, hi] ``avgRate`` bins tabulated by `printSummary`
+    (-1e9, 0.0), (0.0, 1.0), (1.0, 5.0), (5.0, 20.0),
+    (20.0, 100.0), (100.0, 1000.0), (1000.0, 5000.0), (5000.0, 50000.0),
+)
+
+
+def _asData(data: Union[str, HalvesIsrData]) -> HalvesIsrData:  # paths are loaded, data passes through
+    return load(data) if isinstance(data, str) or hasattr(data, "__fspath__") else data
+
+
+def printSummary(data: Union[str, HalvesIsrData]) -> None:
+    """Print per-rate-bin additivity and rate-agreement tables for a run."""
+    d = _asData(data)
+    unmasked = (d.maskUnion == 0) & np.isfinite(d.relDiff)
+    nU = int(unmasked.sum())
+    total = d.maskUnion.size
+    print(f"visit={d.visit} cam={d.cam}  nReads={d.nReads}  midRead={d.midRead}")
+    print(f"unmasked: {nU:,}/{total:,} ({100 * nU / total:.2f}%)")
+
+    def _occupiedBins():
+        # Yield (lo, hi, nPix, selection) for each non-empty (lo, hi] avgRate bin.
+        for lo, hi in _RATE_BINS:
+            sel = unmasked & (d.avgRate > lo) & (d.avgRate <= hi)
+            n = int(sel.sum())
+            if n:
+                yield lo, hi, n, sel
+
+    # First table: percentiles of |addResidRel| within each bin.
+    print("\n=== |addResidRel| over unmasked, by avgRate ===")
+    print(f"{'rate range':>14s}  {'nPix':>10s}  {'pct50':>9s}  {'pct95':>9s}  "
+          f"{'pct99.9':>9s}  {'max':>9s}")
+    for lo, hi, n, sel in _occupiedBins():
+        rr = np.abs(d.addResidRel[sel])
+        p50, p95, p999 = np.percentile(rr, [50, 95, 99.9])
+        print(f"({lo:>5.0f},{hi:>6.0f}]  {n:>10,d}  {p50:>9.4f}  {p95:>9.4f}  "
+              f"{p999:>9.4f}  {float(rr.max()):>9.4f}")
+
+    print("\n=== |relDiff| over unmasked, by avgRate (rate-space test) ===")
+    print(f"{'rate range':>14s}  {'nPix':>10s}  {'median':>9s}  {'MAD':>9s}  "
+          f"{'|<1%':>7s}  {'|<5%':>7s}  {'|<20%':>7s}")
+    for lo, hi, n, sel in _occupiedBins():
+        rd = d.relDiff[sel]
+        med = float(np.median(rd))
+        mad = float(1.4826 * np.median(np.abs(rd - med)))
+        # Fraction of the bin whose |relDiff| is below 1 %, 5 % and 20 %.
+        f1, f5, f20 = (float(np.mean(np.abs(rd) < tol)) for tol in (0.01, 0.05, 0.20))
+        print(f"({lo:>5.0f},{hi:>6.0f}]  {n:>10,d}  {med:+8.4f}  {mad:8.4f}  "
+              f"{f1:7.4f}  {f5:7.4f}  {f20:7.4f}")
+
+
+def _cornerSlice(corner: str, height: int, width: int, size: int):
+    """Return (yslice, xslice) for one of: top-left, top-right, bottom-left, bottom-right.
+
+    Corner names follow the y-up display convention: "top" means high y,
+    "bottom" means low y. Plots use ``origin='lower'``, so this matches the
+    visual orientation in matplotlib. ``size`` is clamped per axis to the
+    frame, so the slice bounds are always real pixel indices (callers
+    reuse them as imshow extents). Unknown names raise ``ValueError``.
+    """
+    vert, sep, horiz = corner.partition('-')
+    if sep and vert in ('top', 'bottom') and horiz in ('left', 'right'):
+        ny, nx = min(size, height), min(size, width)
+        ys = slice(height - ny, height) if vert == 'top' else slice(0, ny)
+        xs = slice(0, nx) if horiz == 'left' else slice(width - nx, width)
+        return ys, xs
+    raise ValueError(f"unknown corner {corner!r}; expected top-left / top-right / "
+                     "bottom-left / bottom-right.")
+
+
+def plotAdditivityMap(
+    data: Union[str, HalvesIsrData],
+    *,
+    corner: Optional[str] = None,
+    cornerSize: int = 600,
+    vlim: float = 0.5,
+    rateMin: float = 0.0,
+    fig=None,
+):
+    """imshow of the additivity residual map; blanked pixels are drawn gray.
+
+    Parameters
+    ----------
+    data
+        Loaded `HalvesIsrData` or path to the .npz.
+    corner : {None, 'top-left', 'top-right', 'bottom-left', 'bottom-right'}
+        If given, zoom to a ``cornerSize x cornerSize`` corner of the detector.
+    cornerSize : int
+        Side length of the corner zoom in pixels.
+    vlim : float
+        Color-scale limit; map is clipped to ``[-vlim, +vlim]``.
+    rateMin : float
+        If positive, blank pixels with ``avgRate <= rateMin`` (their relative
+        residual is dominated by noise). Masked pixels are always blanked.
+    fig : matplotlib.figure.Figure, optional
+        Reuse an existing figure.
+    """
+    import matplotlib.pyplot as plt
+
+    d = _asData(data)
+    H, W = d.addResidRel.shape
+    arr = d.addResidRel.copy()
+    if rateMin > 0:
+        arr[d.avgRate <= rateMin] = np.nan
+    arr[d.maskUnion != 0] = np.nan
+
+    if corner is not None:
+        ys, xs = _cornerSlice(corner, H, W, cornerSize)
+        arr = arr[ys, xs]
+        extent = (xs.start, xs.stop, ys.start, ys.stop)
+        title = (f"addResidRel — {corner}  (cornerSize={cornerSize}, "
+                 f"rateMin={rateMin})")
+    else:
+        extent = None
+        title = f"addResidRel — full detector  (rateMin={rateMin})"
+
+    if fig is None:
+        fig = plt.figure(figsize=(8, 8))
+    ax = fig.add_subplot(111)
+    # NaN cells are drawn transparent, so the gray axes background is what
+    # tells blanked pixels apart from the white zero-residual midpoint.
+    ax.set_facecolor('0.6')
+    im = ax.imshow(arr, origin='lower', cmap='RdBu_r', vmin=-vlim, vmax=vlim,
+                   extent=extent, interpolation='nearest')
+    fig.colorbar(im, ax=ax, fraction=0.046, pad=0.02, label='resid / |full|')
+    ax.set_title(f"visit={d.visit} {d.cam}  {title}")
+    ax.set(xlabel="x", ylabel="y")
+    fig.tight_layout()
+    return fig
+
+
+def plotRateComparisonMap(
+    data: Union[str, HalvesIsrData],
+    *,
+    rateMin: float = 5.0,
+    vlim: float = 0.2,
+    corner: Optional[str] = None,
+    cornerSize: int = 600,
+    fig=None,
+):
+    """imshow of ``relDiff = 2(rate1-rate2)/(rate1+rate2)``, masked to bright pixels.
+
+    Parameters
+    ----------
+    data
+        Loaded `HalvesIsrData` or path to the .npz.
+    rateMin : float
+        Pixels with ``avgRate <= rateMin`` are blanked (NaN, drawn gray).
+        Default 5 ADU/read selects pixels with reasonable per-half S/N.
+    vlim : float
+        Color-scale clip ``[-vlim, +vlim]``.
+    corner, cornerSize, fig
+        Same semantics as `plotAdditivityMap`.
+    """
+    import matplotlib.pyplot as plt
+
+    d = _asData(data)
+    H, W = d.relDiff.shape
+    arr = d.relDiff.copy()
+    bad = (d.maskUnion != 0) | (d.avgRate <= rateMin) | ~np.isfinite(arr)
+    arr[bad] = np.nan
+
+    if corner is not None:
+        ys, xs = _cornerSlice(corner, H, W, cornerSize)
+        arr = arr[ys, xs]
+        extent = (xs.start, xs.stop, ys.start, ys.stop)
+        zoom = f"  ({corner}, cornerSize={cornerSize})"
+    else:
+        extent = None
+        zoom = ""
+
+    if fig is None:
+        fig = plt.figure(figsize=(8, 8))
+    ax = fig.add_subplot(111)
+    # NaN cells are drawn transparent, so the gray axes background is what
+    # tells blanked pixels apart from the white zero-difference midpoint.
+    ax.set_facecolor('0.6')
+    im = ax.imshow(arr, origin='lower', cmap='RdBu_r', vmin=-vlim, vmax=vlim,
+                   extent=extent, interpolation='nearest')
+    fig.colorbar(im, ax=ax, fraction=0.046, pad=0.02,
+                 label='2*(r1-r2)/(r1+r2)')
+    ax.set_title(f"visit={d.visit} {d.cam}  relDiff  rateMin={rateMin}{zoom}")
+    ax.set(xlabel="x", ylabel="y")
+    fig.tight_layout()
+    return fig
+
+
+def plotAmpBias(
+    data: Union[str, HalvesIsrData],
+    *,
+    rateMin: float = 5.0,
+    ampWidth: int = H4_AMP_WIDTH,
+    fig=None,
+):
+    """Plot the rate-comparison bias amplifier by amplifier.
+
+    The detector is cut into ``ampWidth``-wide column stripes (128 by
+    default, i.e. 32 amps along x for H4). For each stripe the upper
+    panel shows median(relDiff) with a MAD error bar over unmasked
+    pixels brighter than ``rateMin``, plus the global median; the lower
+    panel shows how many pixels entered the comparison.
+
+    Useful for telling a detector-wide +N% bias from one concentrated
+    in particular amps. Raises ``ValueError`` when the width is not a
+    multiple of ``ampWidth``.
+    """
+    import matplotlib.pyplot as plt
+
+    d = _asData(data)
+    _, W = d.relDiff.shape
+    nAmps, remainder = divmod(W, ampWidth)
+    if remainder:
+        raise ValueError(
+            f"detector width {W} not divisible by ampWidth {ampWidth}"
+        )
+
+    unmasked = (d.maskUnion == 0) & np.isfinite(d.relDiff) & (d.avgRate > rateMin)
+
+    # Per-amp statistics over the selected pixels of each stripe; NaN
+    # stays in place for amps that contribute no pixels.
+    ampMedians = np.full(nAmps, np.nan, dtype=np.float64)
+    ampMads = np.full(nAmps, np.nan, dtype=np.float64)
+    ampN = np.zeros(nAmps, dtype=np.int64)
+    for k in range(nAmps):
+        stripe = slice(k * ampWidth, (k + 1) * ampWidth)
+        rd = d.relDiff[:, stripe][unmasked[:, stripe]]
+        ampN[k] = rd.size
+        if rd.size:
+            med = float(np.median(rd))
+            ampMedians[k] = med
+            ampMads[k] = float(1.4826 * np.median(np.abs(rd - med)))
+
+    if fig is None:
+        fig, (axTop, axBottom) = plt.subplots(2, 1, figsize=(11, 6), sharex=True)
+    else:
+        axTop, axBottom = fig.subplots(2, 1, sharex=True)
+
+    # Upper panel: per-amp medians with error bars, plus reference lines.
+    ampIdx = np.arange(nAmps)
+    axTop.errorbar(ampIdx, ampMedians, yerr=ampMads, fmt='o', capsize=3,
+                   color='C0', label='median ± MAD')
+    axTop.axhline(0.0, color='k', lw=0.5, ls=':')
+    overallMed = float(np.median(d.relDiff[unmasked]))
+    axTop.axhline(overallMed, color='C3', lw=0.8, ls='--',
+                  label=f'global median = {overallMed:+.4f}')
+    axTop.set_ylabel('relDiff (rate1 vs rate2)')
+    axTop.set_title(f"visit={d.visit} {d.cam}  per-amp rate bias  "
+                    f"(rateMin={rateMin}, ampWidth={ampWidth})")
+    axTop.legend(loc='best', fontsize=9)
+    axTop.grid(True, alpha=0.3)
+
+    axBottom.bar(ampIdx, ampN, color='0.5')
+    axBottom.set_xlabel(f'amplifier index (x = [k*{ampWidth}, (k+1)*{ampWidth}))')
+    axBottom.set_ylabel('nPix in comparison')
+    axBottom.grid(True, alpha=0.3)
+
+    fig.tight_layout()
+    return fig
+
+
+# Line styles for per-pixel ramp plots, cycled by data-source index.
+_PIXEL_RAMP_LINESTYLES = ('-', '--', ':', '-.')
+
+
+# Mask planes to highlight with colored badges on per-pixel ramp plots,
+# as (plane name, badge color) pairs. Order is priority left-to-right;
+# any plane set on the pixel but not listed here is drawn after these,
+# in the fallback grey color.
+_MASK_PLANE_BADGES = (
+    ('CR', '#d62728'),  # red
+    ('ASIC_GLITCH', '#ff7f0e'),  # orange
+    ('SAT', '#bcbd22'),  # ochre
+    ('BAD', '#444444'),  # dark grey
+    ('INTRP', '#888888'),  # mid grey
+    ('NO_DATA', '#bbbbbb'),  # light grey
+    ('SUSPECT', '#9467bd'),  # purple
+    ('EDGE', '#17becf'),  # cyan
+)
+
+
+def _activeMaskPlanes(maskPixel: int, planeDict: dict) -> list:
+    """List the mask planes whose bit is set in ``maskPixel``.
+
+    Planes named in ``_MASK_PLANE_BADGES`` come first, in that tuple's
+    order; every other plane of ``planeDict`` whose bit is set follows,
+    sorted by name. Bits with no entry in ``planeDict`` are not reported.
+
+    Parameters
+    ----------
+    maskPixel : int
+        Mask value of a single pixel.
+    planeDict : dict
+        Maps plane name to bit index.
+
+    Returns
+    -------
+    names : list of str
+        Empty when ``maskPixel`` is zero.
+    """
+    if maskPixel == 0:
+        return []
+
+    def isSet(planeName):
+        return (maskPixel >> planeDict[planeName]) & 1
+
+    # Badge planes keep their priority order; the rest trail alphabetically.
+    badgeOrder = [badge for badge, _ in _MASK_PLANE_BADGES]
+    ranked = [p for p in badgeOrder if p in planeDict and isSet(p)]
+    extras = sorted(set(planeDict) - set(badgeOrder))
+    ranked.extend(p for p in extras if isSet(p))
+    return ranked
+
+
+def _drawMaskBadges(ax, planeNames: list, yPos: float = 0.97) -> None:
+    """Draw one colored text badge per mask plane in a row on ``ax``.
+
+    Badges are laid out left to right in axes coordinates at height
+    ``yPos``, starting at x = 0.01. Planes without an entry in
+    ``_MASK_PLANE_BADGES`` get the fallback grey ``'#666666'``.
+
+    Parameters
+    ----------
+    ax : matplotlib axes
+        Axes to annotate.
+    planeNames : list of str
+        Badge labels, in drawing order. Nothing is drawn when empty.
+    yPos : float
+        Top edge of the badges, in axes coordinates.
+    """
+    if not planeNames:
+        return
+    palette = dict(_MASK_PLANE_BADGES)
+    cursor = 0.01  # left edge of the next badge, in axes coordinates
+    for label in planeNames:
+        badgeBox = dict(boxstyle='round,pad=0.18',
+                        facecolor=palette.get(label, '#666666'),
+                        edgecolor='none', alpha=0.92)
+        ax.text(cursor, yPos, label, transform=ax.transAxes,
+                ha='left', va='top', fontsize=7, color='white',
+                fontweight='bold', bbox=badgeBox)
+        # The rendered badge width is not known until draw time, so step
+        # by an estimate: 0.014 axes units per character, with two extra
+        # characters' worth of padding.
+        cursor += 0.014 * (len(label) + 2)
+        if cursor > 0.85:
+            # Row is full: drop the remaining badges rather than wrap.
+            return
+
+
+def topDiscrepantPixels(
+    data: "HalvesIsrData",
+    n: int = 20,
+    *,
+    metric: str = "relDiffWeighted",
+    rateMin: float = 5.0,
+    rateMax: Optional[float] = None,
+    unmasked: bool = True,
+):
+    """Return the top-N most discrepant pixels by the chosen metric.
+
+    Avoids the noise-amplification problem of sorting on
+    ``|addResid|/|imgF|``: that ratio blows up when ``|imgF|`` is small,
+    so the top pixels tend to be low-S/N rather than truly anomalous.
+
+    Parameters
+    ----------
+    data : HalvesIsrData
+        Read for ``relDiff`` and ``avgRate``, plus ``maskUnion`` when
+        ``unmasked`` is set and ``addResid`` for that metric.
+    n : int
+        Maximum number of pixels to return. Fewer are returned when
+        fewer pixels pass the selection; ``0`` returns an empty list.
+    metric : {'relDiff', 'relDiffWeighted', 'addResid'}
+        Sort key:
+
+        - ``relDiff`` — pure rate-comparison: ``|2(r1-r2)/(r1+r2)|``.
+          Sensitive to any half-vs-half mismatch but still noisy for
+          low-rate pixels (Poisson + read noise dominate).
+        - ``relDiffWeighted`` (default) — ``|relDiff| * sqrt(avgRate)``.
+          Approximate S/N weighting that prefers high-rate pixels with
+          large rate disagreement.
+        - ``addResid`` — raw additivity residual in ADU:
+          ``|imgF - (img1 + img2)|``. Picks the largest absolute
+          discrepancies. Best when you want bright outliers regardless
+          of relative scale.
+    rateMin : float
+        Drop pixels with ``avgRate <= rateMin`` (default 5 ADU/read).
+        Faint pixels are excluded from the ranking because their metric
+        values are dominated by statistical noise.
+    rateMax : float, optional
+        Drop pixels with ``avgRate > rateMax``. Use this to focus on a
+        specific flux regime.
+    unmasked : bool
+        Restrict to pixels with no mask bits set on any of the three
+        ISR passes (default True).
+
+    Returns
+    -------
+    coords : list[tuple[int, int]]
+        ``(x, y)`` pixel coordinates ordered by *descending* metric
+        value. Pixels whose metric value is NaN or infinite are never
+        returned.
+
+    Raises
+    ------
+    ValueError
+        If ``n`` is negative or ``metric`` is not one of the known names.
+    RuntimeError
+        If no pixel passes the selection with a finite metric value.
+    """
+    n = int(n)
+    if n < 0:
+        raise ValueError(f"n must be non-negative; got {n}.")
+
+    sel = np.ones(data.relDiff.shape, dtype=bool)
+    if unmasked:
+        sel &= (data.maskUnion == 0)
+    sel &= np.isfinite(data.relDiff) & np.isfinite(data.avgRate)
+    sel &= data.avgRate > rateMin
+    if rateMax is not None:
+        sel &= data.avgRate <= rateMax
+
+    if metric == "relDiff":
+        score = np.abs(data.relDiff)
+    elif metric == "relDiffWeighted":
+        score = np.abs(data.relDiff) * np.sqrt(np.maximum(data.avgRate, 0.0))
+    elif metric == "addResid":
+        score = np.abs(data.addResid)
+    else:
+        raise ValueError(
+            f"unknown metric {metric!r}; expected "
+            "'relDiff', 'relDiffWeighted', or 'addResid'."
+        )
+    # NaN sorts above every finite value in argpartition/argsort, so a
+    # non-finite score (e.g. NaN in addResid) must be demoted together
+    # with the unselected pixels or it would head the ranking.
+    score = np.where(sel & np.isfinite(score), score, -np.inf)
+
+    flat = score.ravel()
+    nValid = int(np.isfinite(flat).sum())
+    if nValid == 0:
+        raise RuntimeError(
+            f"No pixels match the selection (rateMin={rateMin}, "
+            f"rateMax={rateMax}, unmasked={unmasked})."
+        )
+    nReturn = min(n, nValid)
+    if nReturn == 0:
+        # ``[-0:]`` slices the whole array, so n == 0 must stop here.
+        return []
+    # argpartition gets the top-nReturn without sorting the rest; then
+    # sort that small slice in descending order for a clean ranking.
+    top_idx = np.argpartition(flat, -nReturn)[-nReturn:]
+    top_idx = top_idx[np.argsort(flat[top_idx])[::-1]]
+    ys, xs = np.unravel_index(top_idx, score.shape)
+    return list(zip(xs.tolist(), ys.tolist()))
+
+
+_DEFAULT_EXCLUDE_PLANES = ('BAD', 'SAT', 'BORDER')
+
+
+def randomPixels(
+    data,
+    n: int = 20,
+    *,
+    plane: Optional[str] = None,
+    excludePlanes=_DEFAULT_EXCLUDE_PLANES,
+    rateMin: Optional[float] = None,
+    rateMax: Optional[float] = None,
+    rng=None,
+):
+    """Draw up to ``n`` distinct random ``(x, y)`` pixels that pass the filters.
+
+    Works on either a `HalvesIsrData` (``avgRate`` + ``maskUnion``) or a
+    `PixelRampData` (``mask`` + ``maskPlaneDict``); the attributes are
+    looked up by name, and ``mask`` wins over ``maskUnion`` when both exist.
+
+    Parameters
+    ----------
+    data : HalvesIsrData or PixelRampData
+    n : int
+        Upper bound on the number of pixels returned; the result is
+        shorter when fewer pixels pass the selection.
+    plane : str, optional
+        Keep only pixels whose mask has this plane's bit set, e.g.
+        ``"CR"`` or ``"ASIC_GLITCH"``. Needs both a mask and a
+        ``maskPlaneDict`` on the data (carried by ``PixelRampData`` from
+        ``validate.collectPixelRampData``). Combines with
+        ``excludePlanes``, ``rateMin`` and ``rateMax``.
+    excludePlanes : sequence of str
+        Reject pixels with any of these planes set. The default
+        ``('BAD', 'SAT', 'BORDER')`` drops obviously unusable pixels
+        while keeping CR / ASIC_GLITCH ones; pass ``()`` to turn the
+        rejection off. Names missing from ``data.maskPlaneDict`` are
+        skipped without error. When the data has a mask but no plane
+        dict (e.g. ``HalvesIsrData``), the rejection instead requires
+        the whole mask value to be zero.
+    rateMin, rateMax : float, optional
+        Keep ``rateMin <= avgRate <= rateMax``; the data must carry
+        ``avgRate``.
+
+        Common shortcuts:
+          - faint pixels: ``rateMin=1.0, rateMax=10.0``
+          - bright pixels: ``rateMin=50.0`` (no upper bound)
+          - dark pixels: ``rateMax=1.0``
+    rng : np.random.Generator or int, optional
+        Source of randomness. An ``int`` seeds a new generator, ``None``
+        creates an unseeded one, anything else is used as given.
+
+    Returns
+    -------
+    coords : list[tuple[int, int]]
+
+    Raises
+    ------
+    ValueError
+        If the data offers nothing to filter on, if ``plane`` cannot be
+        resolved, or if a rate bound is given without ``avgRate``.
+    RuntimeError
+        If no pixel passes the selection.
+    """
+    if rng is None:
+        rng = np.random.default_rng()
+    elif isinstance(rng, (int, np.integer)):
+        rng = np.random.default_rng(int(rng))
+
+    # PixelRampData.mask is preferred; HalvesIsrData.maskUnion is the fallback.
+    mask = getattr(data, 'mask', None)
+    if mask is None:
+        mask = getattr(data, 'maskUnion', None)
+    planeDict = getattr(data, 'maskPlaneDict', None)
+    avgRate = getattr(data, 'avgRate', None)
+
+    if mask is None and avgRate is None:
+        raise ValueError(
+            "data has neither a `mask`/`maskUnion` nor `avgRate`; nothing "
+            "to filter on."
+        )
+    keep = np.ones(avgRate.shape if mask is None else mask.shape, dtype=bool)
+
+    def planeBit(name):
+        # Build the bit in the mask's own integer type so the bitwise
+        # operations below do not change dtype.
+        intType = mask.dtype.type
+        return intType(1) << intType(int(planeDict[name]))
+
+    if plane is not None:
+        if mask is None or planeDict is None:
+            raise ValueError(
+                "plane= filtering requires `mask` + `maskPlaneDict` on the data "
+                "(use a PixelRampData from validate.collectPixelRampData)."
+            )
+        if plane not in planeDict:
+            raise ValueError(
+                f"plane {plane!r} not in maskPlaneDict; available planes: "
+                f"{sorted(planeDict.keys())}"
+            )
+        keep &= (mask & planeBit(plane)) != 0
+
+    if excludePlanes and mask is not None:
+        if planeDict is None:
+            # Without plane names the only safe reading of "exclude bad
+            # pixels" is "no mask bits at all".
+            keep &= (mask == 0)
+        else:
+            # Only the named bits veto a pixel, so CR / ASIC_GLITCH
+            # pixels still pass with the default exclusion list.
+            vetoBits = mask.dtype.type(0)
+            for name in excludePlanes:
+                if name in planeDict:
+                    vetoBits = vetoBits | planeBit(name)
+            if vetoBits != 0:
+                keep &= (mask & vetoBits) == 0
+
+    if avgRate is not None:
+        keep &= np.isfinite(avgRate)
+    elif rateMin is not None or rateMax is not None:
+        raise ValueError(
+            "rateMin/rateMax require `avgRate` on the data."
+        )
+    if rateMin is not None:
+        keep &= avgRate >= rateMin
+    if rateMax is not None:
+        keep &= avgRate <= rateMax
+
+    ys, xs = np.nonzero(keep)
+    nCandidates = ys.size
+    if nCandidates == 0:
+        raise RuntimeError(
+            f"No pixels match the selection "
+            f"(plane={plane!r}, excludePlanes={tuple(excludePlanes)}, "
+            f"rateMin={rateMin}, rateMax={rateMax})."
+        )
+    chosen = rng.choice(
+        nCandidates, size=min(int(n), nCandidates), replace=False
+    )
+    return [(int(col), int(row)) for col, row in zip(xs[chosen], ys[chosen])]
+
+
+def plotPixelRamp(
+    data,
+    coords,
+    *,
+    includePreDark: bool = True,
+    includeDark: bool = True,
+    includeCR: bool = False,
+    showFitRange: bool = False,
+    addResidualColumn: bool = False,
+    rowHeight: float = 2.0,
+    width: float = 10.0,
+    fig=None,
+):
+    """Plot per-read cumulative values for one or more pixels, one per row.
+
+    Up to four curves per pixel per data source (read-by-read cumulative ADU):
+
+    - ``cubePreDark`` — raw cumulative before dark subtraction (blue).
+      Skipped if ``includePreDark=False``.
+    - ``cubeDark`` — dark-cube cumulative (green). Skipped if ``includeDark=False``.
+    - ``cubeRaw`` — post-dark, pre-linearization cumulative (orange). The input
+      to ``h4Linearity.apply``.
+    - ``cubeLin`` — linearized cumulative (red).
+
+    When a source carries ``avgRate``, a faint grey reference line
+    ``cubeLin[..., 0] + avgRate * (read - reads[0])`` is drawn as well.
+    Per-read CR / ASIC_GLITCH flags are drawn as triangle markers just
+    above each axes; any other mask plane set on the pixel appears as a
+    text badge inside the axes.
+
+    Multi-source view: pass a list of `PixelRampData` (e.g. one for the first
+    half of the ramp and one for the second) to see how the pixels evolve
+    across both halves on the same axes. Each data source gets its own line
+    style (solid, dashed, dotted, dash-dot); colors stay tied to curve type.
+    ``cubePreDark`` / ``cubeDark`` / ``cubeRaw`` are continuous across the
+    half boundary because they are not re-anchored; ``cubeLin`` IS re-anchored
+    per source, so each source's linearized curve starts near zero at its own
+    ``firstRead``.
+
+    Parameters
+    ----------
+    data : `PixelRampData` or sequence of `PixelRampData`
+        Single ramp (one set of curves) or multi-source list (one set of
+        curves per source, different line styles). Line styles repeat
+        after four sources.
+    coords : (x, y) tuple, or sequence of (x, y) tuples
+        Pixels to plot; cubes are indexed as ``[y, x, :]``. A single
+        pixel is only recognized when given as a ``tuple``.
+    includePreDark, includeDark : bool
+        Toggle the pre-dark and dark curves.
+    includeCR : bool
+        If True and ``data.cubeCR`` is populated, overlay the
+        CR-repaired linearized cumulative as a fifth curve (purple).
+        Lets you see the difference CR repair makes per pixel.
+    showFitRange : bool
+        If True (and ``data.fitMin`` / ``data.fitMax`` are populated),
+        draw horizontal dotted lines at the linearity fit limits and
+        annotate the values. Default False because fitMax is usually
+        far above the data range, which forces the y-axis to expand
+        and shrinks the actual ramp curves. The limits are taken from
+        the first source only.
+    addResidualColumn : bool
+        If True, add a second column showing each source's ``cubeLin``
+        minus a common slope times the within-half read offset
+        (``reads - reads[0]``). The slope comes from one straight-line
+        fit over all sources' points together. With multi-source data
+        (first/second halves), the two residual curves overlay and
+        should be ~flat and overlapping where linearization is working;
+        systematic curvature signals a linearization failure in the
+        relevant flux regime.
+    rowHeight, width : float
+        Figure sizing in inches, used when a new figure is created. The
+        figure height is ``rowHeight * len(coords)`` plus a fixed 0.75 in
+        header band, so ``rowHeight=2.0`` keeps a 20-pixel stack to
+        about 40 in tall.
+    fig : matplotlib.figure.Figure, optional
+        Figure to draw into; the subplot grid is added with
+        ``fig.subplots``. A new figure is created when omitted.
+
+    Returns
+    -------
+    fig : matplotlib.figure.Figure
+
+    Raises
+    ------
+    ValueError
+        If ``data`` or ``coords`` is empty, or the sources' ``cubeLin``
+        arrays do not share the same leading ``(H, W)`` shape.
+    """
+    import matplotlib.pyplot as plt
+    from matplotlib.lines import Line2D
+
+    # Normalize `data` to a list of sources.
+    if isinstance(data, PixelRampData):
+        dataList = [data]
+    else:
+        dataList = list(data)
+    if not dataList:
+        raise ValueError("data is empty.")
+    H, W = dataList[0].cubeLin.shape[:-1]
+    for d in dataList[1:]:
+        if d.cubeLin.shape[:-1] != (H, W):
+            raise ValueError("all PixelRampData must share the same (H, W).")
+
+    # Normalize `coords` to a list of (x, y) tuples. A bare 2-tuple whose
+    # first element is not itself a tuple/list is taken as a single pixel.
+    if (
+        isinstance(coords, tuple)
+        and len(coords) == 2
+        and not isinstance(coords[0], (tuple, list))
+    ):
+        coords = [coords]
+    coords = list(coords)
+    nPx = len(coords)
+    if nPx == 0:
+        raise ValueError("coords is empty.")
+
+    ncols = 2 if addResidualColumn else 1
+    # Add a fixed header band on top of the row stack for the suptitle
+    # plus a 2-row fontsize=8 legend (measured ~0.34 in tall with default
+    # padding), plus gaps for the per-read markers that sit at the very
+    # top edge of the first axes.
+    headerHeightIn = 0.75
+    figHeight = rowHeight * nPx + headerHeightIn
+    # NOTE(review): figHeight is also what converts the header offsets
+    # (legend, suptitle, top margin) from inches to figure fractions
+    # further down. With a caller-supplied ``fig`` of a different height
+    # those offsets presumably no longer correspond to inches — confirm.
+    #
+    # Figure width is fixed at ``width`` regardless of the column count;
+    # an enabled residual column splits that width equally with the main
+    # column. Tight inter-column spacing keeps each plot as wide as
+    # possible at the cost of letting the columns share a y-axis edge.
+    if fig is None:
+        fig, axesGrid = plt.subplots(
+            nPx, ncols, figsize=(width, figHeight),
+            squeeze=False, sharex='col',
+            gridspec_kw={'wspace': 0.06} if ncols > 1 else None,
+        )
+    else:
+        axesGrid = fig.subplots(
+            nPx, ncols, squeeze=False, sharex='col',
+            gridspec_kw={'wspace': 0.06} if ncols > 1 else None,
+        )
+    axes = axesGrid[:, 0]
+    residAxes = axesGrid[:, 1] if addResidualColumn else None
+
+    # Use the first source for fitMin/fitMax (the linearity calibration is
+    # per-pixel, not per-read, so it's the same regardless of source).
+    fitMin_arr = dataList[0].fitMin
+    fitMax_arr = dataList[0].fitMax
+    # The overlay is switched off, without complaint, when either limit
+    # array is missing.
+    showFitRange = (
+        showFitRange and fitMin_arr is not None and fitMax_arr is not None
+    )
+
+    for i, (x, y) in enumerate(coords):
+        ax = axes[i]
+        for s, d in enumerate(dataList):
+            # Line style identifies the source; color identifies the curve.
+            ls = _PIXEL_RAMP_LINESTYLES[s % len(_PIXEL_RAMP_LINESTYLES)]
+            reads = d.readIndices
+            if includePreDark:
+                ax.plot(reads, d.cubePreDark[y, x, :], 'o', linestyle=ls,
+                        color='C0', lw=1.0, markersize=3)
+            if includeDark:
+                ax.plot(reads, d.cubeDark[y, x, :], 'd', linestyle=ls,
+                        color='C2', lw=1.0, markersize=3)
+            ax.plot(reads, d.cubeRaw[y, x, :], 's', linestyle=ls,
+                    color='C1', lw=1.0, markersize=3)
+            ax.plot(reads, d.cubeLin[y, x, :], '^', linestyle=ls,
+                    color='C3', lw=1.0, markersize=3)
+            if includeCR and d.cubeCR is not None:
+                ax.plot(reads, d.cubeCR[y, x, :], 'v', linestyle=ls,
+                        color='C4', lw=1.0, markersize=3)
+            # Faint UTR-rate reference line: cubeLin[..., 0] + rate * (k - k0).
+            # Lets the eye spot deviations from the per-pixel linear
+            # extrapolation that the detector uses internally.
+            if d.avgRate is not None:
+                rate = float(d.avgRate[y, x])
+                anchor = float(d.cubeLin[y, x, 0])
+                utrLine = anchor + rate * (reads - reads[0]).astype(np.float64)
+                ax.plot(reads, utrLine, linestyle=ls, color='0.4',
+                        lw=0.8, alpha=0.45)
+        if showFitRange:
+            fmax = float(fitMax_arr[y, x])
+            fmin = float(fitMin_arr[y, x])
+            ax.axhline(fmax, color='0.4', lw=0.8, ls=':', alpha=0.8)
+            ax.axhline(fmin, color='0.4', lw=0.8, ls=':', alpha=0.4)
+            # Compact annotation in the upper-right corner.
+            ax.text(0.995, 0.95,
+                    f'fitMax={fmax:.0f}  fitMin={fmin:.0f}',
+                    transform=ax.transAxes, ha='right', va='top',
+                    fontsize=7, color='0.3',
+                    bbox=dict(boxstyle='round,pad=0.2',
+                              facecolor='white', edgecolor='none',
+                              alpha=0.7))
+        ax.set_ylabel('ADU', fontsize=9)
+        # Pixel coordinate in the top-left of the plot (paste-ready Python
+        # tuple-unpack form).
+        ax.text(
+            0.01, 0.97, f'(x, y) = ({x}, {y})',
+            transform=ax.transAxes, ha='left', va='top',
+            fontsize=8, fontweight='bold', color='black',
+            bbox=dict(boxstyle='round,pad=0.2',
+                      facecolor='white', edgecolor='none', alpha=0.85),
+        )
+        ax.grid(True, alpha=0.3)
+
+        # Per-read CR / ASIC_GLITCH markers along the top of the axes.
+        # A flag at delta index k → marker at read k+1 (the cumulative
+        # sample where the jump first appears). Drawn per source so
+        # multi-half data shows each half's flags at the matching reads.
+        # The x-axis transform puts x in data units and y in axes
+        # fractions, so y=1.02 sits just above the top edge.
+        for d in dataList:
+            reads = d.readIndices
+            if d.crFlagMask is not None:
+                flags = np.asarray(d.crFlagMask[y, x, :], dtype=bool)
+                if flags.any():
+                    ax.plot(
+                        reads[1:][flags], np.full(int(flags.sum()), 1.02),
+                        marker='v', color='#d62728', linestyle='none',
+                        markersize=7, mew=0, clip_on=False,
+                        transform=ax.get_xaxis_transform(),
+                    )
+            if d.glitchFlagMask is not None:
+                flags = np.asarray(d.glitchFlagMask[y, x, :], dtype=bool)
+                # Glitches come in delta-pairs (the up + down). The
+                # physically corrupted read is the cumulative sample
+                # between them — for a pair at delta indices (k, k+1)
+                # that's read k+1. Mark each pair-start once.
+                pairStart = flags[:-1] & flags[1:]
+                if pairStart.any():
+                    pairReads = reads[1:-1][pairStart]
+                    ax.plot(
+                        pairReads, np.full(len(pairReads), 1.02),
+                        marker='v', mfc='none', color='#ff7f0e',
+                        linestyle='none', markersize=8, mew=1.4,
+                        clip_on=False,
+                        transform=ax.get_xaxis_transform(),
+                    )
+
+        # Per-pixel badges for the non-temporal planes (BAD, SAT, INTRP,
+        # NO_DATA, SUSPECT, EDGE, ...) — CR and ASIC_GLITCH are already
+        # surfaced by the per-read markers above. The mask bits are OR-ed
+        # over every source that has both a mask and a plane dict; the
+        # plane dict of the last such source decodes the union.
+        unionMask = 0
+        planeDict = None
+        for d in dataList:
+            if d.mask is not None and d.maskPlaneDict:
+                unionMask |= int(d.mask[y, x])
+                planeDict = d.maskPlaneDict
+        if planeDict is not None and unionMask != 0:
+            planes = [
+                p for p in _activeMaskPlanes(unionMask, planeDict)
+                if p not in ('CR', 'ASIC_GLITCH')
+            ]
+            # Place badges below the coord text, not on top of it.
+            _drawMaskBadges(ax, planes, yPos=0.86)
+
+        if residAxes is not None:
+            axR = residAxes[i]
+            # Joint linear fit across all sources: a single common slope
+            # is subtracted from each source's cubeLin. The fit itself
+            # includes an intercept, but only the slope is kept and no
+            # intercept is subtracted. That way endpoint *offsets* between
+            # halves remain visible -- they would be exactly zero if
+            # linearization were perfectly consistent across the ramp.
+            xAll, yAll = [], []
+            for d in dataList:
+                xAll.append(
+                    (d.readIndices - d.readIndices[0]).astype(np.float64)
+                )
+                yAll.append(d.cubeLin[y, x, :].astype(np.float64, copy=False))
+            xAll = np.concatenate(xAll)
+            yAll = np.concatenate(yAll)
+            # Fewer than two points cannot define a slope; fall back to 0.
+            slope = float(np.polyfit(xAll, yAll, 1)[0]) if xAll.size >= 2 else 0.0
+            for s, d in enumerate(dataList):
+                ls = _PIXEL_RAMP_LINESTYLES[s % len(_PIXEL_RAMP_LINESTYLES)]
+                xRel = (d.readIndices - d.readIndices[0]).astype(np.float64)
+                lin = d.cubeLin[y, x, :].astype(np.float64, copy=False)
+                resid = (lin - slope * xRel).astype(np.float32, copy=False)
+                axR.plot(xRel.astype(np.int32), resid, '^', linestyle=ls,
+                         color='C3', lw=1.0, markersize=3)
+            axR.axhline(0.0, color='k', lw=0.5, ls=':', alpha=0.7)
+            axR.grid(True, alpha=0.3)
+            # Slope annotation inside the axes rather than as an ylabel,
+            # so the residual plot's data area gets all available width.
+            axR.text(
+                0.01, 0.97, f'residual:  cubeLin − {slope:.1f}·x',
+                transform=axR.transAxes, ha='left', va='top',
+                fontsize=8, color='0.3',
+                bbox=dict(boxstyle='round,pad=0.2',
+                          facecolor='white', edgecolor='none', alpha=0.85),
+            )
+            # Residual y-ticks on the right so the inter-column gap
+            # doesn't have to reserve space for tick labels.
+            axR.yaxis.tick_right()
+            axR.yaxis.set_label_position('right')
+
+    # Combined figure-level legend: curve types (by color/marker) and
+    # (only when there are >1 sources) line styles per source. The
+    # handles are proxy artists; entries mirror the toggles used above.
+    legendItems = []
+    if includePreDark:
+        legendItems.append(Line2D([0], [0], marker='o', color='C0',
+                                  lw=1.0, label='pre-dark (raw)'))
+    if includeDark:
+        legendItems.append(Line2D([0], [0], marker='d', color='C2',
+                                  lw=1.0, label='dark'))
+    legendItems.append(Line2D([0], [0], marker='s', color='C1',
+                              lw=1.0, label='post-dark (pre-lin input)'))
+    legendItems.append(Line2D([0], [0], marker='^', color='C3',
+                              lw=1.0, label='linearized'))
+    if includeCR and any(d.cubeCR is not None for d in dataList):
+        legendItems.append(Line2D([0], [0], marker='v', color='C4',
+                                  lw=1.0, label='linearized + CR repaired'))
+    if any(d.avgRate is not None for d in dataList):
+        legendItems.append(Line2D([0], [0], color='0.4', lw=0.8, alpha=0.6,
+                                  label='UTR rate × k'))
+    if showFitRange:
+        legendItems.append(Line2D([0], [0], color='0.4', lw=0.8, ls=':',
+                                  label='fitMax / fitMin'))
+    if any(d.crFlagMask is not None for d in dataList):
+        legendItems.append(Line2D([0], [0], marker='v', color='#d62728',
+                                  lw=0, markersize=7, mew=0,
+                                  label='CR flag (read)'))
+    if any(d.glitchFlagMask is not None for d in dataList):
+        legendItems.append(Line2D([0], [0], marker='v', color='#ff7f0e',
+                                  mfc='none', lw=0, markersize=8, mew=1.4,
+                                  label='ASIC_GLITCH flag (read)'))
+    if len(dataList) > 1:
+        for s, d in enumerate(dataList):
+            ls = _PIXEL_RAMP_LINESTYLES[s % len(_PIXEL_RAMP_LINESTYLES)]
+            legendItems.append(Line2D(
+                [0], [0], color='k', linestyle=ls, lw=1.2,
+                label=f'src {s}: reads {d.firstRead + 1}..{d.lastRead}',
+            ))
+    # Figure-level legend in the header band — placed in figure coords so
+    # it sits below the suptitle at a fixed offset regardless of row count.
+    # Top edge ~0.26 in below the figure top leaves ~0.04 in gap below the
+    # ~0.18 in tall suptitle text.
+    fig.legend(
+        handles=legendItems,
+        loc='upper left',
+        bbox_to_anchor=(0.07, 1.0 - 0.26 / figHeight),
+        ncol=max(2, (len(legendItems) + 1) // 2),
+        fontsize=8, framealpha=0.85, borderaxespad=0.0,
+    )
+
+    # The x label goes on the bottom row only; the rows share their x-axis.
+    if len(dataList) == 1:
+        d = dataList[0]
+        xlabel = (
+            f'absolute read index  '
+            f'(cube spans firstRead={d.firstRead}..lastRead={d.lastRead})'
+        )
+    else:
+        lo = min(d.firstRead for d in dataList)
+        hi = max(d.lastRead for d in dataList)
+        xlabel = (
+            f'absolute read index  (sources span reads {lo + 1}..{hi})'
+        )
+    axes[-1].set_xlabel(xlabel)
+    if residAxes is not None:
+        residAxes[-1].set_xlabel('read offset from start of half')
+        residAxes[0].set_title(
+            'cubeLin − common slope × offset  (per-pixel joint fit)',
+            fontsize=10,
+        )
+        axes[0].set_title('ramp curves', fontsize=10)
+
+    # Visit and camera for the title are taken from the first source.
+    visit = dataList[0].visit
+    cam = dataList[0].cam
+    suffix = '' if len(dataList) == 1 else f'  ({len(dataList)} sources)'
+    # Suptitle anchored near the top edge of the figure; the legend below
+    # is offset further to leave room for the ~0.18 in tall title text.
+    fig.suptitle(f'visit={visit} {cam}  per-pixel ramps{suffix}',
+                 fontsize=11, y=1.0 - 0.04 / figHeight)
+    # Manual layout — tight_layout can't model the bbox-anchored external
+    # legend (warns and overshoots the top reserve). subplots_adjust uses
+    # the header band absolutely and keeps the per-row data area as wide
+    # and as tall as possible.
+    topMargin = 1.0 - headerHeightIn / figHeight
+    fig.subplots_adjust(
+        top=topMargin,
+        bottom=0.05 if nPx == 1 else 0.04 + 0.04 / nPx,
+        left=0.07,
+        right=0.97,
+        wspace=0.06,
+        hspace=0.15,
+    )
+    return fig
+
+
+def plotResidualHist(
+    data: Union[str, HalvesIsrData],
+    *,
+    rateMin: float = 5.0,
+    fig=None,
+):
+    """Two-panel histogram: ``relDiff`` and ``addResidRel`` over unmasked bright pixels.
+
+    Useful for at-a-glance shape — symmetric? heavy-tailed? bias?
+
+    A pixel is used when ``maskUnion == 0``, ``relDiff`` is finite and
+    ``avgRate > rateMin``. Each panel histograms the values with
+    ``|value| <= 0.5`` in 120 bins; the median and scaled MAD quoted in
+    the legend are computed from all finite values of that panel, not
+    only the histogrammed ones.
+
+    Parameters
+    ----------
+    data : str or HalvesIsrData
+        Passed through ``_asData`` before use.
+    rateMin : float
+        Exclusive lower bound on ``avgRate``.
+    fig : matplotlib.figure.Figure, optional
+        Figure to add the two axes to. A new 12 x 4 in figure is created
+        when omitted.
+
+    Returns
+    -------
+    fig : matplotlib.figure.Figure
+
+    Raises
+    ------
+    RuntimeError
+        If no pixel passes the selection.
+    """
+    import matplotlib.pyplot as plt
+
+    d = _asData(data)
+    unmasked = (d.maskUnion == 0) & np.isfinite(d.relDiff) & (d.avgRate > rateMin)
+    if not unmasked.any():
+        raise RuntimeError(f"no unmasked pixels with avgRate > {rateMin}")
+
+    rd = d.relDiff[unmasked]
+    # The selection only vouches for relDiff. addResidRel can still hold
+    # NaN/inf on those pixels, which would turn its median and MAD into
+    # NaN, so keep the finite values only.
+    rr = d.addResidRel[unmasked]
+    rr = rr[np.isfinite(rr)]
+
+    if fig is None:
+        fig, axes = plt.subplots(1, 2, figsize=(12, 4))
+    else:
+        axes = fig.subplots(1, 2)
+
+    for ax, vals, lbl in [(axes[0], rd, 'relDiff'),
+                          (axes[1], rr, 'addResid / |imgF|')]:
+        hasValues = vals.size > 0
+        if hasValues:
+            # Statistics use every finite value; only the drawn histogram
+            # is clipped to |value| <= 0.5.
+            clipped = vals[np.abs(vals) <= 0.5]
+            med = float(np.median(vals))
+            mad = float(1.4826 * np.median(np.abs(vals - med)))
+            ax.hist(clipped, bins=120, color='0.3')
+            ax.axvline(med, color='C3', lw=1.5,
+                       label=f'median={med:+.4f}  MAD={mad:.4f}  N={vals.size:,}')
+        ax.axvline(0.0, color='k', lw=0.5, ls=':')
+        ax.set_xlabel(lbl)
+        ax.set_ylabel(f'pixels with avgRate > {rateMin}')
+        if hasValues:
+            ax.legend(loc='upper right', fontsize=9)
+
+    fig.suptitle(f"visit={d.visit} {d.cam}  half-vs-half residual histograms",
+                 fontsize=11)
+    fig.tight_layout()
+    return fig
+
+
+def randomCRPixels(result, n: int = 10, *, rng=None):
+    """Pick up to ``n`` distinct random ``(x, y)`` pixels that ``result`` flags as CR.
+
+    A pixel qualifies when ``result.crFlagMask`` is set anywhere along
+    its last axis.
+
+    Parameters
+    ----------
+    result : `cr.IterativeRepairResult`
+    n : int
+        Upper bound on the number of pixels returned.
+    rng : np.random.Generator or int, optional
+        An ``int`` seeds a new generator, ``None`` creates an unseeded
+        one, anything else is used as given.
+
+    Returns
+    -------
+    coords : list[tuple[int, int]]
+
+    Raises
+    ------
+    RuntimeError
+        If no pixel is flagged.
+    """
+    if rng is None:
+        generator = np.random.default_rng()
+    elif isinstance(rng, (int, np.integer)):
+        generator = np.random.default_rng(int(rng))
+    else:
+        generator = rng
+
+    rows, cols = np.nonzero(result.crFlagMask.any(axis=-1))
+    nFlagged = rows.size
+    if nFlagged == 0:
+        raise RuntimeError("no CR-flagged pixels in result.")
+    chosen = generator.choice(
+        nFlagged, size=min(int(n), nFlagged), replace=False
+    )
+    return [(int(cols[k]), int(rows[k])) for k in chosen]
+
+
+def randomGlitchPixels(result, n: int = 10, *, rng=None):
+    """Pick up to ``n`` distinct random ``(x, y)`` pixels flagged as ASIC glitch.
+
+    A pixel qualifies when ``result.glitchFlagMask`` is set anywhere
+    along its last axis.
+
+    Parameters
+    ----------
+    result : `cr.IterativeRepairResult`
+    n : int
+        Upper bound on the number of pixels returned.
+    rng : np.random.Generator or int, optional
+        An ``int`` seeds a new generator, ``None`` creates an unseeded
+        one, anything else is used as given.
+
+    Returns
+    -------
+    coords : list[tuple[int, int]]
+
+    Raises
+    ------
+    RuntimeError
+        If no pixel is flagged.
+    """
+    if rng is None:
+        generator = np.random.default_rng()
+    elif isinstance(rng, (int, np.integer)):
+        generator = np.random.default_rng(int(rng))
+    else:
+        generator = rng
+
+    rows, cols = np.nonzero(result.glitchFlagMask.any(axis=-1))
+    nFlagged = rows.size
+    if nFlagged == 0:
+        raise RuntimeError("no ASIC-glitch-flagged pixels in result.")
+    chosen = generator.choice(
+        nFlagged, size=min(int(n), nFlagged), replace=False
+    )
+    return [(int(cols[k]), int(rows[k])) for k in chosen]
+
+
+def randomBadPixels(result, n: int = 10, *, rng=None):
+    """Pick up to ``n`` distinct random ``(x, y)`` pixels flagged BAD (RTS).
+
+    The flag comes from the CR detector's outlier-count gate: pixels
+    with ≥ ``badPixelMinOutliers`` 3σ-IQR delta excursions on the
+    pristine ramp — telegraph-noise / persistent-defect pixels, not
+    single-CR pixels. Here it is read from ``result.badPixelMask``, with
+    every truthy entry counting as flagged.
+
+    Parameters
+    ----------
+    result : `cr.IterativeRepairResult`
+    n : int
+        Upper bound on the number of pixels returned.
+    rng : np.random.Generator or int, optional
+        An ``int`` seeds a new generator, ``None`` creates an unseeded
+        one, anything else is used as given.
+
+    Returns
+    -------
+    coords : list[tuple[int, int]]
+
+    Raises
+    ------
+    RuntimeError
+        If no pixel is flagged.
+    """
+    if rng is None:
+        generator = np.random.default_rng()
+    elif isinstance(rng, (int, np.integer)):
+        generator = np.random.default_rng(int(rng))
+    else:
+        generator = rng
+
+    rows, cols = np.nonzero(np.asarray(result.badPixelMask, dtype=bool))
+    nFlagged = rows.size
+    if nFlagged == 0:
+        raise RuntimeError("no BAD-flagged pixels in result.")
+    chosen = generator.choice(
+        nFlagged, size=min(int(n), nFlagged), replace=False
+    )
+    return [(int(cols[k]), int(rows[k])) for k in chosen]
+
+
+def plotCRRamps(
+    cubeOriginal: np.ndarray,
+    cubeRepaired: np.ndarray,
+    result,
+    coords,
+    *,
+    space: str = 'flux',
+    rowHeight: float = 2.0,
+    width: float = 6.0,
+    fig=None,
+):
+    """Plot N pixels comparing pre- and post-repair ramps.
+
+    One row per pixel, two columns:
+
+    - Left: ``cubeOriginal`` (pre-repair linearized) with the per-pixel UTR
+      rate from ``result.rate``. Flux space overlays the line
+      ``cubeRepaired[y, x, 0] + rate * k``; delta space overlays a horizontal
+      line at ``rate``. The line is anchored on the repaired cube's first
+      sample so a CR/glitch in an early delta does not offset it.
+    - Right: ``cubeRepaired`` (post-CR/glitch repair) drawn the same way.
+
+    Flagged samples are highlighted in both columns: CR as red ``x``, ASIC glitch
+    as open orange ``o``. In flux space the marker sits on the cumulative *after*
+    the flagged delta (index ``k+1``); in delta space on the flagged delta itself.
+
+    Parameters
+    ----------
+    cubeOriginal, cubeRepaired : np.ndarray
+        Same-shape ``(H, W, N)`` cumulative cubes (pre-/post-repair), time axis last.
+    result : `cr.IterativeRepairResult`
+        Source of ``rate`` and of ``crFlagMask`` / ``glitchFlagMask`` (``(H, W, N-1)``).
+    coords : one ``(x, y)`` tuple or a sequence of them; one subplot row each.
+    space : {'flux', 'delta'}; plot cumulative reads or read-to-read deltas.
+    rowHeight, width : float; inches per row / per column when ``fig`` is None.
+    fig : matplotlib.figure.Figure, optional; a new figure is made when None.
+
+    Returns the figure. Raises ``ValueError`` on an unknown ``space``, on cube
+    shape mismatch, or on empty ``coords``.
+    """
+    import matplotlib.pyplot as plt
+    from matplotlib.lines import Line2D
+
+    if space not in ('flux', 'delta'):
+        raise ValueError(f"space must be 'flux' or 'delta'; got {space!r}.")
+    if cubeOriginal.shape != cubeRepaired.shape:
+        raise ValueError(
+            f"cubeOriginal {cubeOriginal.shape} and cubeRepaired "
+            f"{cubeRepaired.shape} must share shape."
+        )
+    if (
+        isinstance(coords, tuple)
+        and len(coords) == 2
+        and not isinstance(coords[0], (tuple, list))
+    ):
+        coords = [coords]  # a bare (x, y) pair becomes a one-element list
+    coords = list(coords)
+    nPx = len(coords)
+    if nPx == 0:
+        raise ValueError("coords is empty.")
+
+    if fig is None:
+        fig, axesGrid = plt.subplots(
+            nPx, 2, figsize=(width * 2, rowHeight * nPx),
+            squeeze=False, sharex='col',
+        )
+    else:
+        axesGrid = fig.subplots(nPx, 2, squeeze=False, sharex='col')
+
+    nReads = cubeOriginal.shape[-1]
+    reads = np.arange(nReads)
+    deltaIdx = np.arange(nReads - 1)
+
+    for i, (x, y) in enumerate(coords):
+        rate = float(result.rate[y, x])
+        crFlag = np.asarray(result.crFlagMask[y, x, :], dtype=bool)
+        glFlag = np.asarray(result.glitchFlagMask[y, x, :], dtype=bool)
+
+        if space == 'flux':
+            seriesOrig = cubeOriginal[y, x, :]
+            seriesRep = cubeRepaired[y, x, :]
+            # A flag at delta index k -> highlight cumulative sample k+1.
+            crSamp = np.concatenate(([False], crFlag))
+            glSamp = np.concatenate(([False], glFlag))
+            xVals = reads
+            xLabel = 'read index'
+            yLabel = 'ADU'
+        else:
+            seriesOrig = np.diff(cubeOriginal[y, x, :])
+            seriesRep = np.diff(cubeRepaired[y, x, :])
+            crSamp = crFlag
+            glSamp = glFlag
+            xVals = deltaIdx
+            xLabel = 'delta index'
+            yLabel = 'ΔADU'
+
+        for col, vals, title in ((0, seriesOrig, 'linearized'),
+                                 (1, seriesRep, 'repaired')):
+            ax = axesGrid[i, col]
+            ax.plot(xVals, vals, '.-', color='C3', lw=1.0, markersize=4)
+            if space == 'flux':
+                # Anchor on the repaired cube's first sample: cubeOriginal[..., 0]
+                # is contaminated when an early delta is the flagged CR/glitch.
+                ax.plot(reads, float(seriesRep[0]) + rate * reads,
+                        color='k', lw=0.8, ls='--', alpha=0.7)
+            else:
+                ax.axhline(rate, color='k', lw=0.8, ls='--', alpha=0.7)
+            if crSamp.any():
+                ax.plot(xVals[crSamp], vals[crSamp], 'x',
+                        color='red', markersize=8, mew=1.5, linestyle='none')
+            if glSamp.any():
+                ax.plot(xVals[glSamp], vals[glSamp], 'o',
+                        mfc='none', color='orange', markersize=8, mew=1.5,
+                        linestyle='none')
+            if i == 0:
+                ax.set_title(title)
+            ax.grid(True, alpha=0.3)
+        axesGrid[i, 0].set_ylabel(f'(x, y) = ({x}, {y})\n{yLabel}', fontsize=9)
+
+    axesGrid[-1, 0].set_xlabel(xLabel)
+    axesGrid[-1, 1].set_xlabel(xLabel)
+
+    legendItems = [
+        Line2D([0], [0], marker='.', color='C3', lw=1.0, label='cube'),
+        Line2D([0], [0], color='k', lw=0.8, ls='--', label='UTR rate'),
+        Line2D([0], [0], marker='x', color='red', lw=0, markersize=8,
+               mew=1.5, label='CR'),
+        Line2D([0], [0], marker='o', color='orange', mfc='none', lw=0,
+               markersize=8, mew=1.5, label='ASIC glitch'),
+    ]
+    axesGrid[0, 0].legend(handles=legendItems, loc='upper left', fontsize=8,
+                          framealpha=0.85)
+    fig.suptitle(f'CR / ASIC-glitch correction  ({space} space)',
+                 fontsize=11, y=0.995)
+    fig.tight_layout(rect=(0, 0, 1, 0.99))
+    return fig
+
+
+_DEFAULT_DS9_PLANE_COLORS = {  # mask-plane name -> DS9 mask color (displayDS9 defaults)
+    'BAD': 'red',
+    'SAT': 'yellow',
+    'CR': 'magenta',
+    'ASIC_GLITCH': 'orange',
+    'INTRP': 'green',
+    'NO_DATA': 'cyan',
+    'EDGE': 'cyan',
+    'BORDER': 'cyan',
+    'SUSPECT': 'purple',
+}
+
+
+def displayDS9(
+    image,
+    mask=None,
+    *,
+    maskPlaneDict=None,
+    planes=('BAD', 'SAT', 'CR', 'ASIC_GLITCH'),
+    planeColors=None,
+    ds9=None,
+    transparency: int = 70,
+    scale: str = 'zscale',
+):
+    """Display a 2D image plus colored mask planes on a pyds9 DS9 frame.
+
+    Opens a new frame on the given (or newly created) DS9 connection,
+    sends the image, then overlays each requested mask plane as its own
+    colored mask layer.
+
+    Parameters
+    ----------
+    image : np.ndarray
+        2D image to display (e.g. UTR rate, CDS, single read).
+    mask : np.ndarray, optional
+        2D integer mask (any bit width) with the per-plane bits set. If
+        None, only the image is displayed.
+    maskPlaneDict : dict, optional
+        ``{plane_name: bit_index}`` mapping. Without it no mask layer is
+        drawn. PixelRampData carries this as ``data.maskPlaneDict``.
+    planes : sequence of str
+        Mask plane names to overlay. Default ``('BAD', 'SAT', 'CR',
+        'ASIC_GLITCH')``. Names missing from ``maskPlaneDict`` are
+        silently skipped.
+    planeColors : dict, optional
+        Override the default color map. Built-in defaults: BAD=red,
+        SAT=yellow, CR=magenta, ASIC_GLITCH=orange, INTRP=green,
+        NO_DATA/EDGE/BORDER=cyan, SUSPECT=purple.
+    ds9 : pyds9.DS9, optional
+        Existing DS9 connection. If None, a new one is created (which
+        launches DS9 if it isn't running).
+    transparency : int
+        Mask transparency, 0 (opaque) to 100 (invisible). DS9 default 70.
+    scale : str
+        DS9 scale spec, e.g. ``'zscale'``, ``'linear'``, ``'log'``.
+
+    Returns
+    -------
+    ds9 : pyds9.DS9
+        The (possibly newly created) DS9 connection. Pass back in to
+        layer more frames on the same DS9 window.
+    """
+    try:
+        import pyds9
+    except ImportError as e:
+        raise ImportError(
+            "pyds9 is required for displayDS9; install it (pip install pyds9) "
+            "or run via the LSST stack environment that provides it."
+        ) from e
+
+    if ds9 is None:
+        ds9 = pyds9.DS9()
+
+    ds9.set('frame new')
+
+    img = np.asarray(image, dtype=np.float32)
+    ds9.set_np2arr(img)
+    ds9.set(f'scale {scale}')
+    ds9.set('zoom to fit')
+
+    if mask is None or maskPlaneDict is None or not planes:
+        return ds9
+
+    colors = {**_DEFAULT_DS9_PLANE_COLORS, **(planeColors or {})}
+    ds9.set(f'mask transparency {int(transparency)}')
+
+    maskArr = np.asarray(mask)
+    H, W = maskArr.shape
+    for plane in planes:
+        if plane not in maskPlaneDict:
+            continue
+        bit = int(maskPlaneDict[plane])
+        # Each plane is sent as its own DS9 mask layer; the most recent
+        # `mask color` applies to the next array-mask push.
+        # Shift the plane bit down to 0/1 *before* the uint16 cast: masking
+        # with ``1 << bit`` and then casting zeroes every plane with bit >= 16.
+        planeArr = ((maskArr >> bit) & 1).astype('u2')
+        if not planeArr.any():
+            continue
+        color = colors.get(plane, 'gray')
+        ds9.set(f'mask color {color}')
+        ds9.set(f'array mask [xdim={W},ydim={H},bitpix=16]', planeArr)
+
+    return ds9
+
+
+def _selMask(data, excludePlanes):
+    """Boolean ``(H, W)`` selector: True where no ``excludePlanes`` bit is set.
+
+    All-True (``avgRate``'s shape, or None if that is absent too) when
+    ``data`` lacks ``mask`` / ``maskPlaneDict``. Used by the halves helpers.
+    """
+    avgRate = getattr(data, 'avgRate', None)
+    rateShape = None if avgRate is None else avgRate.shape
+    maskArr = getattr(data, 'mask', None)
+    planeBits = getattr(data, 'maskPlaneDict', None)
+    if maskArr is None or planeBits is None:
+        return None if rateShape is None else np.ones(rateShape, dtype=bool)
+    intType = maskArr.dtype.type
+    rejectBits = intType(0)
+    for planeName in excludePlanes:
+        if planeName in planeBits:
+            rejectBits = rejectBits | (intType(1) << intType(int(planeBits[planeName])))
+    if rejectBits == 0:
+        return np.ones(maskArr.shape, dtype=bool)
+    return (maskArr & rejectBits) == 0
+
+
+def _halvesRateMetric(first, second, metric: str):
+    """Build the half-vs-half rate comparison as ``(metricArr, xlabel)``."""
+    rateA = np.asarray(first.avgRate, dtype=np.float64)
+    rateB = np.asarray(second.avgRate, dtype=np.float64)
+    if metric not in ('diff', 'reldiff', 'ratio'):
+        raise ValueError(
+            f"unknown metric {metric!r}; expected 'diff', 'reldiff', or 'ratio'."
+        )
+    if metric == 'diff':
+        return rateA - rateB, 'rate diff  (first − second)  [ADU/read]'
+    # Zero / non-finite denominators become inf/nan silently; the callers
+    # drop those entries with an isfinite filter.
+    with np.errstate(invalid='ignore', divide='ignore'):
+        if metric == 'reldiff':
+            relDiff = 2.0 * (rateA - rateB) / (rateA + rateB)
+            return relDiff, 'rel rate diff  2(first − second)/(first + second)'
+        return rateA / rateB, 'rate ratio  first / second'
+
+
+def plotHalvesRateHistogram(
+    first,
+    second,
+    *,
+    metric: str = 'diff',
+    bins: int = 100,
+    excludePlanes=_DEFAULT_EXCLUDE_PLANES,
+    logy: bool = True,
+    range_=None,
+    ax=None,
+):
+    """Histogram of per-pixel UTR-rate comparison between two ramp halves.
+
+    Parameters
+    ----------
+    first, second : PixelRampData
+        Half-ramp results from ``validate.collectPixelRampData``.
+    metric : {'diff', 'reldiff', 'ratio'}
+        - ``'diff'``: ``first.avgRate − second.avgRate`` (ADU/read).
+        - ``'reldiff'``: ``2*(first - second)/(first + second)``.
+        - ``'ratio'``: ``first / second``.
+    bins : int; number of histogram bins.
+    excludePlanes : sequence of str
+        Drop pixels with any of these planes set in EITHER half (non-finite
+        metric values are dropped too). Defaults to ``('BAD', 'SAT', 'BORDER')``.
+    logy : bool; draw the pixel-count axis on a log scale.
+    range_ : tuple, optional
+        ``(lo, hi)`` x-axis range for the histogram. Default is derived
+        from the 1st / 99th percentiles of the selected metric values.
+    ax : matplotlib axis, optional; a new figure is created when None.
+
+    Returns
+    -------
+    fig : matplotlib.figure.Figure
+    """
+    metricArr, xlabel = _halvesRateMetric(first, second, metric)
+    sel1 = _selMask(first, excludePlanes)
+    sel2 = _selMask(second, excludePlanes)
+    valid = np.isfinite(metricArr) & sel1 & sel2
+    vals = metricArr[valid]
+
+    if ax is None:
+        import matplotlib.pyplot as plt
+        fig, ax = plt.subplots(figsize=(10, 4))
+    else:
+        fig = ax.figure
+
+    if range_ is None:
+        if vals.size:
+            p1, p99 = np.percentile(vals, [1, 99])
+            if metric == 'ratio':
+                pad = max(p99 - p1, 0.1)
+                lo, hi = max(p1 - 0.5 * pad, 0.0), p99 + 0.5 * pad
+            else:
+                span = max(abs(p1), abs(p99))
+                lo, hi = -2.5 * span, 2.5 * span
+        else:
+            lo, hi = -1.0, 1.0
+        range_ = (float(lo), float(hi))
+
+    ax.hist(
+        vals, bins=bins, range=range_,
+        color='steelblue', edgecolor='black', lw=0.4,
+    )
+    if logy:
+        ax.set_yscale('log')
+    ax.set_xlabel(xlabel)
+    ax.set_ylabel('pixels')
+    ax.grid(True, alpha=0.3, axis='y')
+
+    # Annotate percentiles of |deviation from center| (center: 1 for ratio, else 0).
+    center = 1.0 if metric == 'ratio' else 0.0
+    dev = np.abs(vals - center)
+    if dev.size:
+        pcts = (50, 84, 95, 99, 99.9)
+        pVals = np.percentile(dev, pcts)
+        pStr = '   '.join(f'{p}%={v:.3f}' for p, v in zip(pcts, pVals))
+    else:
+        pStr = 'no valid pixels'
+
+    visit = getattr(first, 'visit', '?')
+    cam = getattr(first, 'cam', '?')
+    nValid = int(valid.sum())
+    nExcl = int(valid.size - nValid)
+    ax.set_title(
+        f'v{visit} {cam}  half-vs-half UTR rate ({metric})  '
+        f'[N={nValid:,}, excluded={nExcl:,}]\n'
+        f'|Δ| pct  {pStr}',
+        fontsize=10,
+    )
+    fig.tight_layout()
+    return fig
+
+
+def summarizeHalves(
+    first,
+    second,
+    *,
+    metric: str = 'diff',
+    threshold: Optional[float] = None,
+    excludePlanes=_DEFAULT_EXCLUDE_PLANES,
+    maxList: int = 20,
+    printOutput: bool = True,
+):
+    """Mask-plane counts + outlier coords from a halves comparison.
+
+    Parameters
+    ----------
+    first, second : PixelRampData; ``avgRate``, ``mask``, ``maskPlaneDict`` are read.
+    metric : {'diff', 'reldiff', 'ratio'}
+    threshold : float, optional
+        Outlier threshold for the chosen metric. Defaults: diff=5
+        ADU/read, reldiff=0.2, ratio=2 (an outlier is also <1/threshold
+        for ratio).
+    excludePlanes : sequence of str
+        Pixels with any of these mask planes set in EITHER half are
+        excluded from outlier counting.
+    maxList : int
+        Cap on the number of outliers listed in the printed summary.
+        The returned ``outliers`` list is full-length.
+    printOutput : bool
+        Print a text summary; defaults True.
+
+    Returns
+    -------
+    dict with keys:
+      - ``outliers``: list of ``(x, y)`` in descending |deviation| order
+      - ``metricValues``: np.ndarray of the metric at each outlier,
+        same order as ``outliers``
+      - ``maskCounts``: ``{plane: (first, second, union)}`` pixel counts (union needs both masks)
+      - ``percentiles``: ``{pct: |Δ|}`` over valid pixels (Δ from 1 for ratio, else from 0)
+      - ``nValid``: count of finite-metric pixels that passed the excludePlanes filter
+    """
+    metricArr, label = _halvesRateMetric(first, second, metric)
+    sel1 = _selMask(first, excludePlanes)
+    sel2 = _selMask(second, excludePlanes)
+    valid = np.isfinite(metricArr) & sel1 & sel2
+
+    if threshold is None:
+        threshold = {'diff': 5.0, 'reldiff': 0.2, 'ratio': 2.0}.get(metric, 0.0)
+
+    center = 1.0 if metric == 'ratio' else 0.0
+    if metric == 'ratio':
+        outlierMask = valid & (
+            (metricArr > threshold) | (metricArr < 1.0 / max(threshold, 1e-9))
+        )
+    else:
+        outlierMask = valid & (np.abs(metricArr - center) > threshold)
+
+    ys, xs = np.where(outlierMask)
+    diffs = metricArr[outlierMask]
+    order = np.argsort(np.abs(diffs - center))[::-1]
+    outliers = [(int(xs[i]), int(ys[i])) for i in order]
+    metricValues = diffs[order]
+
+    # Mask-plane counts per half + union.
+    planeNames = set()
+    for d in (first, second):
+        if getattr(d, 'maskPlaneDict', None):
+            planeNames |= set(d.maskPlaneDict)
+    maskCounts = {}
+    for name in sorted(planeNames):
+        cf = cs = cu = 0
+        bf = bs = 0
+        if first.mask is not None and name in (first.maskPlaneDict or {}):
+            bf = 1 << first.maskPlaneDict[name]
+            cf = int(((first.mask & bf) != 0).sum())
+        if second.mask is not None and name in (second.maskPlaneDict or {}):
+            bs = 1 << second.maskPlaneDict[name]
+            cs = int(((second.mask & bs) != 0).sum())
+        if first.mask is not None and second.mask is not None:  # else union stays 0
+            cu = int((((first.mask & bf) != 0) | ((second.mask & bs) != 0)).sum())
+        maskCounts[name] = (cf, cs, cu)
+
+    vals = metricArr[valid]
+    dev = np.abs(vals - center)
+    pcts = (50, 84, 95, 99, 99.9)
+    percentiles = (
+        {p: float(np.percentile(dev, p)) for p in pcts} if dev.size
+        else {p: float('nan') for p in pcts}
+    )
+
+    if printOutput:
+        nValid = int(valid.sum())
+        nExcl = int(valid.size - nValid)
+        print(f'half-vs-half {label}')
+        print(f'  valid pixels (after excludePlanes={tuple(excludePlanes)}): '
+              f'{nValid:,}   excluded: {nExcl:,}')
+        print(f'  outlier threshold: {threshold}')
+        print()
+        print('mask-plane counts  (first / second / union):')
+        for name, (cf, cs, cu) in maskCounts.items():
+            print(f'  {name:14s}  {cf:>10,} / {cs:>10,} / {cu:>10,}')
+        print()
+        print('|Δ| percentiles over valid pixels:')
+        for p in pcts:
+            print(f'  {p:5}%: {percentiles[p]:.4f}')
+        print()
+        print(f'outliers (|metric − {center}| > {threshold}): {len(outliers):,}')
+        if outliers:
+            shown = min(maxList, len(outliers))
+            print(f'top {shown} by |Δ|:')
+            print(f'  {"(x, y)":>16}     metric')
+            for (x, y), v in zip(outliers[:shown], metricValues[:shown]):
+                print(f'  {f"({x}, {y})":>16}   {v:>8.3f}')
+
+    return {
+        'outliers': outliers,
+        'metricValues': metricValues,
+        'maskCounts': maskCounts,
+        'percentiles': percentiles,
+        'nValid': int(valid.sum()),
+    }
+
+
+def displayPixelRampDS9(data, image=None, *, ds9=None, **kwargs):
+    """Show a `PixelRampData` image with its mask planes in DS9.
+
+    Thin wrapper over `displayDS9`: ``data.mask`` and
+    ``data.maskPlaneDict`` are supplied automatically.
+
+    Parameters
+    ----------
+    data : PixelRampData
+        Provides ``mask``, ``maskPlaneDict`` and the default image.
+    image : np.ndarray or str, optional
+        What to display. None (default) selects ``data.avgRate``; a
+        string names an attribute of ``data`` to fetch; anything else
+        is passed through unchanged (e.g. a 2D slice of a cube).
+    ds9 : pyds9.DS9, optional
+        Existing DS9 connection, handed on to `displayDS9`.
+    **kwargs : forwarded to `displayDS9` (planes, planeColors,
+        transparency, scale, ...).
+
+    Returns
+    -------
+    ds9 : pyds9.DS9
+    """
+    if isinstance(image, str):
+        pixels = getattr(data, image)
+    elif image is None:
+        pixels = data.avgRate
+    else:
+        pixels = image
+    maskArr, planeBits = data.mask, data.maskPlaneDict
+    return displayDS9(
+        pixels, maskArr, maskPlaneDict=planeBits, ds9=ds9, **kwargs,
+    )
+
+
+def filterCoordsByChannel(
+    coords,
+    channels,
+    *,
+    mode: str = 'select',
+    channelHeight: int = H4_AMP_WIDTH,
+):
+    """Keep or drop ``(x, y)`` pixels according to their ASIC channel.
+
+    The channel of pixel ``(x, y)`` is ``y // channelHeight``: on H4
+    detectors the 32 ASIC channels are stacked along y, each
+    ``channelHeight`` rows tall.
+
+    Parameters
+    ----------
+    coords : sequence of ``(x, y)`` tuples
+        Pixel coordinates in the project's user-facing convention.
+    channels : int or sequence of ints
+        Channel indices (0..31 for H4) the filter acts on.
+    mode : {'select', 'cull'}
+        ``'select'`` (default) returns the coords that fall in
+        ``channels``; ``'cull'`` returns all the others.
+    channelHeight : int
+        Rows per ASIC channel; 128 for H4.
+
+    Returns
+    -------
+    list of ``(x, y)`` int tuples, in input order.
+    """
+    if mode not in ('select', 'cull'):
+        raise ValueError(f"mode must be 'select' or 'cull'; got {mode!r}.")
+    if isinstance(channels, (int, np.integer)):
+        wanted = {int(channels)}
+    else:
+        wanted = {int(ch) for ch in channels}
+    dropMatches = mode == 'cull'
+    return [
+        (int(x), int(y))
+        for x, y in coords
+        if ((int(y) // channelHeight) in wanted) != dropMatches
+    ]
+
+
+def plotGlitchHistogramPerChannel(
+    source,
+    *,
+    channelHeight: int = H4_AMP_WIDTH,
+    countPixels: bool = False,
+    ax=None,
+    color: str = '#ff7f0e',
+):
+    """Bar histogram of ASIC glitches per H4 ASIC channel.
+
+    H4 detectors have 32 ASIC channels along the y-axis, each
+    ``channelHeight`` rows tall. This plot shows how many glitches the
+    iterative detector flagged in each channel.
+
+    Parameters
+    ----------
+    source : PixelRampData, cr.IterativeRepairResult, or np.ndarray
+        Anything carrying a ``glitchFlagMask`` attribute of shape
+        ``(H, W, N-1)``, or the array itself.
+    channelHeight : int
+        Rows per ASIC channel (128 for H4); must divide ``H`` exactly.
+    countPixels : bool
+        Default (False) counts glitch PAIRS — distinct ASIC-glitch
+        events. True counts unique flagged pixels per channel; useful
+        for spatial-density inspection but obscures pixels that took
+        multiple hits.
+    ax : matplotlib axis, optional; a new figure is created when None.
+    color : str; bar face color.
+
+    Returns
+    -------
+    fig : matplotlib.figure.Figure
+    """
+    if isinstance(source, np.ndarray):
+        flags = source
+    else:
+        flags = getattr(source, 'glitchFlagMask', None)
+    if flags is None:
+        raise ValueError(
+            "source has no `glitchFlagMask` (pass an IterativeRepairResult, "
+            "a PixelRampData populated by validate.collectPixelRampData, or "
+            "the (H, W, N-1) bool array directly)."
+        )
+    flags = np.asarray(flags, dtype=bool)
+    if flags.ndim != 3:
+        raise ValueError(
+            f"glitchFlagMask must be (H, W, N-1); got shape {flags.shape}."
+        )
+
+    if countPixels:
+        # Per-pixel "had at least one glitch hit" mask, summed per channel.
+        perPixel = flags.any(axis=-1)
+        ylabel = 'unique flagged pixels'
+    else:
+        # Glitch pairs come as two adjacent True deltas; count each pair once
+        # at its starting delta index (a run of k Trues thus counts k-1 pairs).
+        pairStart = flags[..., :-1] & flags[..., 1:]
+        perPixel = pairStart.sum(axis=-1).astype(np.int64)
+        ylabel = 'ASIC-glitch pair count'
+
+    H, W = perPixel.shape
+    nChannels = H // channelHeight
+    if H % channelHeight != 0:
+        raise ValueError(
+            f"H={H} not divisible by channelHeight={channelHeight}; "
+            f"can't bin by channel cleanly."
+        )
+    perChannel = perPixel.reshape(nChannels, channelHeight, W).sum(axis=(1, 2))
+
+    if ax is None:
+        import matplotlib.pyplot as plt
+        fig, ax = plt.subplots(figsize=(10, 4))
+    else:
+        fig = ax.figure
+
+    ax.bar(np.arange(nChannels), perChannel,
+           color=color, edgecolor='black', lw=0.4)
+    ax.set_xlabel(f'ASIC channel  (y // {channelHeight})')
+    ax.set_ylabel(ylabel)
+    titleSuffix = ''
+    cam = getattr(source, 'cam', None)
+    visit = getattr(source, 'visit', None)
+    if cam and visit:
+        titleSuffix = f'  (visit={visit} {cam})'
+    ax.set_title(f'ASIC glitches per channel{titleSuffix}')
+    ax.set_xticks(np.arange(nChannels))
+    ax.tick_params(axis='x', labelsize=8)
+    ax.grid(True, axis='y', alpha=0.3)
+    ax.set_xlim(-0.5, nChannels - 0.5)
+
+    # Annotate total in the top-right.
+    total = int(perChannel.sum())
+    ax.text(
+        0.995, 0.95, f'total: {total:,}',
+        transform=ax.transAxes, ha='right', va='top',
+        fontsize=9, color='0.3',
+        bbox=dict(boxstyle='round,pad=0.2',
+                  facecolor='white', edgecolor='none', alpha=0.85),
+    )
+
+    fig.tight_layout()
+    return fig
diff --git a/python/lsst/obs/pfs/h4Linearity/loaders.py b/python/lsst/obs/pfs/h4Linearity/loaders.py
new file mode 100644
index 00000000..d52dd4a1
--- /dev/null
+++ b/python/lsst/obs/pfs/h4Linearity/loaders.py
@@ -0,0 +1,29 @@
+"""Development convenience loaders. Production callers supply their own loaders."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import numpy as np
+
+from .types import Ramp
+
+
+def loadNpz(path: str | Path) -> tuple[Ramp, np.ndarray]:
+    """Read a development ``.npz`` ramp: ``deltas`` plus ``photodiode``.
+
+    The file holds per-read deltas with an implicit read0 = 0, so ``N``
+    deltas become ``N+1`` cumulative float32 reads (zero read first).
+    ``photodiode`` is returned as stored; the caller is expected to
+    apply the photodiode correction before handing the ramp to
+    :func:`h4Linearity.fit.fit`.
+    """
+    with np.load(Path(path)) as archive:
+        deltas = np.asarray(archive["deltas"], dtype=np.float32)
+        photodiode = np.asarray(archive["photodiode"])
+    nDeltas, nRows, nCols = deltas.shape
+    # zeros() supplies the implicit read 0; the running sum of the deltas
+    # is written straight into the remaining N planes.
+    reads = np.zeros((nDeltas + 1, nRows, nCols), dtype=np.float32)
+    np.cumsum(deltas, axis=0, out=reads[1:])
+    return Ramp(reads=reads), photodiode
diff --git a/python/lsst/obs/pfs/h4Linearity/models/__init__.py b/python/lsst/obs/pfs/h4Linearity/models/__init__.py
new file mode 100644
index 00000000..d815c463
--- /dev/null
+++ b/python/lsst/obs/pfs/h4Linearity/models/__init__.py
@@ -0,0 +1,51 @@
+"""Model registry — lookup table from MODEL string (FITS header) to model class."""
+
+from __future__ import annotations
+
+from .base import BlockFitResult, Model
+from .polynomial import PolynomialModel
+
+MODEL_REGISTRY: dict[str, type[Model]] = {}  # modelName -> model class; filled by registerModel
+
+
+def registerModel(modelClass: type[Model], *, overwrite: bool = False) -> None:
+    """Add ``modelClass`` to ``MODEL_REGISTRY``, keyed by its ``modelName``.
+
+    Registration is what lets ``saveFits`` / ``loadFits`` round-trip a
+    correction built on the model: :func:`loadFits` resolves the class
+    from the ``MODEL`` keyword of the PRIMARY FITS header.
+
+    Parameters
+    ----------
+    modelClass : type[Model]
+        Class implementing the :class:`Model` protocol; its
+        ``modelName`` class attribute becomes the registry key.
+    overwrite : bool, optional
+        When True, silently replace an existing entry with the same
+        name. Default ``False`` (a collision raises ``ValueError``).
+
+    Raises
+    ------
+    ValueError
+        If ``modelClass.modelName`` is already registered and
+        ``overwrite=False``.
+    """
+    key = modelClass.modelName  # type: ignore[attr-defined]
+    if not overwrite and key in MODEL_REGISTRY:
+        raise ValueError(
+            f"model name {key!r} already registered; "
+            f"pass overwrite=True to replace"
+        )
+    MODEL_REGISTRY[key] = modelClass
+
+
+# Register the built-in model at import time so its MODEL name resolves.
+registerModel(PolynomialModel)
+
+__all__ = [
+    "MODEL_REGISTRY",
+    "Model",
+    "BlockFitResult",
+    "PolynomialModel",
+    "registerModel",
+]
diff --git a/python/lsst/obs/pfs/h4Linearity/models/base.py b/python/lsst/obs/pfs/h4Linearity/models/base.py
new file mode 100644
index 00000000..61c73010
--- /dev/null
+++ b/python/lsst/obs/pfs/h4Linearity/models/base.py
@@ -0,0 +1,101 @@
+"""The Model protocol and BlockFitResult — shared across all concrete model forms."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Protocol, runtime_checkable
+
+import numpy as np
+
+
+@dataclass(frozen=True)
+class BlockFitResult:
+    """Frozen per-tile record of everything a model's ``fitBlock`` must return."""
+
+    coefficients: np.ndarray       # model-specific shape; polynomial: (order+1, hTile, wTile) float32
+    fitMin: np.ndarray             # (hTile, wTile) float32 — min m used per pixel
+    fitMax: np.ndarray             # (hTile, wTile) float32 — max m used per pixel
+    residualRms: np.ndarray        # (hTile, wTile) float32
+    maxAbsResidual: np.ndarray     # (hTile, wTile) float32
+    nPointsUsed: np.ndarray        # (hTile, wTile) int32
+    conditionNumber: np.ndarray    # (hTile, wTile) float32
+    monotonic: np.ndarray          # (hTile, wTile) bool
+    # badPixelMask: (hTile, wTile) uint8 bit mask holding fit-time flags only
+    # (INSUFFICIENT_POINTS, FIT_FAILED, NON_MONOTONIC).
+    badPixelMask: np.ndarray
+
+
+@runtime_checkable
+class Model(Protocol):
+    """Protocol implemented by every concrete fit form (polynomial, spline, ...).
+
+    Concrete models live in this subpackage (currently :class:`PolynomialModel`)
+    and are registered via :func:`registerModel`. The protocol decouples
+    :func:`fit` and :func:`apply` from any particular functional form, and lets
+    the FITS layer round-trip model variants by name (``MODEL`` PRIMARY card).
+    ``runtime_checkable`` makes ``isinstance(obj, Model)`` work, but that check
+    only tests that the members exist — not their signatures or types.
+    """
+
+    modelName: str  #: e.g. "CHEBYSHEV"; written into the FITS PRIMARY header.
+
+    def fitBlock(
+        self,
+        m: np.ndarray,                  # (nPoints, hTile, wTile) float32
+        t: np.ndarray,                  # (nPoints,) float32
+        valid: np.ndarray,              # (nPoints, hTile, wTile) bool
+        conditionNumberLimit: float,
+    ) -> BlockFitResult:
+        """Fit the model independently on every pixel of a tile.
+
+        Parameters
+        ----------
+        m : np.ndarray
+            ``(nPoints, hTile, wTile)`` float32. Per-pixel cumulative-ADU
+            values to use as the model's *measured* coordinate.
+        t : np.ndarray
+            ``(nPoints,)`` float32. Reference (true) signal corresponding
+            to each input point — shared across all pixels of the tile.
+        valid : np.ndarray
+            ``(nPoints, hTile, wTile)`` bool. Per-pixel-per-read mask;
+            False entries are excluded from the fit.
+        conditionNumberLimit : float
+            Pixels whose normal-equations matrix exceeds this condition
+            number are marked ``FIT_FAILED`` and left with zero
+            coefficients.
+        """
+
+    def evaluate(
+        self, coefficients: np.ndarray, m: np.ndarray
+    ) -> np.ndarray:
+        """Map measured cumulative ADU ``m`` to linearized signal ``t``.
+
+        Per-pixel vectorized; ``coefficients`` and ``m`` broadcast over
+        the same trailing ``(H, W)`` axes.
+        """
+
+    def isMonotonic(
+        self, coefficients: np.ndarray, mMin: np.ndarray, mMax: np.ndarray
+    ) -> np.ndarray:
+        """Return a per-pixel bool mask: True where the model is monotonic on ``[mMin, mMax]``.
+
+        Used during :func:`fit` to flag ``NON_MONOTONIC`` pixels.
+        """
+
+    def toFitsHdus(self, correction: Any) -> list:  # list[astropy.io.fits.HDU]
+        """Serialize the model's coefficients + form metadata to FITS HDUs.
+
+        The list returned here is inserted after the PRIMARY HDU and
+        before the standard image HDUs by :func:`saveFits`. ``correction``
+        is the surrounding :class:`LinearityCorrection`, so the model
+        can pull whatever per-pixel arrays it owns.
+        """
+
+    @classmethod
+    def fromFitsHdus(cls, hdus: list) -> tuple["Model", np.ndarray]:
+        """Rebuild ``(model, coefficients)`` from a list of HDUs.
+
+        Inverse of :meth:`toFitsHdus`. The full HDU list (including the
+        PRIMARY and the standard non-model HDUs) is passed in; the
+        model picks out the entries it owns.
+        """
diff --git a/python/lsst/obs/pfs/h4Linearity/models/polynomial.py b/python/lsst/obs/pfs/h4Linearity/models/polynomial.py
new file mode 100644
index 00000000..cd598d55
--- /dev/null
+++ b/python/lsst/obs/pfs/h4Linearity/models/polynomial.py
@@ -0,0 +1,461 @@
+"""Chebyshev polynomial nonlinearity model: t = Σ c_k T_k(x) (per pixel)."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+from astropy.io import fits
+
+from .base import BlockFitResult
+from ..types import FIT_FAILED, INSUFFICIENT_POINTS, NON_MONOTONIC
+
+
+@dataclass(frozen=True)
+class PolynomialModel:
+    """Pluggable Chebyshev polynomial-fit model. Default 4th order.
+
+    ``fitMinMargin`` (default 100 DN) lowers the per-pixel ``fitMin`` by
+    an absolute DN amount so that apply() does not flag BELOW_VALID_RANGE
+    for read noise, kTC, or modest bias-level offsets that push slightly
+    below the measured range. Sized by detector physics (noise & bias),
+    not signal level. The polynomial near read 0 is locally linear
+    (anchored by the implicit zero read), so extrapolating through this
+    margin remains accurate.
+    """
+
+    order: int = 4
+    modelName: str = "CHEBYSHEV"
+    # Preliminary guess; needs a better-justified value derived from
+    # detector noise/bias measurements.
+    fitMinMargin: float = 100.0
+
+    def __post_init__(self) -> None:
+        if not isinstance(self.order, int) or isinstance(self.order, bool):
+            raise ValueError(f"order must be an int, got {type(self.order).__name__}")
+        if self.order < 1:
+            raise ValueError(f"order must be >= 1, got {self.order}")
+        if self.fitMinMargin < 0:
+            raise ValueError(
+                f"fitMinMargin must be >= 0, got {self.fitMinMargin}"
+            )
+
+    @staticmethod
+    def _chebToMonomialMatrix(order: int) -> np.ndarray:
+        """Return the (order+1, order+1) matrix that converts Chebyshev
+        coefficients to monomial coefficients: ``mon = M @ cheb``.
+
+        For order <= 5 the conversion is exact in float64 and numerically
+        benign (cheb-to-poly amplification factors are O(2^order)).
+        """
+        M = np.zeros((order + 1, order + 1), dtype=np.float64)
+        for k in range(order + 1):
+            ek = np.zeros(order + 1)
+            ek[k] = 1.0
+            # cheb2poly returns a TRIMMED 1-D array (length 1 for T_0,
+            # length 2 for T_1, ...), so slice the destination to match.
+            polyCoefs = np.polynomial.chebyshev.cheb2poly(ek)
+            M[:len(polyCoefs), k] = polyCoefs
+        return M
+
+    def chebToMonomial(self, chebCoefs: np.ndarray) -> np.ndarray:
+        """Convert Chebyshev-basis coefficients to monomial (standard
+        polynomial) coefficients on axis 0.
+
+        Input shape ``(order+1, ...)``; output same shape. Caller does
+        this once at apply time so the per-chunk Horner evaluation can
+        skip the conversion — see :func:`evaluateMonomial`.
+        """
+        chebCoefs = np.asarray(chebCoefs)
+        order = chebCoefs.shape[0] - 1
+        if order == 0:
+            return chebCoefs.copy()
+        M = self._chebToMonomialMatrix(order).astype(chebCoefs.dtype)
+        flat = chebCoefs.reshape(order + 1, -1)
+        return (M @ flat).reshape(chebCoefs.shape)
+
+    def evaluateMonomial(
+        self, monCoefs: np.ndarray, x: np.ndarray
+    ) -> np.ndarray:
+        """Evaluate a monomial polynomial via Horner's method.
+
+        ``t = mon[0] + mon[1]*x + mon[2]*x^2 + ...``
+
+        Uses one ``(H, W, ...)`` accumulator buffer (plus the input
+        ``x``) — two cubes total, vs three for Clenshaw on the
+        Chebyshev basis. For polynomial order <= 5 there's no numerical
+        downside to switching to the monomial form, and we save one
+        full-cube-sized buffer per chunk.
+
+        Parameters
+        ----------
+        monCoefs
+            Shape ``(order+1, H, W)``. ``monCoefs[k]`` is the coefficient
+            of ``x^k``. Usually produced by :func:`chebToMonomial`.
+        x
+            Shape ``(H, W)`` or ``(H, W, ...)``. Spatial axes are
+            leading and match ``monCoefs[k]``; the trailing axes (the
+            time axis in production) are broadcast over per-pixel.
+
+        Returns
+        -------
+        t
+            Same shape as ``x``.
+        """
+        monCoefs = np.asarray(monCoefs)
+        x = np.asarray(x)
+        order = monCoefs.shape[0] - 1
+        # Per-pixel ``(H, W)`` coefficients broadcast against any
+        # trailing axes of ``x`` (e.g. a contiguous-time chunk of shape
+        # ``(H, W, chunkSize)``) by appending length-1 axes.
+        coefShape = monCoefs.shape[1:] + (1,) * (x.ndim - monCoefs.ndim + 1)
+
+        if order == 0:
+            return (
+                np.broadcast_to(monCoefs[0].reshape(coefShape), x.shape)
+                .astype(x.dtype)
+                .copy()
+            )
+
+        # Horner: acc = mon[order]; for k = order-1..0: acc = acc*x + mon[k]
+        acc = (
+            np.broadcast_to(monCoefs[order].reshape(coefShape), x.shape)
+            .astype(x.dtype)
+            .copy()
+        )
+        for k in range(order - 1, -1, -1):
+            acc *= x
+            acc += monCoefs[k].reshape(coefShape)
+        return acc
+
+    def evaluate(self, coefficients: np.ndarray, x: np.ndarray) -> np.ndarray:
+        """Evaluate the per-pixel Chebyshev series.
+
+        Internally converts Chebyshev coefficients to monomial form once
+        (cheap: an ``(order+1, order+1)`` matrix multiply on the
+        ``(order+1, H, W)`` coefficient cube) and evaluates with Horner.
+        Saves one full-cube buffer vs. Clenshaw at the cost of a small
+        upfront conversion. For tight loops over chunks of ``x`` with
+        the same coefficients, call :func:`chebToMonomial` once and
+        :func:`evaluateMonomial` per chunk to skip the conversion each
+        time.
+
+        Parameters
+        ----------
+        coefficients
+            Shape ``(order+1, H, W)``, float32. ``coefficients[k]`` is the
+            coefficient of T_k(x).
+        x
+            Shape ``(H, W)`` or ``(H, W, ...)``, float32. Mapped input
+            in [-1, 1] with the spatial axes leading.
+
+        Returns
+        -------
+        t
+            Same shape as ``x``.
+        """
+        monCoefs = self.chebToMonomial(coefficients).astype(x.dtype, copy=False)
+        return self.evaluateMonomial(monCoefs, x)
+
+    def isMonotonic(
+        self,
+        coefficients: np.ndarray,
+        mMin: np.ndarray,
+        mMax: np.ndarray,
+        nSamples: int = 32,
+    ) -> np.ndarray:
+        """Return an ``(H, W)`` boolean map: ``True`` if the fit is monotonically
+        increasing on ``[mMin, mMax]`` per pixel.
+
+        Computes the derivative of the Chebyshev series, evaluates it at
+        ``nSamples`` evenly-spaced points on the mapped interval ``[-1, 1]``,
+        and checks that all sampled derivatives (in original m-space) are
+        non-negative.
+        """
+        coefficients = np.asarray(coefficients, dtype=np.float64)
+        order = coefficients.shape[0] - 1
+        if order < 1:
+            return np.ones(coefficients.shape[1:], dtype=bool)
+
+        # Derivative of Chebyshev series: d/dx [Σ c_k T_k(x)] = Σ d_k T_k(x)
+        # where the derivative coefficients satisfy the recurrence:
+        #   d_{p-1} = 2 * p * c_p
+        #   d_k     = d_{k+2} + 2 * (k+1) * c_{k+1}   for k = p-2, ..., 1
+        #   d_0     = d_2 / 2 + c_1
+        derivCoefs = np.zeros((order, *coefficients.shape[1:]), dtype=np.float64)
+        derivCoefs[order - 1] = 2.0 * order * coefficients[order]
+        for k in range(order - 2, 0, -1):
+            dKplus2 = derivCoefs[k + 2] if k + 2 < order else np.zeros_like(derivCoefs[0])
+            derivCoefs[k] = dKplus2 + 2.0 * (k + 1) * coefficients[k + 1]
+        # d_0: handle the k+2 index carefully (it's 0 if order < 3)
+        d2 = derivCoefs[2] if order >= 3 else np.zeros_like(derivCoefs[0])
+        derivCoefs[0] = d2 / 2.0 + coefficients[1]
+
+        H, W = mMin.shape
+        # Sample x in [-1, 1]
+        xSamples = np.linspace(-1.0, 1.0, nSamples, dtype=np.float64)[:, None, None]
+        xSamples = np.broadcast_to(xSamples, (nSamples, H, W)).copy()
+
+        # Evaluate derivative Chebyshev series at sample points via Clenshaw.
+        derivOrder = order - 1
+        if derivOrder == 0:
+            d = np.broadcast_to(derivCoefs[0], (nSamples, H, W)).copy()
+        else:
+            bNext = np.zeros((nSamples, H, W), dtype=np.float64)
+            bCurr = np.broadcast_to(
+                derivCoefs[derivOrder], (nSamples, H, W)
+            ).copy().astype(np.float64)
+            for k in range(derivOrder - 1, 0, -1):
+                bPrev = 2.0 * xSamples * bCurr - bNext + derivCoefs[k]
+                bNext = bCurr
+                bCurr = bPrev
+            d = xSamples * bCurr - bNext + derivCoefs[0]
+
+        # Chain rule: dt/dm = (dt/dx) * (dx/dm) = (dt/dx) * 2/(fitMax - fitMin)
+        # For monotonicity we only care about sign, and 2/(fitMax - fitMin) > 0,
+        # so we can just check dt/dx >= 0.
+        allNonNegative = (d >= 0).all(axis=0)
+        degenerate = mMax <= mMin
+        return allNonNegative | degenerate
+
+    def fitBlock(
+        self,
+        m: np.ndarray,
+        t: np.ndarray,
+        valid: np.ndarray,
+        conditionNumberLimit: float,
+    ) -> BlockFitResult:
+        """Fit a Chebyshev polynomial at every pixel in the block.
+
+        Maps ``m`` to ``x ∈ [-1, 1]`` via ``x = 2*(m - fitMin)/(fitMax - fitMin) - 1``
+        before forming normal equations in the Chebyshev basis ``T_k(x)``.
+        ``fitMin`` is the per-pixel minimum of ``m`` over valid points,
+        lowered by ``self.fitMinMargin``; ``fitMax`` is the per-pixel
+        maximum over valid points.
+
+        Pixels whose full-order fit fails the :meth:`isMonotonic` check
+        are refit at successively lower orders (``order - 1`` down to 1).
+        The first lower-order solution that is within the condition
+        limit and monotonic replaces the pixel's coefficients (unused
+        higher-order terms are zero) and its residual statistics.
+
+        Parameters
+        ----------
+        m : np.ndarray
+            ``(nPoints, H, W)`` fit abscissa.
+        t : np.ndarray
+            Fit target. It is indexed as ``t[:, None, None]``, i.e. one
+            value per point shared by every pixel of the block.
+        valid : np.ndarray
+            ``(nPoints, H, W)`` mask; entries that are False get zero
+            weight in the normal equations and in the residuals.
+        conditionNumberLimit : float
+            Pixels with enough points whose normal-equations matrix has
+            ``np.linalg.cond`` above this value are flagged
+            ``FIT_FAILED`` and keep zero coefficients.
+
+        Returns
+        -------
+        BlockFitResult
+            ``coefficients`` ``(order+1, H, W)`` float32; ``fitMin``
+            (margin already subtracted) and ``fitMax`` as float32;
+            ``residualRms`` / ``maxAbsResidual`` over valid points;
+            ``nPointsUsed`` (int32 count of valid points);
+            ``conditionNumber`` (float32); ``monotonic`` bool map; and
+            ``badPixelMask`` (uint16) carrying ``INSUFFICIENT_POINTS``,
+            ``FIT_FAILED`` and ``NON_MONOTONIC`` bits.
+        """
+        nPoints, H, W = m.shape
+        p = self.order
+        nCoefs = p + 1
+
+        # All accumulation below is done in float64; results are cast
+        # to float32 only when they are stored.
+        mD = m.astype(np.float64)
+        v64 = valid.astype(np.float64)
+        t64 = t.astype(np.float64)
+
+        # Count valid points per pixel
+        nPointsUsed = valid.sum(axis=0).astype(np.int32)  # (H, W)
+        badMask = np.zeros((H, W), dtype=np.uint16)
+
+        # fitMin / fitMax: min/max of m over valid reads per pixel.
+        # Pixels with no valid read at all come out as NaN and are
+        # replaced by 0.0 just below.
+        mMasked = np.where(valid, mD, np.nan)
+        with np.errstate(invalid="ignore"):
+            fitMin = np.nanmin(mMasked, axis=0)
+            fitMax = np.nanmax(mMasked, axis=0)
+        fitMin = np.where(np.isnan(fitMin), 0.0, fitMin)
+        fitMax = np.where(np.isnan(fitMax), 0.0, fitMax)
+        # Extend fitMin downward by an absolute DN margin so apply()
+        # does not flag noise / bad bias-level offsets below the
+        # measured range. The size of the margin is driven by detector
+        # physics (read noise, kTC, bias variation), not signal level.
+        # The polynomial near read 0 (anchored by the implicit zero
+        # read) is locally linear, so extrapolating through this margin
+        # stays accurate.
+        fitMin = fitMin - self.fitMinMargin
+
+        # Affine map m → x ∈ [-1, 1]: x = 2*(m - fitMin)/(fitMax - fitMin) - 1
+        denom = fitMax - fitMin
+        denom = np.where(denom > 0, denom, 1.0)  # avoid /0 for degenerate pixels
+        x = 2.0 * (mD - fitMin[None]) / denom[None] - 1.0  # (N, H, W)
+
+        # Flag insufficient-points pixels now: the fit requires at least
+        # one more valid point than there are coefficients.
+        insufficientPixels = nPointsUsed < (nCoefs + 1)
+        badMask[insufficientPixels] |= INSUFFICIENT_POINTS
+
+        # Compute Chebyshev basis values T_k(x) via three-term recurrence.
+        # nCoefs is small (typically 5), so storing all of them is fine.
+        tCheb = []
+        for k in range(nCoefs):
+            if k == 0:
+                tk = np.ones_like(x)
+            elif k == 1:
+                tk = x.copy()
+            else:
+                tk = 2.0 * x * tCheb[k - 1] - tCheb[k - 2]
+            tCheb.append(tk)
+
+        # Accumulate normal equations AtA and Atb. Invalid points are
+        # removed by multiplying with the 0/1 weights in v64.
+        # NOTE(review): because masking is multiplicative, a NaN/inf in
+        # ``m`` at an *invalid* point would still propagate (0 * NaN is
+        # NaN) — confirm callers never pass non-finite values there.
+        AtA = np.zeros((H, W, nCoefs, nCoefs), dtype=np.float64)
+        Atb = np.zeros((H, W, nCoefs), dtype=np.float64)
+
+        for i in range(nCoefs):
+            vTi = v64 * tCheb[i]  # (N, H, W)
+            Atb[..., i] = (vTi * t64[:, None, None]).sum(axis=0)
+            # AtA is symmetric: compute the upper triangle and mirror it.
+            for j in range(i, nCoefs):
+                val = (vTi * tCheb[j]).sum(axis=0)  # (H, W)
+                AtA[..., i, j] = val
+                if i != j:
+                    AtA[..., j, i] = val
+
+        # Condition number check; NaN / +inf condition numbers are
+        # treated as infinite so they always exceed the limit.
+        with np.errstate(divide="ignore", invalid="ignore"):
+            conditionNumber = np.linalg.cond(AtA)
+        conditionNumber = np.nan_to_num(conditionNumber, nan=np.inf, posinf=np.inf)
+
+        fitFailed = (~insufficientPixels) & (conditionNumber > conditionNumberLimit)
+        badMask[fitFailed] |= FIT_FAILED
+
+        # Replace the systems of skipped pixels by "I @ sol = 0" so the
+        # batched solve below stays well-defined for them.
+        skip = insufficientPixels | fitFailed
+        identityBlock = np.eye(nCoefs, dtype=np.float64)
+        AtA[skip] = identityBlock
+        Atb[skip] = 0.0
+
+        # Batched solve; if it raises, fall back to a per-pixel solve
+        # and flag the pixels that fail individually.
+        try:
+            sol = np.linalg.solve(AtA, Atb[..., None])[..., 0]  # (H, W, nCoefs)
+        except np.linalg.LinAlgError:
+            sol = np.zeros((H, W, nCoefs), dtype=np.float64)
+            for hi in range(H):
+                for wi in range(W):
+                    if skip[hi, wi]:
+                        continue
+                    try:
+                        sol[hi, wi] = np.linalg.solve(AtA[hi, wi], Atb[hi, wi])
+                    except np.linalg.LinAlgError:
+                        badMask[hi, wi] |= FIT_FAILED
+                        sol[hi, wi] = 0.0
+            skip = skip | (badMask & FIT_FAILED != 0)
+
+        # No unscaling needed — coefficients are in Chebyshev basis directly.
+        coefficients = np.zeros((p + 1, H, W), dtype=np.float32)
+        for k in range(nCoefs):
+            coefficients[k] = sol[..., k].astype(np.float32)
+        coefficients[:, skip] = 0.0
+
+        # Residuals: evaluate fit at each read and compare to t.
+        # TEMPORARY BRIDGE (PIPE2D-1843 transpose is half-done): fitBlock uses
+        # (N, H, W) but evaluate/evaluateMonomial still expect (H, W, ...).
+        # Transpose around the call so layouts line up; remove once evaluate is
+        # converted to (N, H, W) too.
+        xHW = np.moveaxis(x.astype(np.float32), 0, -1)  # (H, W, N)
+        tPred = np.moveaxis(self.evaluate(coefficients, xHW), -1, 0)  # (N, H, W)
+        residuals = (t[:, None, None].astype(np.float32) - tPred) * valid
+        # Guard the RMS division for pixels with zero valid points.
+        nForDiv = np.where(nPointsUsed > 0, nPointsUsed, 1).astype(np.float32)
+        residualRms = np.sqrt((residuals ** 2).sum(axis=0) / nForDiv).astype(np.float32)
+        maxAbsResidual = np.abs(residuals).max(axis=0).astype(np.float32)
+
+        # Monotonicity check — retry non-monotonic pixels at lower orders.
+        monotonic = self.isMonotonic(
+            coefficients, fitMin.astype(np.float32), fitMax.astype(np.float32)
+        )
+        # Skipped pixels have no fit, so they are never reported monotonic
+        # and never enter the retry loop.
+        monotonic[skip] = False
+        nonMono = (~skip) & (~monotonic)
+
+        for retryOrder in range(p - 1, 0, -1):
+            # Stop as soon as no non-monotonic pixel is left.
+            retryIdx = np.flatnonzero(nonMono.ravel())
+            if len(retryIdx) == 0:
+                break
+            retryNCoefs = retryOrder + 1
+            # Need at least retryNCoefs + 1 valid points.
+            canRetry = nonMono & (nPointsUsed >= retryNCoefs + 1)
+            retryIdx = np.flatnonzero(canRetry.ravel())
+            if len(retryIdx) == 0:
+                continue
+            # Flat index → (row, column) of each pixel being retried.
+            rr = retryIdx // W
+            rc = retryIdx % W
+
+            # Build normal equations for these pixels at the reduced order.
+            rAtA = np.zeros((len(retryIdx), retryNCoefs, retryNCoefs), dtype=np.float64)
+            rAtb = np.zeros((len(retryIdx), retryNCoefs), dtype=np.float64)
+            for i in range(retryNCoefs):
+                vTi = v64[:, rr, rc] * tCheb[i][:, rr, rc]  # (N, nRetry)
+                rAtb[:, i] = (vTi * t64[:, None]).sum(axis=0)
+                for j in range(i, retryNCoefs):
+                    val = (vTi * tCheb[j][:, rr, rc]).sum(axis=0)
+                    rAtA[:, i, j] = val
+                    if i != j:
+                        rAtA[:, j, i] = val
+
+            with np.errstate(divide="ignore", invalid="ignore"):
+                rCond = np.linalg.cond(rAtA)
+            goodCond = np.isfinite(rCond) & (rCond <= conditionNumberLimit)
+
+            # Solve good-condition pixels. If the batched solve raises,
+            # this order is abandoned for every pixel and the loop
+            # moves on to the next lower order.
+            solveIdx = np.flatnonzero(goodCond)
+            if len(solveIdx) == 0:
+                continue
+            try:
+                rSol = np.linalg.solve(rAtA[solveIdx], rAtb[solveIdx, :, None])[..., 0]
+            except np.linalg.LinAlgError:
+                continue
+
+            # Vectorized monotonicity check across all good-conditioned
+            # retry pixels. Layout the candidate batch as (nCoefs, 1, nGood)
+            # so isMonotonic / evaluate can run once instead of per-pixel.
+            # Coefficients above retryOrder stay zero.
+            nGood = len(solveIdx)
+            goodPxR = rr[solveIdx]
+            goodPxC = rc[solveIdx]
+            trialCoefsBatch = np.zeros((nCoefs, 1, nGood), dtype=np.float32)
+            trialCoefsBatch[:retryNCoefs, 0, :] = rSol.T.astype(np.float32, copy=False)
+
+            fmBatch = fitMin[goodPxR, goodPxC].astype(np.float32, copy=False).reshape(1, nGood)
+            fMBatch = fitMax[goodPxR, goodPxC].astype(np.float32, copy=False).reshape(1, nGood)
+            goodMono = self.isMonotonic(trialCoefsBatch, fmBatch, fMBatch)[0]
+
+            if not goodMono.any():
+                continue
+
+            # Accept the reduced-order fit for the pixels that are now
+            # monotonic; they drop out of later (lower-order) retries.
+            accPxR = goodPxR[goodMono]
+            accPxC = goodPxC[goodMono]
+            acceptedCoefs = trialCoefsBatch[:, 0, goodMono]  # (nCoefs, nAcc)
+
+            coefficients[:, accPxR, accPxC] = acceptedCoefs
+            monotonic[accPxR, accPxC] = True
+            nonMono[accPxR, accPxC] = False
+            conditionNumber[accPxR, accPxC] = rCond[solveIdx[goodMono]]
+
+            # Recompute residuals for accepted pixels in one batched evaluate.
+            xAcc = x[:, accPxR, accPxC].astype(np.float32, copy=False)[:, None, :]  # (N, 1, nAcc)
+            coefForEval = acceptedCoefs[:, None, :]                     # (nCoefs, 1, nAcc)
+            # PIPE2D-1843 transpose bridge: evaluate expects (*spatial, ...trailing).
+            xAccHW = np.moveaxis(xAcc, 0, -1)                           # (1, nAcc, N)
+            tPredAcc = np.moveaxis(
+                self.evaluate(coefForEval, xAccHW), -1, 0
+            )[:, 0, :]                                                  # (N, nAcc)
+            vAcc = valid[:, accPxR, accPxC]
+            resAcc = (t.astype(np.float32, copy=False)[:, None] - tPredAcc) * vAcc
+            nAccPts = np.maximum(nPointsUsed[accPxR, accPxC], 1).astype(np.float32, copy=False)
+            residualRms[accPxR, accPxC] = np.sqrt(
+                (resAcc ** 2).sum(axis=0) / nAccPts
+            )
+            maxAbsResidual[accPxR, accPxC] = np.abs(resAcc).max(axis=0)
+
+        # Whatever is still non-monotonic after all retries gets flagged;
+        # its full-order coefficients are left in place.
+        badMask[nonMono] |= NON_MONOTONIC
+
+        return BlockFitResult(
+            coefficients=coefficients,
+            fitMin=fitMin.astype(np.float32),
+            fitMax=fitMax.astype(np.float32),
+            residualRms=residualRms,
+            maxAbsResidual=maxAbsResidual,
+            nPointsUsed=nPointsUsed,
+            conditionNumber=conditionNumber.astype(np.float32),
+            monotonic=monotonic,
+            badPixelMask=badMask,
+        )
+
+    def toFitsHdus(self, correction) -> list[fits.ImageHDU]:
+        """Serialize model coefficients to a single ImageHDU named COEFFS."""
+        hdu = fits.ImageHDU(data=correction.coefficients, name="COEFFS")
+        hdu.header["ORDER"] = (self.order, "polynomial order")
+        hdu.header["COMMENT"] = "COEFFS axis 0 is the Chebyshev coefficient index; C0 (T_0) first."
+        return [hdu]
+
+    @classmethod
+    def fromFitsHdus(cls, hdus) -> tuple["PolynomialModel", np.ndarray]:
+        """Reconstruct a PolynomialModel + coefficients from HDUs written by toFitsHdus."""
+        coeffsHdu = None
+        for hdu in hdus:
+            if getattr(hdu, "name", "") == "COEFFS":
+                coeffsHdu = hdu
+                break
+        if coeffsHdu is None:
+            raise ValueError("No COEFFS HDU found in provided hdus")
+        order = int(coeffsHdu.header["ORDER"])
+        coefficients = np.asarray(coeffsHdu.data, dtype=np.float32)
+        return cls(order=order), coefficients
diff --git a/python/lsst/obs/pfs/h4Linearity/saturation.py b/python/lsst/obs/pfs/h4Linearity/saturation.py
new file mode 100644
index 00000000..f1c202e9
--- /dev/null
+++ b/python/lsst/obs/pfs/h4Linearity/saturation.py
@@ -0,0 +1,14 @@
+"""Saturation-detection utility — planned post-MVP.
+
+The core ``fit`` path accepts a pixel-level ``validMask`` on each ``Ramp``.
+This module will eventually provide a default saturation detector that
+produces such a mask from a ramp's raw deltas / cumulative signal. For the
+MVP, it intentionally contains no implementation.
+
+See ``docs/superpowers/specs/2026-04-16-relin-package-design.md``
+section 1 ("Planned (post-MVP) extensions") and section 11.
+"""
+
+from __future__ import annotations
+
+# Intentionally empty.
diff --git a/python/lsst/obs/pfs/h4Linearity/sim.py b/python/lsst/obs/pfs/h4Linearity/sim.py
new file mode 100644
index 00000000..e000e915
--- /dev/null
+++ b/python/lsst/obs/pfs/h4Linearity/sim.py
@@ -0,0 +1,470 @@
+"""Simulation framework for H4 ramp defects (CRs, ASIC glitches).
+
+Generates synthetic cumulative-ADU ramps with controllable noise and
+injects known defects so detection algorithms can be exercised against
+ground truth, in either the linearized or the raw-domain view.
+
+Two ramp builders are provided:
+
+- `makeRamp` — linear response, no nonlinearity. Both CRs and glitches
+  inject directly into the cumulative cube. Use this when you only need
+  to exercise post-linearization detection (e.g.
+  `cr.iterativeUtrDetectAndRepair`).
+- `makeRawRamp` — applies a forward `Nonlinearity` to the true-linear
+  signal so the result is a "raw" (detector-domain) cube. CRs are
+  injected in true space (before the ADC nonlinearity); ASIC glitches
+  are injected in measured space (after, as digital electronics
+  artifacts).
+
+`makeRawAndLinearRamps` is a convenience that returns both a raw cube
+and its linearized counterpart (the analytic inverse of the same
+nonlinearity), so detection algorithms can be run on either side and
+compared against the same injected truth.
+
+Pixel response (constant rate, simple model; ``k`` is the 0-based read index)::
+
+    trueLin[k] = bias + rate * (k + 1)    (+ Poisson noise if enabled)
+    measured   = nl.forward(trueLin) + N(0, readNoise)    (raw path)
+
+Defect injection
+----------------
+
+CR (cosmic-ray, step up in cumulative)::
+
+    trueLin[k:, y, x] += amount      for k >= cr.read
+
+Persists through the end of the ramp. Multiple CRs per pixel allowed.
+
+ASIC glitch (single-read offset, digital)::
+
+    measured[k, y, x] += amount      at exactly one read
+
+Returns to the unglitched cumulative at the next read. In delta space
+(``np.diff(cum, axis=0)``) it shows as a matched +A / −A pair at
+adjacent read indices. Amplitudes are conventionally digital — ±2**N
+for some N (commonly N ∈ {10..13} → 1024..8192 ADU), reflecting bit-
+flips in the ASIC.
+
+Pure numpy + dataclasses; no LSST stack imports.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Optional, Sequence, Union
+
+import numpy as np
+
+
+@dataclass
+class RampParams:
+    """Synthetic ramp generator parameters.
+
+    Consumed by the ramp builders in this module (`makeRamp`,
+    `makeRawRamp`), which produce cubes of shape ``(nReads, H, W)``.
+
+    Attributes
+    ----------
+    nReads : int
+        Number of reads in the cube.
+    H, W : int
+        Spatial dimensions.
+    rate : float
+        Per-read flux (ADU/read), constant across the ramp.
+    readNoise : float
+        Per-read Gaussian noise (ADU), drawn independently for every
+        read and pixel.
+    bias : float
+        Constant pedestal added to every read (ADU).
+    poisson : bool
+        Include shot noise (Gaussian approximation with variance =
+        cumulative signal). The builders draw each per-read increment
+        from a normal with mean ``rate`` and standard deviation
+        ``sqrt(rate)``, and only do so when ``rate > 0``.
+    """
+
+    nReads: int = 45
+    H: int = 16
+    W: int = 16
+    rate: float = 50.0
+    readNoise: float = 5.0
+    bias: float = 0.0
+    poisson: bool = True
+
+
+@dataclass(frozen=True)
+class CR:
+    """Cosmic-ray hit (step up in cumulative starting at read ``read``).
+
+    The ramp builders in this module add ``amount`` to the per-read
+    increment at ``[read, y, x]`` before the cumulative sum, so the
+    step is present in that read and in every later one.
+    """
+
+    # Pixel index along the cube's H axis (axis 1).
+    y: int
+    # Pixel index along the cube's W axis (axis 2).
+    x: int
+    # Index along the read axis (axis 0) at which the step first appears.
+    read: int
+    # Size of the step, in the same units as the per-read increment.
+    amount: float
+
+
+@dataclass(frozen=True)
+class AsicGlitch:
+    """Digital ASIC glitch (single-read offset at one read).
+
+    Amplitude is signed: positive for up-glitches, negative for down-glitches.
+    Typically a power of 2 or sum of powers of 2.
+
+    The ramp builders in this module add ``amount`` (cast to float32)
+    to the finished cube at exactly ``[read, y, x]``, after read noise
+    has been applied; no other read is touched.
+    """
+
+    # Pixel index along the cube's H axis (axis 1).
+    y: int
+    # Pixel index along the cube's W axis (axis 2).
+    x: int
+    # Index along the read axis (axis 0) of the single affected read.
+    read: int
+    # Signed offset added to that one cumulative sample.
+    amount: float
+
+
+@dataclass(frozen=True)
+class Nonlinearity:
+    """Quadratic-compressive detector nonlinearity for sim.
+
+    Models the H4 detector's response as a quadratic deviation from
+    linear:
+
+        forward (true_linear → measured):
+            measured = t − α * t² / qMax
+
+        inverse (measured → true_linear, analytic root):
+            t = (1 − sqrt(max(0, 1 − 4 * α * m / qMax))) / (2 * α / qMax)
+
+    The inverse is exact in float64 arithmetic for t < qMax / (2α) (the
+    monotonic regime). With ``alpha=0.05`` and ``qMax=60000`` the
+    response is 5% compressed at full well, the monotonic limit is
+    600,000 ADU (well above any real cube value), and a complete
+    round-trip ``inverse(forward(t))`` returns ``t`` to ~1 ADU
+    precision in float32.
+
+    ``alpha=0`` is the linear case (forward and inverse both identity);
+    use this to disable nonlinearity without removing the parameter.
+    """
+
+    alpha: float = 0.05
+    qMax: float = 60000.0
+
+    def forward(self, t: np.ndarray) -> np.ndarray:
+        """True linear cumulative → measured (raw-domain) cumulative."""
+        if self.alpha == 0.0:
+            return np.asarray(t, dtype=np.float32).copy()
+        t64 = np.asarray(t, dtype=np.float64)
+        m = t64 - self.alpha * t64 * t64 / self.qMax
+        return m.astype(np.float32, copy=False)
+
+    def inverse(self, m: np.ndarray) -> np.ndarray:
+        """Measured (raw-domain) cumulative → true linear cumulative."""
+        if self.alpha == 0.0:
+            return np.asarray(m, dtype=np.float32).copy()
+        m64 = np.asarray(m, dtype=np.float64)
+        a = self.alpha / self.qMax
+        disc = np.maximum(0.0, 1.0 - 4.0 * a * m64)
+        t = (1.0 - np.sqrt(disc)) / (2.0 * a)
+        return t.astype(np.float32, copy=False)
+
+
+def _asGenerator(rng):
+    if isinstance(rng, np.random.Generator):
+        return rng
+    return np.random.default_rng(rng)
+
+
+def makeRamp(
+    params: RampParams,
+    crs: Optional[Sequence[CR]] = None,
+    glitches: Optional[Sequence[AsicGlitch]] = None,
+    rng: Optional[Union[int, np.random.Generator]] = None,
+) -> np.ndarray:
+    """Build a synthetic cumulative-ADU cube with optional injected defects.
+
+    Parameters
+    ----------
+    params : RampParams
+    crs : sequence of CR, optional
+        Cosmic-ray injections.
+    glitches : sequence of AsicGlitch, optional
+        ASIC-glitch injections.
+    rng : np.random.Generator | int | None
+        Reproducible random state. ``int`` seeds a fresh generator,
+        ``None`` uses an unseeded one.
+
+    Returns
+    -------
+    cube : np.ndarray
+        Shape ``(nReads, H, W)``, float32. Cumulative ADU per read.
+    """
+    rng = _asGenerator(rng)
+
+    nReads, H, W = params.nReads, params.H, params.W
+    # Per-read flux increments. Each read accumulates 'rate' ADU of new
+    # signal (plus Poisson sqrt(rate) noise if enabled). CRs are charge
+    # deposits at a specific read; they inject into the per-read flux.
+    if params.poisson and params.rate > 0:
+        perRead = rng.normal(params.rate,
+                             np.sqrt(params.rate),
+                             (nReads, H, W))
+    else:
+        perRead = np.full((nReads, H, W), params.rate, dtype=np.float64)
+
+    if crs:
+        for c in crs:
+            perRead[c.read, c.y, c.x] += c.amount
+
+    # Cumulate to get the true-charge cumulative, then add per-read read
+    # noise (independent at each readout) and ASIC glitches (digital
+    # post-ADC offsets).
+    cum = np.cumsum(perRead, axis=0) + params.bias
+    cum = cum + rng.normal(0.0, params.readNoise, (nReads, H, W))
+    cum = cum.astype(np.float32)
+
+    if glitches:
+        for g in glitches:
+            cum[g.read, g.y, g.x] += np.float32(g.amount)
+
+    return cum
+
+
+def utrRate(
+    cube: np.ndarray,
+    readMask: Optional[np.ndarray] = None,
+) -> np.ndarray:
+    """Per-pixel UTR (Up The Ramp) rate from a cumulative cube.
+
+    Computes the least-squares slope of cumulative-vs-read for each
+    pixel. With ``readMask`` you can exclude specific reads from the fit
+    (e.g. CR-affected reads during an iterative detector); without it
+    the fit uses all reads with equal weight.
+
+    Parameters
+    ----------
+    cube : np.ndarray
+        Shape ``(N, H, W)``. Cumulative ADU per read.
+    readMask : np.ndarray, optional
+        Boolean shape ``(N, H, W)`` (per-pixel per-read), or ``(N,)``
+        (whole-read mask), or ``(H, W)`` (whole-pixel mask). True means
+        the read is used in the fit. If omitted, all reads are used.
+
+    Returns
+    -------
+    rate : np.ndarray
+        Shape ``(H, W)``, float32. ADU per read.
+
+    Notes
+    -----
+    For PFS H4 the production rate estimator (``PfsIsrTask.calcUTRrates``)
+    applies optimal weights derived from a noise model. The simple
+    unweighted LSQ slope used here is adequate for sim closure tests
+    and for the iterative-detection rate reference.
+    """
+    nReads, H, W = cube.shape
+    k = np.arange(nReads, dtype=np.float64)
+    cube64 = cube.astype(np.float64, copy=False)
+
+    if readMask is None:
+        # Unweighted closed form: slope = sum((k-mean)*y) / sum((k-mean)**2)
+        kMean = k.mean()
+        kCentered = k - kMean
+        kVar = float((kCentered * kCentered).sum())
+        slope = np.einsum('k,khw->hw', kCentered, cube64) / kVar
+        return slope.astype(np.float32, copy=False)
+
+    # With a mask, broadcast it to (N, H, W) bool and do a per-pixel
+    # weighted fit (weights are 0/1). Same closed form, just with weights.
+    mask = np.asarray(readMask, dtype=bool)
+    if mask.shape == (nReads,):
+        mask = np.broadcast_to(mask[:, None, None], (nReads, H, W))
+    elif mask.shape == (H, W):
+        mask = np.broadcast_to(mask[None], (nReads, H, W))
+    elif mask.shape != (nReads, H, W):
+        raise ValueError(f"readMask shape {readMask.shape} not compatible with "
+                         f"cube {cube.shape}.")
+    w = mask.astype(np.float64)
+    kCube = np.broadcast_to(k[:, None, None], (nReads, H, W))
+    sumW = w.sum(axis=0)
+    sumK = (w * kCube).sum(axis=0)
+    sumY = (w * cube64).sum(axis=0)
+    sumKK = (w * kCube * kCube).sum(axis=0)
+    sumKY = (w * kCube * cube64).sum(axis=0)
+    denom = sumW * sumKK - sumK * sumK
+    with np.errstate(divide='ignore', invalid='ignore'):
+        slope = np.where(denom > 0, (sumW * sumKY - sumK * sumY) / denom, 0.0)
+    return slope.astype(np.float32, copy=False)
+
+
+def makeRawRamp(
+    params: RampParams,
+    nonlinearity: Optional[Nonlinearity] = None,
+    crs: Optional[Sequence[CR]] = None,
+    asicGlitches: Optional[Sequence[AsicGlitch]] = None,
+    rng: Optional[Union[int, np.random.Generator]] = None,
+) -> np.ndarray:
+    """Simulate a cumulative-ADU ramp in the *raw* (post-ADC) domain.
+
+    The cube is assembled in this order:
+
+    1. Draw per-read charge increments: a constant ``params.rate``, or,
+       when ``params.poisson`` is set and the rate is positive, Gaussian
+       draws with mean ``rate`` and sigma ``sqrt(rate)`` (a normal
+       approximation to shot noise, so single increments may be negative).
+    2. Add each CR's ``amount`` to the increment of its ``read``
+       (charge deposit, i.e. before the nonlinearity).
+    3. Accumulate along the read axis and add ``params.bias``.
+    4. Map true -> measured with ``nonlinearity.forward``. Skipped when
+       ``nonlinearity`` is ``None`` or its ``alpha`` is exactly 0.
+    5. Add Gaussian read noise (sigma ``params.readNoise``) to every read.
+    6. Add each ASIC glitch's ``amount`` to one read of the measured cube.
+
+    The generator is consumed in a fixed order (shot noise first, then
+    read noise), so a given seed always reproduces the same cube.
+
+    Parameters
+    ----------
+    params : RampParams
+        Supplies ``nReads``, ``H``, ``W``, ``rate``, ``bias``, ``poisson``
+        and ``readNoise``.
+    nonlinearity : Nonlinearity, optional
+        Provides ``alpha`` and ``forward``; ``None`` yields a linear cube.
+    crs : sequence of CR, optional
+        Injected on the true-linear increments (before the nonlinearity).
+    asicGlitches : sequence of AsicGlitch, optional
+        Injected on the measured cube (after nonlinearity and read noise).
+    rng : int | np.random.Generator | None
+        Seed or generator, normalized through ``_asGenerator``.
+
+    Returns
+    -------
+    measured : np.ndarray
+        ``(nReads, H, W)`` float32 cube in raw (detector-domain) ADU.
+    """
+    rng = _asGenerator(rng)
+    shape = (params.nReads, params.H, params.W)
+
+    # Step 1: per-read increments, noisy only when requested and meaningful.
+    wantShotNoise = params.poisson and params.rate > 0
+    if wantShotNoise:
+        increments = rng.normal(params.rate, np.sqrt(params.rate), shape)
+    else:
+        increments = np.full(shape, params.rate, dtype=np.float64)
+
+    # Step 2: a CR is extra charge landing in exactly one read interval.
+    if crs:
+        for hit in crs:
+            increments[hit.read, hit.y, hit.x] += hit.amount
+
+    # Step 3: cumulative (up-the-ramp) signal on top of the bias level.
+    trueLin = np.cumsum(increments, axis=0) + params.bias
+
+    # Step 4: true -> measured.
+    isLinear = nonlinearity is None or nonlinearity.alpha == 0.0
+    if isLinear:
+        measured = trueLin.astype(np.float32, copy=False)
+    else:
+        measured = nonlinearity.forward(trueLin)
+
+    # Step 5: read noise lives in measured space.
+    readNoise = rng.normal(0.0, params.readNoise, shape).astype(np.float32)
+    measured = measured + readNoise
+
+    # Step 6: digital glitches touch a single read, post-ADC.
+    if asicGlitches:
+        for glitch in asicGlitches:
+            measured[glitch.read, glitch.y, glitch.x] += np.float32(glitch.amount)
+
+    return measured.astype(np.float32, copy=False)
+
+
+def makeRawAndLinearRamps(
+    params: RampParams,
+    nonlinearity: Nonlinearity,
+    crs: Optional[Sequence[CR]] = None,
+    asicGlitches: Optional[Sequence[AsicGlitch]] = None,
+    rng: Optional[Union[int, np.random.Generator]] = None,
+) -> tuple:
+    """Build a raw cube and its linearized twin from one ground truth.
+
+    ``raw`` comes from `makeRawRamp` with the supplied nonlinearity and
+    ``linearized`` is ``nonlinearity.inverse(raw)``. Because both cubes
+    derive from the same injections (CRs in true space, ASIC glitches in
+    measured space), detections on either cube can be scored against one
+    truth catalog. Expected appearance in the linearized cube:
+
+    - CRs are clean step-ups (the forward/inverse round trip recovers
+      them).
+    - ASIC glitches are *slightly deformed*: their amplitude after the
+      inverse depends on the local slope of the inverse curve at the
+      glitched read, which can be compared with glitch-detection
+      performance.
+
+    Returns
+    -------
+    raw, linearized : np.ndarray
+        ``raw`` is ``(nReads, H, W)`` float32. ``linearized`` is whatever
+        ``nonlinearity.inverse`` returns for that input (expected to be
+        the same shape and float32 -- NOTE(review): confirm the dtype
+        against ``Nonlinearity.inverse``).
+    """
+    rawCube = makeRawRamp(
+        params,
+        nonlinearity=nonlinearity,
+        crs=crs,
+        asicGlitches=asicGlitches,
+        rng=rng,
+    )
+    return rawCube, nonlinearity.inverse(rawCube)
+
+
+# ----- Convenience generators -----
+
+def digitalGlitchAmounts(
+    n: int,
+    bits: Sequence[int] = (10, 11, 12, 13),
+    signed: bool = True,
+    rng: Optional[Union[int, np.random.Generator]] = None,
+) -> np.ndarray:
+    """Draw ``n`` power-of-two glitch amplitudes, ``2**bit`` per sample.
+
+    Each sample picks one entry of ``bits`` uniformly at random; with
+    ``signed=True`` an independent random sign is applied afterwards.
+    The default bits 10..13 give magnitudes 1024, 2048, 4096, 8192 ADU,
+    which roughly matches the observed range on PFS H4 channel-24
+    glitches.
+
+    Returns
+    -------
+    amounts : np.ndarray
+        Shape ``(n,)``, float32. Signed if ``signed=True``.
+    """
+    rng = _asGenerator(rng)
+    exponents = rng.choice(np.asarray(bits), size=n)
+    magnitudes = np.power(2.0, exponents).astype(np.float32)
+    if not signed:
+        return magnitudes
+    # The sign draw happens after the bit draw; keep this order so seeded
+    # runs stay reproducible.
+    signs = rng.choice([-1.0, 1.0], size=n).astype(np.float32)
+    return magnitudes * signs
+
+
+def randomCRs(
+    n: int,
+    H: int,
+    W: int,
+    nReads: int,
+    amountRange: tuple = (50.0, 5000.0),
+    readRange: Optional[tuple] = None,
+    rng: Optional[Union[int, np.random.Generator]] = None,
+) -> list:
+    """Draw ``n`` random CR hits with log-uniform amplitudes.
+
+    Pixel coordinates are uniform over the ``H x W`` array. ``readRange``
+    is a half-open ``(low, high)`` interval of read indices (``high`` is
+    exclusive). ``readRange=None`` means ``(1, nReads - 1)``, which skips
+    the first and last reads so the step is fully visible in cumulative
+    space.
+    """
+    rng = _asGenerator(rng)
+    if readRange is None:
+        readRange = (1, nReads - 1)
+
+    # Draw order (y, x, read, amount) is part of the seeded behaviour.
+    rows = rng.integers(0, H, size=n)
+    cols = rng.integers(0, W, size=n)
+    hitReads = rng.integers(readRange[0], readRange[1], size=n)
+    logAmounts = rng.uniform(
+        np.log(amountRange[0]), np.log(amountRange[1]), size=n
+    )
+    deposits = np.exp(logAmounts)
+
+    hits = []
+    for row, col, read, deposit in zip(rows, cols, hitReads, deposits):
+        hits.append(CR(int(row), int(col), int(read), float(deposit)))
+    return hits
+
+
+def randomAsicGlitches(
+    n: int,
+    H: int,
+    W: int,
+    nReads: int,
+    bits: Sequence[int] = (10, 11, 12, 13),
+    readRange: Optional[tuple] = None,
+    rng: Optional[Union[int, np.random.Generator]] = None,
+) -> list:
+    """Draw ``n`` random ASIC glitches with digital (``2**bit``) amplitudes.
+
+    ``readRange`` is a half-open ``(low, high)`` interval of read indices
+    (``high`` is exclusive). ``readRange=None`` means ``(1, nReads - 2)``
+    -- skip both endpoints so the glitch and its return-to-baseline are
+    both inside the ramp. Amplitudes are signed and come from
+    `digitalGlitchAmounts`, sharing this call's generator.
+    """
+    rng = _asGenerator(rng)
+    if readRange is None:
+        readRange = (1, nReads - 2)
+
+    # Draw order (y, x, read, amount) is part of the seeded behaviour.
+    rows = rng.integers(0, H, size=n)
+    cols = rng.integers(0, W, size=n)
+    glitchReads = rng.integers(readRange[0], readRange[1], size=n)
+    heights = digitalGlitchAmounts(n, bits=bits, signed=True, rng=rng)
+
+    glitches = []
+    for row, col, read, height in zip(rows, cols, glitchReads, heights):
+        glitches.append(AsicGlitch(int(row), int(col), int(read), float(height)))
+    return glitches
diff --git a/python/lsst/obs/pfs/h4Linearity/types.py b/python/lsst/obs/pfs/h4Linearity/types.py
new file mode 100644
index 00000000..f69fad46
--- /dev/null
+++ b/python/lsst/obs/pfs/h4Linearity/types.py
@@ -0,0 +1,202 @@
+"""Data types and bad-pixel flag constants for h4Linearity.
+
+The four public dataclasses below form the I/O surface of the package:
+
+- :class:`Ramp` — the input to :func:`apply`. A cumulative cube plus
+  an optional per-pixel mask. The caller (typically PfsIsrTask) builds
+  one of these from raw H4 reads.
+- :class:`LinearizedRamp` — the output of :func:`apply`. Linearized
+  cube + merged bad-pixel mask. The pipeline consumes the cumulative
+  values and uses the mask to drive ``exposure.mask`` plane stamping.
+- :class:`Diagnostics` — per-pixel fit-quality arrays plus a
+  global summary dict. Round-trips through FITS via :func:`saveFits` /
+  :func:`loadFits` and surfaces in the persisted calibration product.
+- :class:`LinearityCorrection` — the fitted calibration object. The
+  product butler stores it on disk; :func:`apply` consumes it.
+
+The bad-pixel bit-flag constants are the vocabulary of the integer masks
+carried by ``Ramp.validMask`` (input), ``LinearityCorrection.badPixelMask``
+(uint8, fit-time bits only) and ``LinearizedRamp.badPixelMask`` (uint16,
+output). Bits at or below ``0x10`` are set at fit time; ``0x20`` and ``0x40``
+are set when :func:`apply` finds per-read excursions outside the fitted
+range; bits from ``0x80`` up are per-exposure CR-stage flags.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+import numpy as np
+
+# Bad-pixel bit-flag constants. These form the alphabet of an
+# internal ``(H, W) uint16`` mask threaded through H4 ISR — see the
+# project_h4_mask_refactor_plan memory for the staging design.
+# Each constant is a single bit so flags can be OR'd together; only the
+# low byte (0x01..0x40) fits the uint8 mask stored with the calib.
+#
+# Fit-time flags (set by :func:`fit` and persisted via :func:`saveFits`
+# in the linearity calib's uint8 ``badPixelMask``):
+MASKED_BY_INPUT: int = 0x01      #: input data masked by caller (defects, etc.)
+INSUFFICIENT_POINTS: int = 0x02  #: too few usable reads to fit (in practice: dead pixels)
+FIT_FAILED: int = 0x04           #: normal-equations condition number > limit
+NON_MONOTONIC: int = 0x08        #: fitted polynomial is non-monotonic on its range
+BORDER_PIX: int = 0x10           #: detector-edge reference pixel (skipped by fit)
+
+# Apply-time flags (set by :func:`apply` per-ramp from out-of-range reads):
+BELOW_VALID_RANGE: int = 0x20    #: at least one read was below the per-pixel fitMin
+ABOVE_VALID_RANGE: int = 0x40    #: at least one read was above the per-pixel fitMax
+
+# CR-stage flags (set by ``cr.iterativeUtrDetectAndRepair`` and OR'd
+# into the internal mask after CR detection). Not persisted with the
+# linearity calib — they're per-exposure and live only in the in-memory
+# uint16 internal mask:
+# NOTE(review): HIGH_FIT_RESIDUAL is grouped here, but its description
+# refers to the linearity-fit residual rather than CR detection — confirm
+# which stage actually sets it.
+UNCLASSIFIED: int = 0x0080  #: ≥1 above-threshold delta the classifier could not assign
+UNSTABLE: int = 0x0100  #: ≥ ``badPixelMinOutliers`` 4σ delta excursions (RTS / telegraph)
+ASIC_GLITCH: int = 0x0200  #: ≥1 ASIC-glitch pair above ``asicGlitchHeightMaskADU``
+HIGH_FIT_RESIDUAL: int = 0x0400  #: residualRms > badLinearityMedianMultiplier × median(good residualRms)
+
+# Convenience composite: pixels the linearity fit failed to model in
+# any way. The ISR projection layer maps this group to the published
+# ``LINEARITY_DEFECT`` plane. HIGH_FIT_RESIDUAL is intentionally NOT in
+# DEAD: the fit completed there, just badly — semantically distinct from
+# the hard fit-failure modes.
+# Numerically DEAD == 0x0E (0x02 | 0x04 | 0x08).
+DEAD: int = INSUFFICIENT_POINTS | FIT_FAILED | NON_MONOTONIC
+
+
+@dataclass(frozen=True)
+class Ramp:
+    """A single ramp's cumulative flux at each non-destructive read.
+
+    The input form to :func:`apply`. The dataclass is frozen, so the
+    fields cannot be rebound after construction (the arrays themselves
+    remain ordinary, mutable ndarrays).
+
+    Attributes
+    ----------
+    reads : np.ndarray
+        Shape ``(H, W, N)``, float32. Cumulative signal, already
+        photodiode-corrected and (typically) dark-subtracted.
+        ``reads[y, x, n]`` is the total accumulated ADU at pixel
+        ``(y, x)`` through read *n*; the input to :func:`apply` is
+        the raw cumulative cube, not per-read deltas. The time axis is
+        last so the per-pixel ramp is contiguous in memory.
+    validMask : np.ndarray or None
+        Shape ``(H, W)``, integer-valued. 0 means the pixel is valid;
+        any nonzero value flags it as bad input (e.g. caller-supplied
+        defects). :func:`apply` merges this into the output's
+        ``badPixelMask`` by OR'ing ``MASKED_BY_INPUT`` for any nonzero
+        entry. ``None`` (default) means "all pixels valid".
+        NOTE(review): the ``LinearizedRamp.badPixelMask`` docstring says
+        the caller's bits are passed through unchanged instead — confirm
+        against :func:`apply` which rule is implemented.
+    """
+
+    reads: np.ndarray                   # (H, W, N) float32 cumulative cube
+    validMask: np.ndarray | None = None  # (H, W) integer mask; None = all valid
+
+
+@dataclass(frozen=True)
+class LinearizedRamp:
+    """Output of :func:`apply` on a :class:`Ramp`.
+
+    Attributes
+    ----------
+    cumulativeLinear : np.ndarray
+        Shape ``(H, W, N)``, float32. The linearized cumulative cube —
+        same shape and read ordering as the input ``Ramp.reads``, but
+        each valid pixel has been mapped through its per-pixel model.
+        Bad pixels (any bit set in the merged mask) are passed through
+        unchanged from input ``reads``.
+    badPixelMask : np.ndarray
+        Shape ``(H, W)``, uint16. The augmented internal mask returned
+        by :func:`apply`, formed as the bitwise OR of three sources:
+
+        - the fit-time mask from
+          :class:`LinearityCorrection.badPixelMask` (promoted to uint16);
+        - the caller-supplied ``Ramp.validMask`` (passed through with
+          whatever bits it carried — typically BORDER_PIX +
+          MASKED_BY_INPUT from the upstream defect calib);
+        - the per-pixel out-of-range bits ``BELOW_VALID_RANGE`` /
+          ``ABOVE_VALID_RANGE``.
+
+        Per the "first-reason-wins" rule, the range bits are added
+        *only* on pixels that were not already flagged.
+    """
+
+    cumulativeLinear: np.ndarray   # (H, W, N) float32
+    badPixelMask: np.ndarray       # (H, W) uint16 — see docstring above.
+
+
+@dataclass(frozen=True)
+class Diagnostics:
+    """Per-pixel fit-quality diagnostics plus a dataset-wide summary.
+
+    Carried inside :class:`LinearityCorrection` and round-tripped
+    through FITS by :func:`saveFits` / :func:`loadFits`. All array
+    fields share the detector's ``(H, W)`` shape.
+
+    Attributes
+    ----------
+    residualRms : np.ndarray
+        ``(H, W)`` float32. Per-pixel RMS of the fit residual ``t - model(m)``
+        over the reads used in the fit, in true-signal units.
+    maxAbsResidual : np.ndarray
+        ``(H, W)`` float32. Per-pixel maximum absolute residual.
+    nPointsUsed : np.ndarray
+        ``(H, W)`` int32. Number of reads actually used in the per-pixel
+        fit (after exclusion of masked/saturated/below-threshold reads).
+    monotonic : np.ndarray
+        ``(H, W)`` bool. True when the fitted polynomial is monotonic on
+        the per-pixel ``[fitMin, fitMax]`` interval. Non-monotonic pixels
+        also get the ``NON_MONOTONIC`` bit in ``badPixelMask``.
+    conditionNumber : np.ndarray
+        ``(H, W)`` float32. Per-pixel condition number of the
+        normal-equations matrix. Pixels above ``conditionNumberLimit``
+        in :func:`fit` get ``FIT_FAILED``.
+    summary : dict
+        Dataset-wide scalar summary (counts of flagged pixels, fit
+        parameters used, etc.). Persisted as FITS PRIMARY-header cards
+        and recovered on :func:`loadFits`. Long Python keys round-trip
+        via HIERARCH cards (case-preserved); short keys via the card
+        comment.
+    """
+
+    residualRms: np.ndarray        # (H, W) float32
+    maxAbsResidual: np.ndarray     # (H, W) float32
+    nPointsUsed: np.ndarray        # (H, W) int32
+    monotonic: np.ndarray          # (H, W) bool
+    conditionNumber: np.ndarray    # (H, W) float32
+    summary: dict[str, Any]        # scalar summary; keys are str, values free-form
+
+
+@dataclass(frozen=True)
+class LinearityCorrection:
+    """A fitted per-pixel nonlinearity correction.
+
+    Produced by :func:`fit`, persisted by :func:`saveFits` / :func:`loadFits`,
+    and consumed by :func:`apply`.
+
+    Attributes
+    ----------
+    model : Model
+        The model object (an instance of a class registered in
+        ``MODEL_REGISTRY`` — currently :class:`PolynomialModel`). Carries
+        the form-specific evaluation code; this dataclass just holds the
+        fitted coefficients alongside it. Typed as ``Any`` here to avoid
+        an import cycle with ``models`` — duck-typed against the
+        :class:`Model` protocol at runtime.
+    coefficients : np.ndarray
+        Model-specific shape. For :class:`PolynomialModel(order=N)` this
+        is ``(N+1, H, W)`` float32 — one Chebyshev coefficient per pixel
+        per order.
+    fitMin, fitMax : np.ndarray
+        ``(H, W)`` float32. The per-pixel data range used in the fit (in
+        cumulative-ADU units of the input ramp). :func:`apply` rescales
+        ``m`` to Chebyshev's ``[-1, 1]`` domain over this interval and
+        flags out-of-range reads via ``BELOW/ABOVE_VALID_RANGE``.
+    badPixelMask : np.ndarray
+        ``(H, W)`` uint8. Fit-time bad-pixel flags
+        (``MASKED_BY_INPUT`` / ``INSUFFICIENT_POINTS`` / ``FIT_FAILED`` /
+        ``NON_MONOTONIC`` / ``BORDER_PIX``). :func:`apply` ORs this with
+        its own merged mask to produce the result's mask. Being uint8,
+        it cannot hold the flags at ``0x0080`` and above.
+    diagnostics : Diagnostics
+        Per-pixel fit-quality arrays + the global summary dict; see
+        :class:`Diagnostics`.
+    """
+
+    model: Any                     # Model protocol; runtime-checked to avoid a types <-> models cycle
+    coefficients: np.ndarray       # shape depends on model; polynomial: (order+1, H, W) float32
+    fitMin: np.ndarray             # (H, W) float32
+    fitMax: np.ndarray             # (H, W) float32
+    badPixelMask: np.ndarray       # (H, W) uint8
+    diagnostics: Diagnostics
diff --git a/python/lsst/obs/pfs/h4Linearity/validate.py b/python/lsst/obs/pfs/h4Linearity/validate.py
new file mode 100644
index 00000000..925474be
--- /dev/null
+++ b/python/lsst/obs/pfs/h4Linearity/validate.py
@@ -0,0 +1,1481 @@
+"""Validate H4 linearity corrections by comparing first/second-half rates.
+
+Run a NIR ramp through the full ISR (dark + linearization + defects), split
+the resulting cumulative-flux cube into halves, fit a per-pixel rate to each
+half, and compare. A correctly-tuned linearity model flattens the half-vs-
+half rate; residual structure points at a poor or wrong correction.
+
+Run twice (linearize on / off) so the *improvement* is visible, not just the
+absolute residual.
+
+Usage from JupyterLab::
+
+    from lsst.obs.pfs.h4Linearity import validate
+    cmp = validate.runComparison(butler, dataId)
+    validate.printComparison(cmp)
+    validate.plotComparison(cmp)
+
+Notes
+-----
+- ``PfsIsrTask.makeNirExposure`` mutates the underlying ramp arrays in
+  place (in-place ``cumsum`` and dark subtraction). Each call therefore
+  needs a fresh ``raw``; ``runComparison`` re-fetches between passes.
+- The returned cube is *pre-gain* ADU. ``minRate`` and ``fluxBins``
+  defaults are in pre-gain ADU.
+- Linearity coefficients are not available for every detector (originally
+  only ``n3``). ``doLinearize=True`` raises before any ISR work is done when
+  the detector is not in ``SUPPORTED_LINEARITY_CAMS`` or no correction resolves.
+"""
+
+from dataclasses import dataclass, field
+from typing import Optional
+
+import numpy as np
+
+from lsst.obs.pfs import isrTask as pfsIsrTask
+
+from . import cr
+from . import isrPlots
+
+
+# Detector names for which ``runHalvesTest`` accepts ``doLinearize=True``;
+# any other name is rejected up front with a RuntimeError.
+SUPPORTED_LINEARITY_CAMS = ("n1", "n2", "n3", "n4")
+
+# Default rate floor (ADU/read): pixels whose mean half-rate is not above
+# this are left out of the statistics.
+DEFAULT_MIN_RATE = 3.0
+# "auto" → bins are derived per-call from minRate and the ramp midpoint:
+#   [(0, minRate*midRead), (minRate*midRead, 100), (100, 1000),
+#    (1000, 5000), (5000, 10000), (10000, 50000)]
+# Pass an explicit sequence of (lo, hi) pairs to override.
+DEFAULT_FLUX_BINS = "auto"
+# Percent thresholds reported as "fraction of pixels with |relDiff| < pct%".
+DEFAULT_AGREE_PCT = (0.5, 1.0, 2.0)
+
+
+def _resolveFluxBins(fluxBins, minRate, midRead):
+    """Turn the ``fluxBins`` argument into a tuple of ``(lo, hi)`` floats.
+
+    Parameters
+    ----------
+    fluxBins : "auto" or iterable of (lo, hi)
+        ``"auto"`` derives the bins from ``minRate`` and ``midRead``; any
+        other value is iterated as ``(lo, hi)`` pairs and coerced to float.
+    minRate : float
+        Rate floor in ADU/read.
+    midRead : int
+        Number of reads in the first half of the ramp.
+
+    Returns
+    -------
+    bins : tuple of (float, float)
+        For ``"auto"``: ``(0, thr)`` with ``thr = minRate * midRead``,
+        followed by contiguous bins from ``thr`` up through the fixed
+        edges 100, 1000, 5000, 10000, 50000. Fixed edges that lie below
+        ``thr`` are dropped, so the bins never invert or overlap; for
+        ``thr <= 100`` the result is exactly the historical six bins.
+    """
+    # Only a real string can mean "auto". Comparing an ndarray of bins to
+    # "auto" would be an elementwise comparison, not a scalar truth value.
+    if isinstance(fluxBins, str) and fluxBins == "auto":
+        thr = float(minRate) * float(midRead)
+        bins = [(0.0, thr)]
+        lo = thr
+        for hi in (100.0, 1_000.0, 5_000.0, 10_000.0, 50_000.0):
+            if hi < lo:
+                # This edge is already covered by the (0, thr) bin; adding
+                # it would create an inverted bin and an overlap.
+                continue
+            bins.append((lo, hi))
+            lo = hi
+        return tuple(bins)
+    return tuple((float(lo), float(hi)) for lo, hi in fluxBins)
+
+
+@dataclass
+class HalvesResult:
+    """Everything produced by one ``runHalvesTest`` pass.
+
+    The per-pixel arrays are 2-D images derived from the ISR flux cube;
+    the scalar fields record how the pass was configured.
+    """
+
+    slope1: np.ndarray           # per-pixel rate fitted to the first half of the ramp
+    slope2: np.ndarray           # per-pixel rate fitted to the second half of the ramp
+    meanFlux: np.ndarray         # per-pixel mean of the cube over all reads
+    relDiff: np.ndarray          # 2*(slope1 - slope2)/(slope1 + slope2); NaN where the sum is 0
+    goodMask: np.ndarray         # unmasked pixels with finite slopes
+    statsMask: np.ndarray        # goodMask AND avgRate > minRate
+    exposureMaskBits: dict       # mask-plane name -> number of pixels with that plane set
+    nReads: int                  # number of reads in the cube
+    midRead: int                 # split index: first half is reads [0, midRead)
+    cam: str                     # detector name
+    doLinearize: bool            # whether linearization was requested for this pass
+    minRate: float               # rate floor used to build statsMask
+    summary: dict                # nested statistics dict built by _summarize
+    cube: Optional[np.ndarray] = None  # full flux cube, kept only when returnCube=True
+
+
+@dataclass
+class ComparisonResult:
+    """Paired halves-test results with and without linearization.
+
+    ``delta`` is filled by ``runComparison`` with the overall medians and
+    MADs of both passes plus ``madImprovementFactor`` (off / on).
+    """
+
+    on: HalvesResult             # pass run with doLinearize=True
+    off: HalvesResult            # pass run with doLinearize=False
+    delta: dict = field(default_factory=dict)  # fresh dict per instance
+
+
+def _makeDefaultIsrTask(doLinearize: bool):
+    """Construct a `PfsIsrTask` configured for the halves test.
+
+    Dark subtraction and defect handling are on; flat-fielding, saturation
+    interpolation, IPC and quick-CDS are off; the raw cube is requested.
+    Linearization and CR rejection both follow ``doLinearize``.
+    """
+    cfg = pfsIsrTask.PfsIsrTask.ConfigClass()
+
+    # Generic ISR switches.
+    cfg.doFlat = False
+    cfg.doDark = True
+    cfg.doDefect = True
+    cfg.doSaturationInterpolation = False
+
+    # H4-specific switches: full up-the-ramp processing with the cube kept.
+    cfg.h4.quickCDS = False
+    cfg.h4.doIPC = False
+    cfg.h4.doWriteRawCube = True
+    cfg.h4.doLinearize = doLinearize
+    # Rate-based CR rejection requires linearization to have run.
+    cfg.h4.doCR = doLinearize
+
+    cfg.validate()
+    return pfsIsrTask.PfsIsrTask(config=cfg)
+
+
+def _ensureIsrTask(isrTask, doLinearize: bool):
+    """Return a task whose H4 config fits the requested mode.
+
+    A supplied task is returned unchanged when its ``h4`` config already
+    has ``doLinearize`` and ``doCR`` equal to ``doLinearize``,
+    ``doWriteRawCube`` on and ``quickCDS`` off. Otherwise (or when no task
+    is supplied) a fresh default task is built; the supplied task is never
+    modified.
+    """
+    if isrTask is None:
+        return _makeDefaultIsrTask(doLinearize)
+
+    wanted = bool(doLinearize)
+    compatible = (
+        bool(isrTask.config.h4.doLinearize) == wanted
+        and bool(isrTask.config.h4.doCR) == wanted
+        and bool(isrTask.config.h4.doWriteRawCube)
+        and not bool(isrTask.config.h4.quickCDS)
+    )
+    return isrTask if compatible else _makeDefaultIsrTask(doLinearize)
+
+
+def _resolveCam(cam, raw, butler, dataId):
+    """Return ``(cam, raw)``, inferring the detector name when needed.
+
+    An explicit ``cam`` is returned as-is together with whatever ``raw``
+    was passed (possibly ``None``). Otherwise the name is read from
+    ``raw.detector``, fetching the raw from the butler first if it was
+    not supplied.
+    """
+    if cam is None:
+        if raw is None:
+            raw = butler.get("raw", dataId)
+        cam = raw.detector.getName()
+    return cam, raw
+
+
+def _uniformSlope(cube: np.ndarray) -> np.ndarray:
+    """Least-squares slope per pixel against the read index 0..k-1.
+
+    Uses the closed-form simple-regression slope with sums accumulated in
+    float64, so no per-pixel solver is needed.
+
+    Parameters
+    ----------
+    cube : np.ndarray
+        Shape ``(k, H, W)``. Treated as ``y[k]`` per pixel.
+
+    Returns
+    -------
+    slope : np.ndarray
+        Shape ``(H, W)``, dtype float32. Slope of ``cube`` against
+        ``arange(k)``. Constant offsets in ``cube`` cancel.
+
+    Raises
+    ------
+    ValueError
+        If the cube has fewer than two reads.
+    """
+    nReads = cube.shape[0]
+    if nReads < 2:
+        raise ValueError(f"Need at least 2 reads for a slope; got {nReads}.")
+
+    readIndex = np.arange(nReads, dtype=np.float64)
+    sumX = float(readIndex.sum())
+    sumXX = float((readIndex * readIndex).sum())
+
+    # Per-pixel sums over the read axis, accumulated in double precision.
+    sumY = cube.sum(axis=0, dtype=np.float64)
+    sumXY = np.einsum("k,khw->hw", readIndex, cube, dtype=np.float64)
+
+    numerator = nReads * sumXY - sumX * sumY
+    slope = numerator / (nReads * sumXX - sumX * sumX)
+    return slope.astype(np.float32)
+
+
+def _mad(values: np.ndarray) -> float:
+    """Return the median absolute deviation scaled by 1.4826.
+
+    The scale factor makes the result comparable to a Gaussian sigma.
+    An empty input yields NaN.
+    """
+    if values.size == 0:
+        return float("nan")
+    center = float(np.median(values))
+    deviations = np.abs(values - center)
+    return float(1.4826 * np.median(deviations))
+
+
+def _binStats(values: np.ndarray, agreePct):
+    """Summarize one sample of relative differences.
+
+    Returns a dict with ``median``, ``mad``, the 1st/16th/84th/99th
+    percentiles, ``fracWithin`` (for each ``pct`` in ``agreePct``, the
+    fraction of values with ``|value| < pct / 100``) and ``nPix``. An
+    empty sample gives NaN for every statistic and ``nPix`` of 0.
+    """
+    if values.size == 0:
+        nan = float("nan")
+        stats = dict.fromkeys(("median", "mad", "p1", "p16", "p84", "p99"), nan)
+        stats["fracWithin"] = {pct: nan for pct in agreePct}
+        stats["nPix"] = 0
+        return stats
+
+    p1, p16, med, p84, p99 = np.percentile(values, [1, 16, 50, 84, 99])
+    magnitude = np.abs(values)
+    fracWithin = {}
+    for pct in agreePct:
+        # Thresholds are given in percent; relDiff itself is a fraction.
+        fracWithin[pct] = float(np.mean(magnitude < pct / 100.0))
+
+    return {
+        "median": float(med),
+        "mad": _mad(values),
+        "p1": float(p1),
+        "p16": float(p16),
+        "p84": float(p84),
+        "p99": float(p99),
+        "fracWithin": fracWithin,
+        "nPix": int(values.size),
+    }
+
+
+def _summarize(relDiff, meanFlux, goodMask, statsMask, fluxBins, agreePct):
+    """Build the nested summary dict for one halves-test pass.
+
+    Parameters
+    ----------
+    relDiff, meanFlux : np.ndarray
+        Per-pixel images; only pixels selected by ``statsMask`` are used.
+    goodMask, statsMask : np.ndarray of bool
+        Pixel selections; their sizes and counts are reported as
+        ``nTotal``, ``nGood`` and ``nStats``.
+    fluxBins : iterable of (lo, hi)
+        Half-open ``[lo, hi)`` bins on ``meanFlux``.
+    agreePct : sequence of float
+        Percent thresholds forwarded to ``_binStats``.
+
+    Returns
+    -------
+    summary : dict
+        Keys ``nGood``, ``nStats``, ``nTotal``, ``overall`` and ``byBin``.
+        ``overall`` never carries ``nPix`` (``nStats`` is the same number);
+        each ``byBin`` entry does. Every stats dict is a distinct object,
+        so callers may modify one without affecting the others.
+    """
+    nTotal = int(goodMask.size)
+    nGood = int(goodMask.sum())
+    nStats = int(statsMask.sum())
+
+    # No special case for nStats == 0: boolean indexing yields empty
+    # arrays, for which _binStats returns its all-NaN record. This gives
+    # the empty case the same shape as the normal one (fresh dict per bin,
+    # no "nPix" in "overall").
+    selected = relDiff[statsMask]
+    selectedFlux = meanFlux[statsMask]
+
+    byBin = {}
+    for lo, hi in fluxBins:
+        inBin = (selectedFlux >= lo) & (selectedFlux < hi)
+        byBin[(lo, hi)] = _binStats(selected[inBin], agreePct)
+
+    overall = _binStats(selected, agreePct)
+    overall.pop("nPix", None)
+
+    return {
+        "nGood": nGood,
+        "nStats": nStats,
+        "nTotal": nTotal,
+        "overall": overall,
+        "byBin": byBin,
+    }
+
+
+def _exposureMaskBitCounts(exp):
+    """Count pixels per named mask plane of ``exp.mask``.
+
+    Planes whose bit cannot be looked up are skipped (best effort), so
+    the returned dict only has entries for planes the mask knows about.
+    """
+    planeNames = ("BAD", "SAT", "INTRP", "CR", "EDGE", "DETECTED", "NO_DATA")
+    maskImage = exp.mask
+    pixels = maskImage.array
+
+    counts = {}
+    for plane in planeNames:
+        try:
+            planeBit = maskImage.getPlaneBitMask(plane)
+        except Exception:
+            # Plane not defined on this mask; omit it from the report.
+            continue
+        counts[plane] = int(np.count_nonzero(pixels & planeBit))
+    return counts
+
+
+def runHalvesTest(
+    butler,
+    dataId,
+    *,
+    doLinearize: bool = True,
+    cam: Optional[str] = None,
+    isrTask=None,
+    raw=None,
+    nirDark=None,
+    defects=None,
+    minRate: float = DEFAULT_MIN_RATE,
+    fluxBins=DEFAULT_FLUX_BINS,
+    agreePct=DEFAULT_AGREE_PCT,
+    returnCube: bool = False,
+    log=None,
+) -> HalvesResult:
+    """Run NIR ISR once and report half-vs-half rate statistics.
+
+    The flux cube returned by ``makeNirExposure`` is split at
+    ``n // 2``; a least-squares rate is fitted to each half and the two
+    are compared per pixel as ``2 * (s1 - s2) / (s1 + s2)``.
+
+    Parameters
+    ----------
+    butler : `lsst.daf.butler.Butler`
+    dataId : mapping
+    doLinearize : bool
+        Whether to apply the new H4 per-read linearity correction.
+    cam : str, optional
+        Detector name (e.g. ``"n3"``). If ``None``, inferred from the raw.
+        Must be one of ``SUPPORTED_LINEARITY_CAMS`` when
+        ``doLinearize=True``.
+    isrTask : `PfsIsrTask`, optional
+        Reuse a prebuilt task. If its config disagrees with the requested
+        mode, a fresh task is built (the supplied task is not mutated).
+    raw, nirDark, defects : optional
+        Pre-fetched butler objects. Fetched on demand if not given.
+    minRate : float
+        Pixels with mean rate < ``minRate`` ADU/read are excluded from
+        statistics (avoids dividing by ~zero in relDiff).
+    fluxBins : "auto" or sequence of (lo, hi)
+        Cumulative-flux bins (in pre-gain ADU at the ramp midpoint) used
+        for per-bin statistics. ``"auto"`` (the default) derives them from
+        ``minRate`` and the split index.
+    agreePct : sequence of float
+        Percent thresholds for the ``fracWithin`` summary.
+    returnCube : bool
+        If True, the returned ``HalvesResult`` carries the full flux cube.
+        Off by default to keep peak memory at one cube.
+    log : logger, optional
+        Defaults to the ISR task's own logger.
+
+    Returns
+    -------
+    HalvesResult
+
+    Raises
+    ------
+    RuntimeError
+        If linearization is requested for an unsupported detector or no
+        correction resolves, if ISR returns no cube, or if the ramp is too
+        short to give each half at least two reads.
+    """
+    cam, raw = _resolveCam(cam, raw, butler, dataId)
+
+    # Fail fast, before any butler fetches or ISR work.
+    if doLinearize and cam not in SUPPORTED_LINEARITY_CAMS:
+        raise RuntimeError(
+            f"Linearity corrections are only available for {SUPPORTED_LINEARITY_CAMS}; "
+            f"got cam={cam!r}. Re-run with doLinearize=False to test this detector."
+        )
+
+    isrTask = _ensureIsrTask(isrTask, doLinearize)
+    if log is None:
+        log = isrTask.log
+
+    if raw is None:
+        raw = butler.get("raw", dataId)
+    if nirDark is None:
+        nirDark = butler.get("nirDark", dataId)
+    if defects is None:
+        defects = butler.get("defects", dataId)
+
+    linearity = isrTask.resolveNirLinearity(cam) if doLinearize else None
+    if doLinearize and linearity is None:
+        raise RuntimeError(
+            f"resolveNirLinearity({cam!r}) returned None; cannot linearize."
+        )
+
+    log.info(
+        f"validate.runHalvesTest: cam={cam} doLinearize={doLinearize} "
+        f"visit={dataId.get('visit', '?')}"
+    )
+
+    exp, cube = isrTask.makeNirExposure(
+        raw,
+        nirDark=nirDark,
+        defects=defects,
+        linearity=linearity,
+        doReturnRawCube=True,
+    )
+    if cube is None:
+        raise RuntimeError(
+            "makeNirExposure returned no flux cube; check h4.doWriteRawCube and h4.quickCDS."
+        )
+
+    # makeNirExposure() only flags defects in the mask plane when
+    # linearization is enabled (via the MASKED_BY_INPUT path). PfsIsrTask.run()
+    # normally calls maskDefect() afterwards; we are bypassing that, so apply
+    # defects ourselves. Idempotent when defects are already flagged.
+    if defects is not None:
+        defects.maskPixels(exp.mask, "BAD")
+
+    # Split the read axis in two; each half needs >= 2 reads for a slope.
+    n = cube.shape[0]
+    mid = n // 2
+    if mid < 2 or n - mid < 2:
+        raise RuntimeError(
+            f"Ramp too short to split into halves with >= 2 reads each (n={n})."
+        )
+
+    slope1 = _uniformSlope(cube[:mid])
+    slope2 = _uniformSlope(cube[mid:])
+    meanFlux = cube.mean(axis=0, dtype=np.float64).astype(np.float32)
+
+    # "Good" means no mask bit of any kind is set and both slopes are finite.
+    unmasked = exp.mask.array == 0
+    goodMask = unmasked & np.isfinite(slope1) & np.isfinite(slope2)
+
+    # np.where evaluates the division everywhere, hence the errstate guard;
+    # pixels with a zero denominator end up NaN.
+    with np.errstate(divide="ignore", invalid="ignore"):
+        denom = slope1 + slope2
+        relDiff = np.where(denom != 0, 2.0 * (slope1 - slope2) / denom, np.nan).astype(
+            np.float32
+        )
+
+    avgSlope = 0.5 * (slope1 + slope2)
+    statsMask = goodMask & (avgSlope > minRate)
+
+    fluxBins = _resolveFluxBins(fluxBins, minRate, mid)
+    summary = _summarize(relDiff, meanFlux, goodMask, statsMask, fluxBins, agreePct)
+    exposureMaskBits = _exposureMaskBitCounts(exp)
+
+    result = HalvesResult(
+        slope1=slope1,
+        slope2=slope2,
+        meanFlux=meanFlux,
+        relDiff=relDiff,
+        goodMask=goodMask,
+        statsMask=statsMask,
+        exposureMaskBits=exposureMaskBits,
+        nReads=n,
+        midRead=mid,
+        cam=cam,
+        doLinearize=doLinearize,
+        minRate=float(minRate),
+        summary=summary,
+        cube=cube if returnCube else None,
+    )
+
+    # Drop the local reference so the cube is not kept alive by this frame.
+    if not returnCube:
+        del cube
+    return result
+
+
+def runComparison(
+    butler,
+    dataId,
+    *,
+    cam: Optional[str] = None,
+    isrTask=None,
+    nirDark=None,
+    defects=None,
+    minRate: float = DEFAULT_MIN_RATE,
+    fluxBins=DEFAULT_FLUX_BINS,
+    agreePct=DEFAULT_AGREE_PCT,
+    log=None,
+) -> ComparisonResult:
+    """Run the halves test twice (linearize on, then off) and compare.
+
+    Each pass gets its own ``raw`` because ``makeNirExposure`` mutates the
+    ramp data in place. When ``cam`` is not given, the raw fetched to
+    infer the detector name is still untouched, so it is used for the
+    first pass instead of being fetched again. ``defects`` and
+    ``nirDark`` are read-only consumers and are reused.
+
+    Parameters
+    ----------
+    butler, dataId
+        Forwarded to ``runHalvesTest`` and used to fetch inputs.
+    cam : str, optional
+        Detector name; inferred from the raw when ``None``.
+    isrTask, nirDark, defects, minRate, fluxBins, agreePct, log
+        Forwarded unchanged to both ``runHalvesTest`` passes.
+
+    Returns
+    -------
+    ComparisonResult
+        ``delta`` holds ``median_on``, ``median_off``, ``mad_on``,
+        ``mad_off`` and ``madImprovementFactor`` (``mad_off / mad_on``, or
+        NaN when ``mad_on`` is zero or not finite).
+    """
+    # rawOn is None when cam was supplied, otherwise a freshly fetched raw.
+    cam, rawOn = _resolveCam(cam, None, butler, dataId)
+
+    if nirDark is None:
+        nirDark = butler.get("nirDark", dataId)
+    if defects is None:
+        defects = butler.get("defects", dataId)
+
+    common = dict(
+        cam=cam,
+        isrTask=isrTask,
+        nirDark=nirDark,
+        defects=defects,
+        minRate=minRate,
+        fluxBins=fluxBins,
+        agreePct=agreePct,
+        returnCube=False,
+        log=log,
+    )
+
+    if rawOn is None:
+        rawOn = butler.get("raw", dataId)
+    onResult = runHalvesTest(
+        butler,
+        dataId,
+        doLinearize=True,
+        raw=rawOn,
+        **common,
+    )
+    # The first raw has been modified by ISR and must not be reused;
+    # release it before fetching the second one to keep peak memory down.
+    del rawOn
+
+    offResult = runHalvesTest(
+        butler,
+        dataId,
+        doLinearize=False,
+        raw=butler.get("raw", dataId),
+        **common,
+    )
+
+    medOn = onResult.summary["overall"]["median"]
+    medOff = offResult.summary["overall"]["median"]
+    madOn = onResult.summary["overall"]["mad"]
+    madOff = offResult.summary["overall"]["mad"]
+    delta = {
+        "median_on": medOn,
+        "median_off": medOff,
+        "mad_on": madOn,
+        "mad_off": madOff,
+        "madImprovementFactor": (madOff / madOn) if madOn and np.isfinite(madOn) else float("nan"),
+    }
+    return ComparisonResult(on=onResult, off=offResult, delta=delta)
+
+
+def _formatBinTable(byBin, agreePct):
+    """Render per-bin statistics as a fixed-width text table.
+
+    One header line followed by one line per flux bin; every cell is
+    right-aligned in a 12-character column and columns are separated by
+    two spaces.
+    """
+    def formatRow(cells):
+        return "  ".join(f"{cell:>12s}" for cell in cells)
+
+    header = ["fluxBin", "nPix", "median", "MAD", *(f"|<{p}%" for p in agreePct)]
+    table = [formatRow(header)]
+    for (lo, hi), stats in byBin.items():
+        cells = [
+            f"[{lo},{hi})",
+            f"{stats['nPix']}",
+            f"{stats['median']:+.4f}",
+            f"{stats['mad']:.4f}",
+        ]
+        cells.extend(f"{stats['fracWithin'][p]:.3f}" for p in agreePct)
+        table.append(formatRow(cells))
+    return "\n".join(table)
+
+
+def summarize(result: HalvesResult) -> str:
+    """Format one halves-test result as a multi-line text report.
+
+    The report lists the run configuration, pixel counts, mask-plane
+    counts, overall relDiff statistics, agreement fractions and the
+    per-flux-bin table.
+    """
+    summary = result.summary
+    overall = summary["overall"]
+    thresholds = sorted(overall["fracWithin"].keys())
+
+    # Guard the percentage denominators against an empty image.
+    denom = max(summary["nTotal"], 1)
+    goodPct = 100.0 * summary["nGood"] / denom
+    statsPct = 100.0 * summary["nStats"] / denom
+
+    maskBits = ", ".join(f"{k}={v}" for k, v in result.exposureMaskBits.items())
+    fractions = "  ".join(
+        f"|relDiff|<{p}%: {overall['fracWithin'][p]:.3f}" for p in thresholds
+    )
+
+    report = []
+    report.append(
+        f"cam={result.cam}  doLinearize={result.doLinearize}  "
+        f"nReads={result.nReads}  midRead={result.midRead}"
+    )
+    report.append(
+        f"good (unmasked) pixels: {summary['nGood']}/{summary['nTotal']} "
+        f"({goodPct:.2f}%)"
+    )
+    report.append(
+        f"in stats (avgRate > minRate): {summary['nStats']}/{summary['nTotal']} "
+        f"({statsPct:.2f}%)"
+    )
+    report.append("mask bits: " + maskBits)
+    report.append(
+        f"overall relDiff: median={overall['median']:+.4f}  MAD={overall['mad']:.4f}  "
+        f"p16/p84=[{overall['p16']:+.4f},{overall['p84']:+.4f}]  "
+        f"p1/p99=[{overall['p1']:+.4f},{overall['p99']:+.4f}]"
+    )
+    report.append("fraction within: " + fractions)
+    report.append("by flux bin:")
+    report.append(_formatBinTable(summary["byBin"], thresholds))
+    return "\n".join(report)
+
+
+def printComparison(cmp: ComparisonResult) -> None:
+    """Print the OFF and ON summaries, then the median/MAD comparison.
+
+    Output goes to stdout: each summary is preceded by a banner line and
+    followed by a blank line; the final block shows both medians, both
+    MADs and the off/on MAD improvement factor.
+    """
+    sections = (
+        ("=== linearize=OFF ===", cmp.off),
+        ("=== linearize=ON ===", cmp.on),
+    )
+    for banner, result in sections:
+        print(banner)
+        print(summarize(result))
+        print()
+
+    delta = cmp.delta
+    print(
+        f"Δ median: off={delta['median_off']:+.4f}  on={delta['median_on']:+.4f}\n"
+        f"Δ MAD:    off={delta['mad_off']:.4f}   on={delta['mad_on']:.4f}   "
+        f"improvement (off/on) = {delta['madImprovementFactor']:.2f}x"
+    )
+
+
+def plotHalves(
+    result: HalvesResult,
+    *,
+    fig=None,
+    sample: int = 200_000,
+    vmin: float = -0.05,
+    vmax: float = 0.05,
+    title: Optional[str] = None,
+):
+    """Draw the three-panel diagnostic figure for one halves-test result.
+
+    Panels, left to right:
+
+    1. Histogram of relDiff over the statsMask pixels, clipped to
+       ``max(|vmin|, |vmax|)``, with median and median±MAD markers.
+    2. Image of relDiff over the goodMask pixels (others shown as NaN),
+       colour-scaled to ``[vmin, vmax]``.
+    3. Hexbin of relDiff against mean flux for the statsMask pixels
+       (a fixed-seed random subset of at most ``sample`` points), with the
+       per-bin median ± MAD from ``result.summary["byBin"]`` overplotted.
+
+    Parameters
+    ----------
+    result : HalvesResult
+    fig : matplotlib figure, optional
+        Figure to draw into; a new 16x5 figure is created when omitted.
+    sample : int
+        Maximum number of points passed to the hexbin panel.
+    vmin, vmax : float
+        relDiff display range.
+    title : str, optional
+        Figure title; defaults to a line describing the run.
+
+    Returns
+    -------
+    fig : matplotlib.figure.Figure
+    """
+    import matplotlib.pyplot as plt
+
+    createdHere = fig is None
+    if createdHere:
+        fig, panels = plt.subplots(1, 3, figsize=(16, 5))
+    else:
+        panels = fig.subplots(1, 3)
+    histAx, mapAx, scatterAx = panels
+
+    inStats = result.statsMask
+    relVals = result.relDiff[inStats]
+    fluxVals = result.meanFlux[inStats]
+    haveData = bool(relVals.size)
+
+    # Panel 1: clipped histogram with median / MAD markers.
+    if haveData:
+        halfWidth = max(abs(vmin), abs(vmax))
+        histAx.hist(relVals[np.abs(relVals) <= halfWidth], bins=120, color="0.3")
+        center = float(np.median(relVals))
+        spread = _mad(relVals)
+        histAx.axvline(center, color="C3", lw=1.5, label=f"median={center:+.4f}")
+        histAx.axvline(
+            center - spread, color="C3", lw=1.0, ls="--", label=f"±MAD={spread:.4f}"
+        )
+        histAx.axvline(center + spread, color="C3", lw=1.0, ls="--")
+        histAx.legend(loc="upper right", fontsize=9)
+    histAx.set_xlabel("relDiff = 2(s1-s2)/(s1+s2)")
+    histAx.set_ylabel(f"pixels with avgRate > {result.minRate:g}")
+    histAx.set_title("histogram (clipped)")
+
+    # Panel 2: spatial map, blanking everything outside goodMask.
+    blanked = np.where(result.goodMask, result.relDiff, np.nan)
+    image = mapAx.imshow(blanked, origin="lower", cmap="RdBu_r", vmin=vmin, vmax=vmax)
+    fig.colorbar(image, ax=mapAx, fraction=0.046, pad=0.02)
+    mapAx.set_title("relDiff map (unmasked pixels)")
+    mapAx.set_xlabel("x")
+    mapAx.set_ylabel("y")
+
+    # Panel 3: density of relDiff vs flux plus the per-bin summary points.
+    if haveData:
+        xs, ys = fluxVals, relVals
+        if relVals.size > sample:
+            # Fixed seed keeps the thinned subset reproducible between calls.
+            keep = np.random.default_rng(0).choice(
+                relVals.size, size=sample, replace=False
+            )
+            xs, ys = fluxVals[keep], relVals[keep]
+        scatterAx.hexbin(xs, ys, gridsize=80, cmap="viridis", mincnt=1, bins="log")
+        scatterAx.set_ylim(vmin, vmax)
+        for (lo, hi), binStats in result.summary["byBin"].items():
+            scatterAx.errorbar(
+                0.5 * (lo + hi), binStats["median"], yerr=binStats["mad"],
+                fmt="o", color="C3", capsize=4,
+            )
+            scatterAx.axvline(lo, color="0.7", lw=0.5)
+            scatterAx.axvline(hi, color="0.7", lw=0.5)
+    scatterAx.set_xlabel("mean cumulative flux (ADU)")
+    scatterAx.set_ylabel("relDiff")
+    scatterAx.set_title("relDiff vs mean flux")
+
+    if not title:
+        title = (
+            f"{result.cam}  doLinearize={result.doLinearize}  "
+            f"nReads={result.nReads}  midRead={result.midRead}"
+        )
+    fig.suptitle(title)
+    # Only re-layout figures created here; a caller-supplied one is left alone.
+    if createdHere:
+        fig.tight_layout()
+    return fig
+
+
+def plotStatsMask(result: HalvesResult, *, ax=None):
+    """Map which pixels contribute to the relDiff statistics.
+
+    Every pixel is assigned one of three classes:
+
+      0 = not in goodMask and not in statsMask (labelled "masked")
+      1 = in goodMask but not in statsMask (labelled "unmasked, rate≤minRate")
+      2 = in statsMask (labelled "in stats")
+
+    The colorbar tick labels carry the pixel count of each class. Draws
+    into ``ax`` when given, otherwise creates a new 8x7 figure. Returns
+    the figure.
+    """
+    import matplotlib.pyplot as plt
+    from matplotlib.colors import ListedColormap, BoundaryNorm
+
+    fig = None
+    if ax is not None:
+        fig = ax.figure
+    else:
+        fig, ax = plt.subplots(figsize=(8, 7))
+
+    good = result.goodMask
+    inStats = result.statsMask
+    belowRate = good & ~inStats
+
+    # statsMask takes precedence over goodMask, exactly as a later
+    # assignment would overwrite an earlier one.
+    classes = np.where(inStats, 2, np.where(good, 1, 0)).astype(np.uint8)
+
+    palette = ListedColormap(["#222222", "#e8a838", "#3a8fdc"])
+    edges = BoundaryNorm([-0.5, 0.5, 1.5, 2.5], palette.N)
+
+    image = ax.imshow(
+        classes, origin="lower", cmap=palette, norm=edges, interpolation="nearest"
+    )
+    colorbar = fig.colorbar(image, ax=ax, ticks=[0, 1, 2], fraction=0.046, pad=0.02)
+    nMasked = (~good).sum()
+    nBelow = int(belowRate.sum())
+    nStats = int(inStats.sum())
+    colorbar.ax.set_yticklabels([
+        f"masked ({nMasked})",
+        f"unmasked, rate≤{result.minRate:g}"
+        f" ({nBelow})",
+        f"in stats ({nStats})",
+    ])
+    ax.set_title(
+        f"statsMask  cam={result.cam}  doLinearize={result.doLinearize}  "
+        f"minRate={result.minRate:g}"
+    )
+    ax.set_xlabel("x")
+    ax.set_ylabel("y")
+    return fig
+
+
+def outlierMask(result: HalvesResult, *, thresh: float = 0.5) -> np.ndarray:
+    """Flag the statsMask pixels whose relDiff magnitude exceeds ``thresh``.
+
+    Returns a boolean array shaped like ``result.relDiff``.
+    """
+    exceeds = np.abs(result.relDiff) > thresh
+    return result.statsMask & exceeds
+
+
+def plotOutliers(
+    result: HalvesResult,
+    *,
+    thresh: float = 0.5,
+    ax=None,
+):
+    """Draw a 2-D map of the statsMask pixels with |relDiff| > thresh.
+
+    Four classes are painted:
+
+      0 (dark)  = neither in goodMask nor in statsMask ("masked")
+      1 (light) = in goodMask or statsMask and not an outlier ("in-bounds")
+      2 (blue)  = outlier with relDiff < 0
+      3 (red)   = outlier with relDiff > 0
+
+    The colorbar labels and the title carry the outlier counts. Draws
+    into ``ax`` when given, otherwise creates a new 8x7 figure. Returns
+    the figure.
+    """
+    import matplotlib.pyplot as plt
+    from matplotlib.colors import ListedColormap, BoundaryNorm
+
+    fig = None
+    if ax is not None:
+        fig = ax.figure
+    else:
+        fig, ax = plt.subplots(figsize=(8, 7))
+
+    outliers = outlierMask(result, thresh=thresh)
+    positive = outliers & (result.relDiff > 0)
+    negative = outliers & (result.relDiff < 0)
+
+    # Start from "masked", promote every unmasked or in-stats pixel to the
+    # background class, then overlay the signed outliers on top.
+    classes = np.zeros(result.goodMask.shape, dtype=np.uint8)
+    classes[result.goodMask | result.statsMask] = 1
+    classes[negative] = 2
+    classes[positive] = 3
+
+    palette = ListedColormap(["#222222", "#dddddd", "#3a6fff", "#dc3a3a"])
+    edges = BoundaryNorm([-0.5, 0.5, 1.5, 2.5, 3.5], palette.N)
+
+    image = ax.imshow(
+        classes, origin="lower", cmap=palette, norm=edges, interpolation="nearest"
+    )
+    colorbar = fig.colorbar(
+        image, ax=ax, ticks=[0, 1, 2, 3], fraction=0.046, pad=0.02
+    )
+    nNegative = int(negative.sum())
+    nPositive = int(positive.sum())
+    colorbar.ax.set_yticklabels([
+        "masked",
+        "in-bounds",
+        f"relDiff < -{thresh:g} ({nNegative})",
+        f"relDiff > +{thresh:g} ({nPositive})",
+    ])
+    ax.set_title(
+        f"|relDiff| > {thresh:g} outliers  cam={result.cam}  "
+        f"doLinearize={result.doLinearize}  total={int(outliers.sum())}"
+    )
+    ax.set_xlabel("x")
+    ax.set_ylabel("y")
+    return fig
+
+
+def plotOutlierTraces(
+    result: HalvesResult,
+    cube: np.ndarray,
+    *,
+    thresh: float = 0.5,
+    n: int = 20,
+    normalize: str = "mean",
+    coords=None,
+    ax=None,
+    rng=None,
+    alpha: float = 0.6,
+):
+    """Plot per-read flux traces for outlier pixels.
+
+    Each selected pixel is drawn as one line, coloured by its relDiff on a
+    symmetric ``RdBu_r`` scale; a dashed vertical line marks the split
+    between the two halves (``result.midRead - 0.5``).
+
+    Parameters
+    ----------
+    result : HalvesResult
+    cube : np.ndarray
+        The (nreads, H, W) flux cube. Pass the cube returned from
+        ``runHalvesTest(..., returnCube=True)``.
+    thresh : float
+        |relDiff| threshold defining outliers.
+    n : int
+        Number of outlier pixels to draw (random sample). Ignored if
+        ``coords`` is given.
+    normalize : {"mean", "max", "none"}
+        Per-pixel normalization of the trace. A normalization factor of
+        exactly zero is replaced by 1 so the trace is left unscaled.
+    coords : sequence of (x, y), optional
+        Specific pixel coordinates to plot. Overrides ``thresh``/``n``.
+    ax : matplotlib axis, optional
+    rng : np.random.Generator, optional
+        For reproducible random subsets. Defaults to seed=0.
+    alpha : float
+        Line alpha.
+
+    Returns
+    -------
+    fig : matplotlib.figure.Figure
+    coords : np.ndarray
+        Shape ``(npicked, 2)`` of (x, y) pairs that were plotted.
+
+    Raises
+    ------
+    ValueError
+        If ``cube`` is None, its spatial shape differs from
+        ``result.relDiff``, ``normalize`` is not a recognised mode, or
+        ``coords`` is not an ``(N, 2)`` array.
+    RuntimeError
+        If ``coords`` is omitted and no statsMask pixel exceeds ``thresh``.
+    """
+    import matplotlib.pyplot as plt
+
+    if cube is None:
+        raise ValueError(
+            "cube is None. Re-run runHalvesTest(..., returnCube=True) and pass "
+            "the resulting result.cube here."
+        )
+    if cube.shape[1:] != result.relDiff.shape:
+        raise ValueError(
+            f"cube spatial shape {cube.shape[1:]} != relDiff shape {result.relDiff.shape}"
+        )
+    # Reject a bad ``normalize`` before any figure is created, so a typo
+    # does not leave an orphaned, half-built figure behind.
+    if normalize not in ("mean", "max", "none"):
+        raise ValueError(f"normalize must be 'mean', 'max', or 'none'; got {normalize!r}")
+
+    if coords is None:
+        bad = outlierMask(result, thresh=thresh)
+        ys, xs = np.where(bad)
+        if ys.size == 0:
+            raise RuntimeError(
+                f"No statsMask pixels with |relDiff|>{thresh}. Lower the threshold "
+                "or check that statsMask has the pixels you expect."
+            )
+        if rng is None:
+            rng = np.random.default_rng(0)
+        if ys.size > n:
+            pick = rng.choice(ys.size, size=n, replace=False)
+            ys, xs = ys[pick], xs[pick]
+        coords = np.column_stack([xs, ys])
+    else:
+        coords = np.asarray(coords)
+        if coords.ndim != 2 or coords.shape[1] != 2:
+            raise ValueError("coords must be (N, 2) of (x, y) pairs.")
+        xs = coords[:, 0]
+        ys = coords[:, 1]
+
+    if ax is None:
+        fig, ax = plt.subplots(figsize=(10, 6))
+    else:
+        fig = ax.figure
+
+    nReads = cube.shape[0]
+    t = np.arange(nReads)
+    traces = cube[:, ys, xs].astype(np.float64)  # shape (nReads, npicked)
+
+    if normalize == "mean":
+        denom = traces.mean(axis=0, keepdims=True)
+        ylabel = "flux / mean(flux)"
+    elif normalize == "max":
+        denom = np.abs(traces).max(axis=0, keepdims=True)
+        ylabel = "flux / max(|flux|)"
+    else:  # "none" — the only remaining mode after the up-front check
+        denom = 1.0
+        ylabel = "cumulative flux (ADU)"
+    # Avoid dividing by zero for pixels whose normalization factor vanishes.
+    denom = np.where(denom == 0, 1.0, denom)
+    norm_traces = traces / denom
+
+    relDiffVals = result.relDiff[ys, xs]
+    order = np.argsort(relDiffVals)
+    cmap = plt.get_cmap("RdBu_r")
+    # An empty selection (n=0 or empty coords) has no maximum to reduce;
+    # fall back to the floor so an empty plot is drawn instead of crashing.
+    absMax = float(np.abs(relDiffVals).max()) if relDiffVals.size else 0.0
+    vmax = max(1e-6, absMax)
+
+    # Draw in relDiff order so the most positive outliers end up on top.
+    for k in order:
+        c = cmap(0.5 + 0.5 * relDiffVals[k] / vmax)
+        ax.plot(t, norm_traces[:, k], color=c, alpha=alpha, lw=1.0)
+
+    ax.axvline(result.midRead - 0.5, color="k", lw=0.8, ls="--",
+               label=f"split at read {result.midRead}")
+    ax.set_xlabel("read index")
+    ax.set_ylabel(ylabel)
+    ax.set_title(
+        f"outlier traces  cam={result.cam}  doLinearize={result.doLinearize}  "
+        f"n={len(ys)}  |relDiff|>{thresh:g}"
+    )
+    ax.legend(loc="best", fontsize=9)
+
+    # Standalone mappable so the colorbar reflects the line colours.
+    sm = plt.cm.ScalarMappable(cmap=cmap,
+                               norm=plt.Normalize(vmin=-vmax, vmax=vmax))
+    sm.set_array([])
+    cbar = fig.colorbar(sm, ax=ax, fraction=0.04, pad=0.02)
+    cbar.set_label("relDiff")
+
+    return fig, coords
+
+
+def processRamp(
+    butler,
+    dataId,
+    *,
+    cam: Optional[str] = None,
+    doDark: bool = True,
+    doLinearize: bool = True,
+    doCR: bool = True,
+    repairCR: bool = True,
+    firstRead: Optional[int] = None,
+    lastRead: Optional[int] = None,
+    nSigma: float = cr.DEFAULT_ITER_N_SIGMA,
+    sigmaFloorADU: float = cr.DEFAULT_SIGMA_FLOOR_ADU,
+    maxIterations: int = cr.DEFAULT_MAX_ITERATIONS,
+    doDeglitch: bool = True,
+    correctGlitches: bool = False,
+    glitchAmplitudeMinADU: float = 0.0,
+    maxDropFraction: float = 0.5,
+    nDropSigma: float = 3.0,
+    badPixelMinOutliers: int = 4,
+    badPixelOutlierSigma: float = 4.0,
+    raw=None,
+    nirDark=None,
+    defects=None,
+    linearity=None,
+    exposure=None,
+    cube=None,
+    intermediates=None,
+    log=None,
+):
+    """Run linearization + (optionally) iterative CR/ASIC-glitch detection.
+
+    The in-ISR CR step is forced off inside this helper so the CR knobs
+    here (``doCR`` / ``repairCR`` / thresholds) are the sole source
+    of truth.
+
+    Iteration shortcuts:
+
+    - Pass a pre-loaded ``linearity`` to iterate over linearity calibrations
+      without re-resolving from EUPS each call.
+    - Pass ``exposure`` + ``cube`` (both, or neither) to skip the
+      linearization phase entirely and re-run only the CR step. The CR
+      plane on ``exposure.mask`` is reset on each call before fresh flags
+      are stamped, so iterating with different thresholds gives a clean
+      mask each time. The caller is responsible for ensuring ``cube`` is
+      post-linearization (or accepting unusual threshold behavior).
+
+    Parameters
+    ----------
+    butler : `lsst.daf.butler.Butler`
+    dataId : mapping
+    cam : str, optional
+        Detector name (e.g. ``"n3"``); inferred from raw if omitted.
+    doDark : bool
+        Copied to ``isrTask.config.doDark``. When True and ``nirDark`` is
+        not supplied, the dark is fetched from the butler; when False,
+        ``nirDark`` is forced to ``None``. Not used when ``exposure`` +
+        ``cube`` are supplied.
+    doLinearize : bool
+        Apply the H4 per-read linearity correction. Ignored when
+        ``exposure`` + ``cube`` are supplied.
+    doCR : bool
+        Run rate-based CR detection on the linearized cube. Requires a
+        supported camera.
+    repairCR : bool
+        Subtract the CR contribution in place. With ``False``, only the
+        CR mask plane is stamped — useful for visual inspection.
+    firstRead, lastRead : int, optional
+        Process only reads ``[firstRead, lastRead]`` of the ramp (0-indexed,
+        inclusive; ``lastRead=-1`` = last read). ``None`` (default) lets
+        ``makeNirExposure`` dispatch the range by observation type; an
+        explicit value is used as-is. See ``H4Config.firstRead`` /
+        ``H4Config.lastRead`` and ``PfsIsrTask.rampParams``. Ignored when
+        ``exposure`` + ``cube`` are supplied (the supplied cube already
+        encodes the range).
+    nSigma, sigmaFloorADU, maxIterations
+        CR-detection thresholds; see ``cr.iterativeUtrDetectAndRepair``.
+    doDeglitch : bool
+        Copied to ``isrTask.config.h4.doDeglitch``. In the
+        ``exposure`` + ``cube`` path it selects whether an all-True
+        ``glitchPixelMask`` (True) or ``None`` (False) is handed to
+        ``cr.iterativeUtrDetectAndRepair``.
+    correctGlitches, glitchAmplitudeMinADU, maxDropFraction, nDropSigma,
+    badPixelMinOutliers, badPixelOutlierSigma
+        Further CR/glitch-stage knobs. They are copied onto the matching
+        ``isrTask.config.h4`` fields, or forwarded unchanged to
+        ``cr.iterativeUtrDetectAndRepair`` in the ``exposure`` + ``cube``
+        path.
+    raw, nirDark, defects : optional
+        Pre-fetched butler objects; fetched on demand if not given.
+        Ignored when ``exposure`` + ``cube`` are supplied.
+    linearity : `LinearityCorrection`, optional
+        Pre-loaded linearity. Falls back to ``isrTask.resolveNirLinearity(cam)``.
+        Ignored when ``exposure`` + ``cube`` are supplied.
+    exposure : `lsst.afw.image.Exposure`, optional
+        Pre-built post-linearization exposure (used as both the mask
+        source for ``goodPixelMask`` and the destination for CR-bit
+        stamping). Must be supplied together with ``cube``.
+    cube : np.ndarray, optional
+        Pre-built post-linearization cumulative ramp, shape ``(H, W, N)``
+        — the time axis is last to match the H4 ISR cube convention.
+        Must be supplied together with ``exposure``.
+    intermediates : dict, optional
+        Passed through to ``makeNirExposure`` so the caller can collect
+        stage products. The ``'crCorrected'`` key (and ``'crResult'`` when
+        ``doCR=True``) is added with value ``None`` if missing. In the
+        ``exposure`` + ``cube`` path with ``doCR=True``, an existing
+        ``'crCorrected'`` key receives a copy of the processed cube.
+    log : logger, optional
+
+    Returns
+    -------
+    exposure : `lsst.afw.image.Exposure`
+        Post-linearization exposure with the CR mask bit set at every
+        flagged pixel (when ``doCR=True``).
+    cube : np.ndarray
+        Linearized cumulative ramp, shape ``(H, W, N)``, float32. With
+        ``repairCR=True``, the CR contribution has been subtracted; with
+        ``repairCR=False``, the CR signature remains for inspection.
+    crResult : cr.IterativeRepairResult or None
+        ``None`` when ``doCR=False``.
+
+    Raises
+    ------
+    ValueError
+        If only one of ``exposure`` / ``cube`` is supplied.
+    RuntimeError
+        If ``doCR`` is requested for a camera outside
+        ``SUPPORTED_LINEARITY_CAMS``, or without linearization and without
+        a supplied exposure+cube; if no linearity could be resolved; or if
+        ``makeNirExposure`` did not populate ``intermediates['crCorrected']``.
+    """
+    if (exposure is None) != (cube is None):
+        raise ValueError("exposure and cube must be provided together (or both omitted).")
+    reuseExposure = exposure is not None
+    # Bind the result up front: with a supplied exposure+cube and
+    # doCR=False neither branch below assigns it, and the final return
+    # would otherwise raise UnboundLocalError instead of yielding None.
+    crResult = None
+
+    cam, raw = _resolveCam(cam, raw, butler, dataId)
+
+    if doCR and cam not in SUPPORTED_LINEARITY_CAMS:
+        raise RuntimeError(
+            f"CR detection requires linearity, only available for "
+            f"{SUPPORTED_LINEARITY_CAMS}; got cam={cam!r}."
+        )
+    if doCR and not doLinearize and not reuseExposure:
+        raise RuntimeError(
+            "doCR requires doLinearize=True (or a pre-linearized exposure+cube)."
+        )
+
+    if reuseExposure:
+        # Skip the linearization pipeline entirely; trust the caller.
+        if log is None:
+            import lsst.log as _lsst_log
+            log = _lsst_log.getLogger("lsst.obs.pfs.h4Linearity.validate.processRamp")
+        log.info(
+            f"processRamp: cam={cam} visit={dataId.get('visit', '?')} "
+            f"reusing supplied exposure+cube; doCR={doCR} repairCR={repairCR}"
+        )
+        exp = exposure
+    else:
+        isrTask = _makeDefaultIsrTask(doLinearize=doLinearize)
+        # Push processRamp's stage-control + CR-knob args onto the
+        # isrTask config so ``makeNirExposure`` runs the same way as
+        # the production pipetask. The CR call lives in one place
+        # (``makeNirExposure`` → ``cr.iterativeUtrDetectAndRepair``);
+        # processRamp is just another caller that adjusts the dials.
+        isrTask.config.doDark = doDark
+        isrTask.config.h4.doCR = doCR
+        isrTask.config.h4.repairCR = repairCR
+        isrTask.config.h4.firstRead = firstRead
+        isrTask.config.h4.lastRead = lastRead
+        isrTask.config.h4.doDeglitch = doDeglitch
+        isrTask.config.h4.correctGlitches = correctGlitches
+        isrTask.config.h4.deglitchAmplitudeMinADU = glitchAmplitudeMinADU
+        isrTask.config.h4.rateCRsigmaFloorADU = sigmaFloorADU
+        isrTask.config.h4.rateCRnSigma = nSigma
+        isrTask.config.h4.rateCRiterMax = maxIterations
+        isrTask.config.h4.rateCRmaxDropFraction = maxDropFraction
+        isrTask.config.h4.rateCRnDropSigma = nDropSigma
+        isrTask.config.h4.badPixelMinOutliers = badPixelMinOutliers
+        isrTask.config.h4.badPixelOutlierSigma = badPixelOutlierSigma
+
+        if log is None:
+            log = isrTask.log
+        log.info(
+            f"processRamp: cam={cam} visit={dataId.get('visit', '?')} "
+            f"doLinearize={doLinearize} doCR={doCR} repairCR={repairCR}"
+        )
+
+        if raw is None:
+            raw = butler.get("raw", dataId)
+        if doDark and nirDark is None:
+            nirDark = butler.get("nirDark", dataId)
+        elif not doDark:
+            nirDark = None
+        if defects is None:
+            defects = butler.get("defects", dataId)
+
+        if linearity is None and doLinearize:
+            linearity = isrTask.resolveNirLinearity(cam)
+        if doLinearize and linearity is None:
+            raise RuntimeError(f"resolveNirLinearity({cam!r}) returned None.")
+
+        # Pre-seed the intermediates makeNirExposure stashes for us:
+        # ``crCorrected`` for the post-repair cube and ``crResult`` for
+        # the IterativeRepairResult (the internal CR-stage product).
+        if intermediates is None:
+            ownIntermediates = {'crCorrected': None, 'crResult': None}
+            callerIntermediates = None
+        else:
+            ownIntermediates = intermediates
+            callerIntermediates = intermediates
+            if 'crCorrected' not in ownIntermediates:
+                ownIntermediates['crCorrected'] = None
+            if doCR and 'crResult' not in ownIntermediates:
+                ownIntermediates['crResult'] = None
+        exp, _ = isrTask.makeNirExposure(
+            raw, nirDark=nirDark, defects=defects, linearity=linearity,
+            doReturnRawCube=False,
+            intermediates=ownIntermediates,
+        )
+        cube = ownIntermediates['crCorrected']
+        if cube is None:
+            raise RuntimeError(
+                "makeNirExposure did not populate intermediates['crCorrected']; "
+                "check h4.doLinearize and h4.quickCDS."
+            )
+        crResult = ownIntermediates.get('crResult') if doCR else None
+        if callerIntermediates is None:
+            del ownIntermediates
+
+    if reuseExposure and doCR:
+        # The supplied exposure was processed externally; run CR on
+        # the supplied cube and OR the results onto its mask using the
+        # same projection rule as ``makeNirExposure``. Single-source
+        # implementation lives in ``cr.iterativeUtrDetectAndRepair`` +
+        # ``isrTask._projectInternalMask``; we just feed them.
+        from lsst.obs.pfs.isrTask import _projectInternalMask
+        from types import SimpleNamespace
+        crBit = exp.mask.array.dtype.type(exp.mask.getPlaneBitMask("CR"))
+        # A pixel is "good" when no mask bit other than CR is set on it.
+        goodPixelMask = (exp.mask.array & ~crBit) == 0
+        glitchPixelMask = np.ones(cube.shape[:-1], dtype=bool) if doDeglitch else None
+        deltas = np.diff(cube, axis=-1)
+        read0 = cube[..., 0:1].copy() if repairCR else None
+        crResult = cr.iterativeUtrDetectAndRepair(
+            deltas,
+            goodPixelMask=goodPixelMask,
+            glitchPixelMask=glitchPixelMask,
+            nSigma=nSigma,
+            sigmaFloorADU=sigmaFloorADU,
+            maxIterations=maxIterations,
+            repair=repairCR,
+            correctGlitches=correctGlitches,
+            glitchAmplitudeMinADU=glitchAmplitudeMinADU,
+            maxDropFraction=maxDropFraction,
+            nDropSigma=nDropSigma,
+            badPixelMinOutliers=badPixelMinOutliers,
+            badPixelOutlierSigma=badPixelOutlierSigma,
+        )
+        if repairCR:
+            # Rebuild the cumulative cube in place from the first read
+            # plus the running sum of the deltas.
+            cube[..., 0:1] = read0
+            np.cumsum(deltas, axis=-1, out=cube[..., 1:])
+            cube[..., 1:] += read0
+        del deltas, read0
+        # Project CR/UNSTABLE bits onto exp.mask via the same helper
+        # makeNirExposure uses. Construct an empty internal mask
+        # (the supplied exposure already has its DARK_DEFECT/SAT/... bits
+        # set externally) carrying just the new CR-stage findings.
+        internal = np.zeros(cube.shape[:-1], dtype=np.uint16)
+        from lsst.obs.pfs import h4Linearity as _h4
+        if crResult.badPixelMask.any():
+            internal[crResult.badPixelMask] |= _h4.UNSTABLE
+        if crResult.unclassifiedFlagMask.any():
+            internal[crResult.unclassifiedFlagMask.any(axis=-1)] |= _h4.UNCLASSIFIED
+        flagShim = SimpleNamespace(
+            flagMask=crResult.crFlagMask.any(axis=-1)
+        )
+        # Wipe any previous CR bits before re-projecting.
+        exp.mask.array &= ~crBit
+        _projectInternalMask(exp, internal, crResult=flagShim)
+        if intermediates is not None and 'crCorrected' in intermediates:
+            intermediates['crCorrected'] = cube.copy()
+
+    return exp, cube, crResult
+
+
+def runHalvesIsrData(
+    butler,
+    dataId,
+    *,
+    outputPath: Optional[str] = None,
+    cam: Optional[str] = None,
+    doLinearize: bool = True,
+    doCR: bool = True,
+    repairCR: bool = True,
+    firstRead: int = 0,
+    lastRead: int = -1,
+    nirDark=None,
+    defects=None,
+    linearity=None,
+    log=None,
+) -> "isrPlots.HalvesIsrData":
+    """Produce (and optionally save) the ``HalvesIsrData`` for the
+    half-vs-half UTR-rate test.
+
+    This is the in-repo producer of the ``.npz`` that ``isrPlots.load()``
+    reads; the rate-comparison / amp-bias plots in ``isrPlots`` consume a
+    ``HalvesIsrData``.
+
+    ``processRamp`` is run three times over sub-ranges of
+    ``[firstRead, lastRead]`` (resolved to absolute indices ``r0``,
+    ``r1``), in this order:
+
+      - first half  ``[r0, mid]``
+      - second half ``[mid, r1]``
+      - full        ``[r0, r1]``
+
+    with ``mid = (r0 + r1) // 2``. Each run is the standard ISR over the
+    given range, so linearization acts on the actual cumulative flux: the
+    second half is linearized at its true level, relying on
+    ``makeNirExposure``'s absolute-baseline anchoring.
+
+    Each run's image is divided by its number of deltas (reads in the
+    returned cube minus one) to give a per-read rate. With a correct
+    per-read linearity model the rate is flat across the ramp, so
+    ``rate1``, ``rate2`` and ``rateFull`` should agree. Derived
+    quantities:
+
+    - ``avgRate = (rate1 + rate2) / 2``
+    - ``relDiff = 2(rate1 - rate2)/(rate1 + rate2)`` — the primary,
+      half-vs-half diagnostic (NaN where the denominator is zero).
+    - ``addResid = rateFull - avgRate`` and
+      ``addResidRel = addResid / rateFull`` (NaN where ``rateFull`` is
+      zero) — the secondary, full-vs-sub-ramp check.
+
+    Residual structure points at a poor correction (or a genuinely
+    variable source). Acting on significant discrepancies — dynamic
+    masking of the offending pixels — is PIPE2D-1844; this producer only
+    exposes the per-pixel data.
+
+    Parameters
+    ----------
+    butler : `lsst.daf.butler.Butler`
+    dataId : mapping
+    outputPath : str, optional
+        If given, the fields are also written there with ``np.savez``, in
+        the schema ``isrPlots.load()`` expects.
+    cam : str, optional
+        Detector name; inferred from the raw if omitted.
+    doLinearize, doCR, repairCR : bool
+        Forwarded to ``processRamp`` for all three runs.
+    firstRead, lastRead : int
+        0-indexed inclusive read range to split; ``lastRead=-1`` = last.
+    nirDark, defects, linearity : optional
+        Pre-fetched read-only inputs, reused across the three runs.
+    log : logger, optional
+
+    Returns
+    -------
+    `isrPlots.HalvesIsrData`
+
+    Raises
+    ------
+    RuntimeError
+        If either half would span fewer than two read intervals.
+    """
+    cam, _ = _resolveCam(cam, None, butler, dataId)
+    if nirDark is None:
+        nirDark = butler.get("nirDark", dataId)
+    if defects is None:
+        defects = butler.get("defects", dataId)
+
+    # The raw is only needed here to turn the requested range into
+    # absolute read indices and to record the ramp length.
+    rawRamp = butler.get("raw", dataId)
+    r0 = rawRamp.positiveIndex(firstRead)
+    r1 = rawRamp.positiveIndex(lastRead)
+    nReads = int(rawRamp.getNumReads())
+    del rawRamp
+
+    mid = (r0 + r1) // 2
+    if min(mid - r0, r1 - mid) < 2:
+        raise RuntimeError(
+            f"Read range [{r0}, {r1}] is too short to split into halves "
+            f"with >= 2 intervals each (mid={mid})."
+        )
+
+    def _isrOverRange(rLo, rHi):
+        """Run the standard ISR over ``[rLo, rHi]``; return image, mask, #deltas."""
+        exp, rampCube, _unused = processRamp(
+            butler, dataId, cam=cam,
+            doLinearize=doLinearize, doCR=doCR, repairCR=repairCR,
+            firstRead=rLo, lastRead=rHi,
+            nirDark=nirDark, defects=defects, linearity=linearity, log=log,
+        )
+        image = np.asarray(exp.image.array, dtype=np.float32)
+        maskCopy = np.asarray(exp.mask.array).copy()
+        # makeNirExposure forms nirImage = rate * (#deltas); the cube holds
+        # the cumulative reads with the time axis last, so #deltas is one
+        # less than its last dimension (floored at 1).
+        deltaCount = max(int(rampCube.shape[-1]) - 1, 1)
+        return image, maskCopy, deltaCount
+
+    readRanges = ((r0, mid), (mid, r1), (r0, r1))
+    firstRun, secondRun, fullRun = [_isrOverRange(lo, hi) for lo, hi in readRanges]
+    img1, mask_first, nDeltas1 = firstRun
+    img2, mask_second, nDeltas2 = secondRun
+    imgF, mask_full, nDeltasF = fullRun
+
+    with np.errstate(divide="ignore", invalid="ignore"):
+        # Per-read UTR rates for the two sub-ramps and the full ramp.
+        rate1 = (img1 / nDeltas1).astype(np.float32)
+        rate2 = (img2 / nDeltas2).astype(np.float32)
+        rateF = (imgF / nDeltasF).astype(np.float32)
+        avgRate = (0.5 * (rate1 + rate2)).astype(np.float32)
+
+        # Half-vs-half consistency; undefined where the rates sum to zero.
+        rateSum = rate1 + rate2
+        relDiff = np.where(
+            rateSum != 0, 2.0 * (rate1 - rate2) / rateSum, np.nan
+        ).astype(np.float32)
+
+        # Full-ramp vs averaged sub-ramp consistency (PIPE2D-1844 acts on this).
+        addResid = (rateF - avgRate).astype(np.float32)
+        addResidRel = np.where(
+            rateF != 0, addResid / rateF, np.nan
+        ).astype(np.float32)
+
+    maskUnion = mask_first | mask_second | mask_full
+
+    fields = {
+        "img1": img1, "img2": img2, "imgF": imgF,
+        "rate1": rate1, "rate2": rate2, "rateFull": rateF,
+        "addResid": addResid, "addResidRel": addResidRel,
+        "relDiff": relDiff, "avgRate": avgRate,
+        "mask_first": mask_first, "mask_second": mask_second,
+        "mask_full": mask_full, "maskUnion": maskUnion,
+        "visit": int(dataId.get("visit", -1)), "cam": str(cam),
+        "midRead": int(mid), "nReads": nReads,
+    }
+    data = isrPlots.HalvesIsrData(**fields)
+
+    if outputPath is None:
+        return data
+
+    np.savez(outputPath, **fields)
+    if log is not None:
+        log.info(f"runHalvesIsrData: wrote {outputPath}")
+    return data
+
+
+def runCRDiagnostics(
+    butler,
+    dataId,
+    *,
+    cam: Optional[str] = None,
+    firstRead: int = 0,
+    lastRead: int = -1,
+    nSigma: float = cr.DEFAULT_ITER_N_SIGMA,
+    sigmaFloorADU: float = cr.DEFAULT_SIGMA_FLOOR_ADU,
+    maxIterations: int = cr.DEFAULT_MAX_ITERATIONS,
+    repair: bool = False,
+    linearity=None,
+    exposure=None,
+    cube=None,
+    log=None,
+):
+    """Call ``processRamp`` with defaults suited to CR diagnostics.
+
+    ``doLinearize`` and ``doCR`` are always True. ``repair`` is passed as
+    ``repairCR`` and defaults to False, so the returned cube keeps the
+    CR/glitch signature for inspection.
+
+    ``linearity`` / ``exposure`` / ``cube`` are handed straight to
+    ``processRamp``, which allows iterating without re-running
+    linearization on each call.
+
+    Returns
+    -------
+    crResult, cube, exposure
+        This ordering is kept for backward compatibility with existing
+        callers; ``processRamp`` itself returns
+        ``(exposure, cube, crResult)``.
+    """
+    crThresholds = dict(
+        nSigma=nSigma,
+        sigmaFloorADU=sigmaFloorADU,
+        maxIterations=maxIterations,
+    )
+    isrExposure, rampCube, crResult = processRamp(
+        butler, dataId,
+        cam=cam,
+        doLinearize=True,
+        doCR=True,
+        repairCR=repair,
+        firstRead=firstRead,
+        lastRead=lastRead,
+        linearity=linearity,
+        exposure=exposure,
+        cube=cube,
+        log=log,
+        **crThresholds,
+    )
+    # Re-order the processRamp triple into this wrapper's legacy order.
+    return crResult, rampCube, isrExposure
+
+
+def collectPixelRampData(
+    butler,
+    dataId,
+    *,
+    cam: Optional[str] = None,
+    firstRead: int = 0,
+    lastRead: int = -1,
+    doCR: bool = True,
+    repairCR: bool = True,
+    raw=None,
+    nirDark=None,
+    defects=None,
+    linearity=None,
+    log=None,
+) -> "isrPlots.PixelRampData":
+    """Collect aligned ISR-stage cubes for per-pixel inspection in one pass.
+
+    Runs ``processRamp`` once with ``doLinearize=True`` and captures copies
+    of the flux cube at each major stage via the ``intermediates`` dict:
+
+    - ``cubePreDark``   — absolute cumulative, pre-dark-subtraction.
+    - ``cubeRaw``       — dark-subtracted absolute cumulative (input to linearity).
+    - ``cubeLin``       — linearized, re-anchored cumulative (pre-CR-repair).
+    - ``cubeCR``        — linearized + CR-repaired cumulative. ``None`` if
+                          ``doCR=False``; equal to ``cubeLin`` if
+                          ``doCR=True, repairCR=False`` (CR pixels flagged
+                          but not subtracted).
+
+    Memory cost: ~4 ramp cubes simultaneously held (≈ 4 × N × H × W ×
+    float32). For a 45-read 4096² ramp the four cubes alone come to
+    ~12 GB.
+
+    Parameters
+    ----------
+    butler : `lsst.daf.butler.Butler`
+    dataId : mapping
+    cam : str, optional
+        Detector name; resolved through ``_resolveCam`` when omitted.
+    firstRead, lastRead : int
+        Read range forwarded to ``processRamp``; also converted with
+        ``raw.positiveIndex`` to fill ``firstRead`` / ``lastRead`` and the
+        ``readIndices`` of the result.
+    doCR : bool
+        Run rate-based CR detection. Default True (so ``cubeCR`` is
+        populated). Pass ``False`` to skip CR work entirely.
+    repairCR : bool
+        When ``doCR=True``, subtract the CR contribution from
+        ``cubeCR``. With ``False``, the CR plane is stamped on
+        ``exp.mask`` but ``cubeCR`` keeps the spikes (useful for
+        inspecting how big they are).
+    raw, nirDark, defects : optional
+        Pre-fetched butler objects; fetched on demand if not given.
+    linearity : optional
+        Pre-loaded linearity; otherwise taken from
+        ``isrTask.resolveNirLinearity(cam)``.
+    log : logger, optional
+
+    Returns
+    -------
+    `isrPlots.PixelRampData`
+
+    Raises
+    ------
+    RuntimeError
+        If no linearity was supplied and ``resolveNirLinearity`` returns
+        ``None``.
+    """
+    cam, raw = _resolveCam(cam, raw, butler, dataId)
+
+    # ``raw`` is dereferenced below for the read-index conversion, so make
+    # sure it exists even when ``cam`` was given explicitly and no raw was
+    # passed in (processRamp applies the same fetch-on-demand rule).
+    if raw is None:
+        raw = butler.get("raw", dataId)
+    if nirDark is None:
+        nirDark = butler.get("nirDark", dataId)
+    if defects is None:
+        defects = butler.get("defects", dataId)
+
+    isrTask = _makeDefaultIsrTask(doLinearize=True)
+    if linearity is None:
+        linearity = isrTask.resolveNirLinearity(cam)
+        if linearity is None:
+            raise RuntimeError(f"resolveNirLinearity({cam!r}) returned None.")
+
+    # Pre-seed the stage keys so processRamp / makeNirExposure fill them in.
+    intermediates: dict = dict.fromkeys(
+        ('raw', 'darkSubbed', 'linearized', 'crCorrected')
+    )
+    exp, _, crResult = processRamp(
+        butler, dataId,
+        cam=cam, firstRead=firstRead, lastRead=lastRead,
+        doLinearize=True, doCR=doCR, repairCR=repairCR,
+        raw=raw, nirDark=nirDark, defects=defects, linearity=linearity,
+        intermediates=intermediates,
+        log=log,
+    )
+
+    r0 = raw.positiveIndex(firstRead)
+    r1 = raw.positiveIndex(lastRead)
+    # All captured cubes are ``(H, W, N)`` — the time axis is last.
+    nIntervals = intermediates['linearized'].shape[-1]
+    cubeDark = isrTask.getDarkCube(nirDark, r0=r0, nreads=nIntervals).astype(
+        np.float32, copy=False
+    )
+    cubePreDark = intermediates['raw']    # absolute cumulative, pre-dark
+    cubeRaw = intermediates['darkSubbed']  # post-dark, pre-lin
+    cubeLin = intermediates['linearized']  # post-lin, pre-CR
+    cubeCR = intermediates.get('crCorrected')  # None if doCR was False
+
+    readIndices = np.arange(r0 + 1, r0 + 1 + nIntervals, dtype=np.int32)
+
+    # Per-pixel rate: use the proper UTR-weighted estimator
+    # (`isrTask.calcUTRrates`) on the post-CR-repair cube — same one
+    # `makeNirExposure` uses to land the final image when
+    # `applyUTRWeights=True`. The CR detector's own median-of-deltas rate
+    # (`crResult.rate`) is robust for thresholding but isn't the right
+    # number for downstream science / comparison work.
+    rateCube = cubeCR if cubeCR is not None else cubeLin
+    avgRate = np.asarray(isrTask.calcUTRrates(rateCube), dtype=np.float32)
+
+    # The flag cubes only exist when the CR stage actually ran.
+    crFlagMask = (
+        np.asarray(crResult.crFlagMask) if crResult is not None else None
+    )
+    glitchFlagMask = (
+        np.asarray(crResult.glitchFlagMask) if crResult is not None else None
+    )
+
+    return isrPlots.PixelRampData(
+        cubeRaw=cubeRaw, cubeLin=cubeLin, cubeDark=cubeDark,
+        cubePreDark=cubePreDark, readIndices=readIndices,
+        firstRead=r0, lastRead=r1,
+        visit=int(dataId.get("visit", -1)), cam=cam,
+        fitMin=np.asarray(linearity.fitMin, dtype=np.float32),
+        fitMax=np.asarray(linearity.fitMax, dtype=np.float32),
+        cubeCR=cubeCR,
+        mask=exp.mask.array.copy(),
+        maskPlaneDict=dict(exp.mask.getMaskPlaneDict()),
+        avgRate=avgRate,
+        crFlagMask=crFlagMask,
+        glitchFlagMask=glitchFlagMask,
+    )
+
+
+def summarizeCRDiagnostics(
+    crResult,
+    *,
+    sigmaFloorADU: float = cr.DEFAULT_SIGMA_FLOOR_ADU,
+    pcts=(50, 84, 95, 99, 99.9),
+):
+    """Print a concise text summary of an iterative CR/glitch result.
+
+    ``crResult`` is a `cr.IterativeRepairResult` (from ``processRamp`` or
+    ``cr.iterativeUtrDetectAndRepair``). ``sigmaFloorADU`` is the floor for
+    the at-floor count; ``pcts`` are percentile levels. Returns None.
+    """
+    # crFlagMask / glitchFlagMask are (H, W, N-1); collapse the time
+    # axis to get the (H, W) "this pixel was ever flagged" maps.
+    crFlag = np.asarray(crResult.crFlagMask, dtype=bool)
+    glFlag = np.asarray(crResult.glitchFlagMask, dtype=bool)
+    crPix = crFlag.any(axis=-1)
+    glPix = glFlag.any(axis=-1)
+    nCRpix = int(crPix.sum())
+    nGLpix = int(glPix.sum())
+    print(f"iterations: {crResult.nIterations}")
+    print(
+        f"  CR delta entries:   {crResult.nCRs:,}  "
+        f"({nCRpix:,} unique pixels)"
+    )
+    print(
+        f"  ASIC glitch pairs:  {crResult.nGlitchPairs:,}  "
+        f"({nGLpix:,} unique pixels)"
+    )
+    # Per-iteration counts and timings are optional: falsy values are skipped.
+    if crResult.nByIteration:
+        print("per-iteration (new CR entries, new glitch pairs):")
+        for i, (nc, ng) in enumerate(crResult.nByIteration, start=1):
+            print(f"  iter {i}: CR={nc:,}  glitch pairs={ng:,}")
+
+    if crResult.iterationTimings:
+        ts = [round(float(t), 2) for t in crResult.iterationTimings]  # rounded before summing
+        print(f"per-iter timings (s): {ts}   total={sum(ts):.2f}s")
+    # Sigma percentiles over every pixel, then the share sitting at the sigma floor.
+    pcts = list(pcts)  # materialize so any iterable of levels works
+    sig = crResult.sigma
+    rate = crResult.rate
+    print(f"\nper-pixel sigma over all pixels (pct {pcts}): "
+          f"{np.percentile(sig, pcts).round(2)}")
+    nAtFloor = int((sig <= float(sigmaFloorADU) + 1e-3).sum())  # 1e-3 tolerance on the compare
+    nTot = sig.size  # max(nTot, 1) below guards the percentage division
+    print(f"  sigma at floor ({sigmaFloorADU}): {nAtFloor:,} "
+          f"({100 * nAtFloor / max(nTot, 1):.1f}%)")
+    # Same sigma/rate percentiles on the flagged subsets; each is skipped when empty.
+    if nCRpix:
+        print(f"\nflagged-CR pixel stats over {nCRpix:,} pixels:")
+        print(f"  sigma          pct {pcts}: {np.percentile(sig[crPix], pcts).round(2)}")
+        print(f"  rate (ADU/rd)  pct {pcts}: {np.percentile(rate[crPix], pcts).round(2)}")
+
+    if nGLpix:
+        print(f"\nflagged-glitch pixel stats over {nGLpix:,} pixels:")
+        print(f"  sigma          pct {pcts}: {np.percentile(sig[glPix], pcts).round(2)}")
+        print(f"  rate (ADU/rd)  pct {pcts}: {np.percentile(rate[glPix], pcts).round(2)}")
+
+
+def plotComparison(
+    cmp: ComparisonResult,
+    *,
+    fig=None,
+    sample: int = 200_000,
+    vmin: float = -0.05,
+    vmax: float = 0.05,
+):
+    """2x3 grid: linearize-off on top, linearize-on on bottom.
+
+    Draws ``cmp.off`` and ``cmp.on`` with ``plotHalves`` into two stacked subfigures
+    of ``fig`` (a ``figsize=(16, 10)`` figure is made if None); returns ``fig``."""
+    import matplotlib.pyplot as plt  # imported lazily, inside the function
+    if fig is None:
+        fig = plt.figure(figsize=(16, 10))
+    subfigs = fig.subfigures(2, 1)  # subfigs[0] = OFF row, subfigs[1] = ON row
+    plotHalves(  # ``sample`` / ``vmin`` / ``vmax`` are forwarded unchanged to both rows
+        cmp.off, fig=subfigs[0], sample=sample, vmin=vmin, vmax=vmax,
+        title=f"linearize=OFF  cam={cmp.off.cam}  nReads={cmp.off.nReads}",
+    )
+    plotHalves(
+        cmp.on, fig=subfigs[1], sample=sample, vmin=vmin, vmax=vmax,
+        title=f"linearize=ON   cam={cmp.on.cam}  nReads={cmp.on.nReads}",
+    )
+    d = cmp.delta  # mapping; the three keys read below feed the overall title
+    fig.suptitle(
+        f"H4 linearity halves test — MAD off/on = {d['madImprovementFactor']:.2f}×  "
+        f"(off={d['mad_off']:.4f}, on={d['mad_on']:.4f})",
+        fontsize=12,
+    )
+    return fig
diff --git a/python/lsst/obs/pfs/isrTask.py b/python/lsst/obs/pfs/isrTask.py
index dfd12633..40d63d40 100644
--- a/python/lsst/obs/pfs/isrTask.py
+++ b/python/lsst/obs/pfs/isrTask.py
@@ -18,13 +18,14 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
-from typing import TYPE_CHECKING, Optional
+from typing import Optional
 
 import os
 import time
 import warnings
 
 from functools import partial
+from types import SimpleNamespace
 
 import numpy as np
 import scipy
@@ -51,15 +52,10 @@
 from lsst.daf.butler import DimensionGroup
 
 from . import imageCube
-from . import nirLinearity
+from . import h4Linearity
 from .overscan import PfsOverscanCorrectionTask
 from pfs.drp.stella.crosstalk import PfsCrosstalkTask
 
-if TYPE_CHECKING:
-    from lsst.afw.image import ExposureF
-    from .imageCube import ImageCube
-    from .raw import PfsRaw
-
 ___all__ = ["IsrTask", "IsrTaskConfig"]
 
 
@@ -105,18 +101,13 @@ def validate(self):
 class H4Config(pexConfig.Config):
     """Configuration parameters for H4 reductions"""
 
-    quickCDS = pexConfig.Field(dtype=bool, default=False,
-                               doc="Only consider last and first reads instead of the full ramp cube")
-    doCR = pexConfig.Field(dtype=bool, default=False,
-                           doc="Run ramp-based CR rejection. Requires quickCDS=False")
-    crMinReads = pexConfig.Field(dtype=int, default=4,
-                                 doc="Minimum number of dark or shutter-open reads for CR rejection")
+    quickCDS = pexConfig.Field(
+        dtype=bool, default=None, optional=True,
+        doc="Only consider last and first reads instead of the full ramp cube. "
+            "None (default) = dispatch by observation type "
+            "(CDS for arcs and flats; UTR for darks and science).")
     useIRP = pexConfig.Field(dtype=bool, default=True,
                              doc="Use Interleaved Reference Pixel planes if available")
-    repairAsicSpikes = pexConfig.Field(dtype=bool, default=True,
-                                       doc="Detect and mask single-pixel, single-read ASIC glitches")
-    repairAsicSpikesSigma = pexConfig.Field(dtype=float, default=3.0,
-                                            doc="Sigma threshold from reads for detecting ASIC glitches")
     IRPfilter = pexConfig.Field(dtype=int, default=15,  # 15..31, probably.
                                 doc="width of smoothing window for IRP corrections. 0=no smoothing. Odd")
     doIRPbadPixels = pexConfig.Field(dtype=bool, default=True,
@@ -137,11 +128,6 @@ class H4Config(pexConfig.Config):
         doc="Mapping of detector name to IPC kernel filename",
     )
 
-    useDarkCube = pexConfig.Field(
-        dtype=bool,
-        default=True,
-        doc="Use dark cube for dark subtraction? Disable this to use traditional darks.",
-    )
     applyUTRWeights = pexConfig.Field(
         dtype=bool,
         default=True,
@@ -153,8 +139,131 @@ class H4Config(pexConfig.Config):
         doc="write out raw-ish ISRCube (UTR, e-, with IRP and CR corrections)",
     )
     doLinearize = pexConfig.Field(dtype=bool, default=True, doc="Apply linearity correction?")
-    linearizeBeforeUTR = pexConfig.Field(dtype=bool, default=False,
-                                         doc="Apply linearity correction before UTR")
+
+    doCR = pexConfig.Field(
+        dtype=bool, default=True,
+        doc="Run the iterative UTR-rate-based CR + ASIC-glitch detector on "
+            "the linearized cube after dark subtraction.",
+    )
+    repairCR = pexConfig.Field(
+        dtype=bool, default=True,
+        doc="When ``doCR`` is True, replace flagged deltas in the "
+            "cube with the per-pixel rate so the integrated image "
+            "reflects the CR-corrected ramp. Set False to leave the "
+            "raw flagged values in place (diagnostic use; the rate "
+            "image is still computed from the detector's UTR-weighted "
+            "estimator, but the cube exposed via ``intermediates`` / "
+            "``doReturnRawCube`` shows uncorrected reads).",
+    )
+    rateCRnSigma = pexConfig.Field(
+        dtype=float, default=5.0,
+        doc="Per-pixel sigma threshold for the iterative CR/glitch detector.",
+    )
+    rateCRsigmaFloorADU = pexConfig.Field(
+        dtype=float, default=8.0,
+        doc="Minimum sigma in ADU/read; protects faint pixels from collapsing the threshold "
+            "to MAD-noise.",
+    )
+    doDeglitch = pexConfig.Field(
+        dtype=bool, default=True,
+        doc="Detect ASIC glitches alongside CRs in the iterative detector. "
+            "Default True: run glitch detection on all pixels, all channels. "
+            "Detection is needed even when glitches are not corrected — a "
+            "glitch up-spike must be recognized as part of a pair so it is "
+            "not misclassified as a CR. Set False to disable glitch detection "
+            "entirely; CRs are still detected. Whether detected glitches are "
+            "*corrected* is controlled separately by ``correctGlitches``. "
+            "Only meaningful when ``doCR`` is True.",
+    )
+    deglitchAmplitudeMinADU = pexConfig.Field(
+        dtype=float, default=0.0,
+        doc="Minimum residual amplitude (in ADU) required for an ASIC-glitch "
+            "pair to be classified. 0 (default) means the only floor is the "
+            "CR threshold (``nSigma * sigma``). Set higher (e.g. 100) to "
+            "suppress faint-end deglitching where the classifier is less "
+            "reliable; bright glitches above this floor are still picked up. "
+            "Implemented as: at least one of the two deltas in the pair must "
+            "have |residual| above this value.",
+    )
+    correctGlitches = pexConfig.Field(
+        dtype=bool, default=False,
+        doc="Correct interior ASIC-glitch pairs: subtract them from the "
+            "linearized cube and exclude their deltas from the UTR rate. "
+            "Default False — interior pairs are still detected (``ASIC_GLITCH`` "
+            "stamped, up-spike not misclassified as a CR) but left in place; "
+            "a real symmetric +A/-A pair cancels on its own in the mean rate, "
+            "so not correcting avoids acting on an unreliable glitch "
+            "classification. End glitches (a lone glitch at the first or last "
+            "delta, with no pair partner) are always corrected regardless of "
+            "this flag. Only meaningful when ``doDeglitch`` is True.",
+    )
+    rateCRiterMax = pexConfig.Field(
+        dtype=int, default=5,
+        doc="Maximum iteration count for the iterative UTR CR detector.",
+    )
+    rateCRmaxDropFraction = pexConfig.Field(
+        dtype=float, default=0.5,
+        doc="Cumulative-drop check for CR classification — amplitude "
+            "criterion. A candidate CR fails this criterion when the "
+            "per-pixel running cumulative residual from the CR delta "
+            "onward drops by more than this fraction of the CR "
+            "amplitude. Default 0.5; set very high to disable. The "
+            "candidate is REJECTED only when BOTH this and "
+            "``rateCRnDropSigma`` (the noise criterion) fail.",
+    )
+    rateCRnDropSigma = pexConfig.Field(
+        dtype=float, default=3.0,
+        doc="Cumulative-drop check for CR classification — noise "
+            "criterion. A candidate CR fails this criterion when the "
+            "cumulative residual drop exceeds ``nDropSigma * σ * "
+            "sqrt(N-1-k)`` ADU. Real CRs leave only random-walk noise "
+            "behind, with an expected cumulative drift ~σ√(N-1-k) — "
+            "that drift is not a transient signature and shouldn't "
+            "demote the candidate. Transients leak the deposited "
+            "charge back, producing a drop that vastly exceeds the "
+            "noise floor. The candidate is REJECTED only when both "
+            "this AND ``rateCRmaxDropFraction`` fail. Default 3.0; "
+            "set very high to fall back to the amplitude criterion "
+            "alone.",
+    )
+    badPixelMinOutliers = pexConfig.Field(
+        dtype=int, default=4,
+        doc="BAD-pixel gate inside the CR detector (count criterion). "
+            "A pixel is OR'd into the UNSTABLE + BAD mask planes when "
+            "its ramp has at least this many delta residuals exceeding "
+            "``badPixelOutlierSigma × σ_IQR`` from the per-pixel "
+            "median. Set 0 to disable.",
+    )
+    badPixelOutlierSigma = pexConfig.Field(
+        dtype=float, default=4.0,
+        doc="BAD-pixel gate inside the CR detector (sigma criterion). "
+            "Per-delta outlier threshold in units of ``σ_IQR``. Default "
+            "4.0 — 3.0 catches many merely-noisy pixels because the "
+            "IQR-σ underestimates the true scatter by ~30 % on real "
+            "H4 deltas (mild non-Gaussian tails from shot/read-noise "
+            "mixture + linearity residuals).",
+    )
+    asicGlitchHeightMaskADU = pexConfig.Field(
+        dtype=float, default=0.0,
+        doc="ASIC-glitch repair/mask threshold. Pixels with at least "
+            "one detected glitch pair whose height ``|A| = (δ[k] − "
+            "δ[k+1]) / 2`` exceeds this value (ADU) are OR'd into BAD "
+            "on the published mask. Default 0 = mask every detected "
+            "glitch pixel. Set to a few hundred ADU to leave the "
+            "noise-pair detection tail un-masked while still catching "
+            "genuine bit-flip events.",
+    )
+
+    firstRead = pexConfig.Field(
+        dtype=int, default=None, optional=True,
+        doc="0-indexed first read of the ramp to include (inclusive). "
+            "None (default) = dispatch by observation type; an explicit value is used as-is.",
+    )
+    lastRead = pexConfig.Field(
+        dtype=int, default=None, optional=True,
+        doc="0-indexed last read of the ramp to include (inclusive; -1 = last read). "
+            "None (default) = dispatch by observation type; an explicit value is used as-is.",
+    )
 
 
 class PfsAssembleCcdTask(AssembleCcdTask):
@@ -203,6 +312,125 @@ def assembleCcd(self, exposure):
         return exposure
 
 
+def _makeInternalMask(
+    shape, *, linearity=None, defects=None, borderWidth: int = 4,
+):
+    """Build the H4 ISR internal mask (uint16, ``(H, W)``).
+
+    The internal-mask alphabet is the union of:
+
+      - ``h4Linearity.BORDER_PIX`` on the outer ``borderWidth`` ring,
+        seeded here so even doLinearize=False paths get it. A
+        ``borderWidth`` <= 0 seeds no ring (``[-0:]`` would flag it all).
+      - ``h4Linearity.MASKED_BY_INPUT`` on pixels carried by the
+        ``defects`` calib.
+      - Any bits already set in ``linearity.badPixelMask``
+        (typically also BORDER_PIX + MASKED_BY_INPUT + DEAD-group bits
+        the linearity fit() emitted).
+
+    Downstream stages (``h4Linearity.apply``, the CR detector) read this
+    mask, skip already-flagged pixels, and OR their own findings back in;
+    ``_projectInternalMask`` lifts the final mask into ``Exposure.mask``.
+    """
+    H, W = shape
+    internal = np.zeros((H, W), dtype=np.uint16)
+    if borderWidth > 0:  # guard: a ``-0:`` slice would select the whole axis
+        internal[:borderWidth, :] |= h4Linearity.BORDER_PIX
+        internal[-borderWidth:, :] |= h4Linearity.BORDER_PIX
+        internal[:, :borderWidth] |= h4Linearity.BORDER_PIX
+        internal[:, -borderWidth:] |= h4Linearity.BORDER_PIX
+    if defects is not None:
+        defImg = afwImage.MaskedImageF(geom.Extent2I(W, H))
+        defects.maskPixels(defImg)
+        internal[defImg.mask.array > 0] |= h4Linearity.MASKED_BY_INPUT
+    if linearity is not None:
+        internal |= linearity.badPixelMask.astype(np.uint16, copy=False)
+    return internal
+
+
+def _projectInternalMask(exposure, internalMask, *, crResult=None) -> None:
+    """Lift the H4 internal mask into ``Exposure.mask`` planes.
+
+    Single projection point for the canonical published set:
+
+      - ``DARK_DEFECT``      ← ``MASKED_BY_INPUT``      (also BAD)
+      - ``LINEARITY_DEFECT`` ← ``DEAD`` group           (also BAD)
+      - ``SAT``              ← ``ABOVE_VALID_RANGE``    (also BAD)
+      - ``UNSTABLE``         ← ``UNSTABLE``             (also BAD)
+      - ``BAD`` ← anywhere any internal bit is set (so BORDER pixels,
+        BELOW_VALID_RANGE pixels, etc. land in BAD without an
+        externally distinguished plane).
+      - ``CR`` ← ``crResult.flagMask``, when that attribute exists and has any entry set.
+
+    Per the "first-reason-wins" rule, ABOVE_VALID_RANGE only fires on
+    pixels that survived defects + fit, so SAT is now the clean
+    "genuinely saturating" signal rather than a side effect of dead
+    pixels with stale ``fitMax``. ASIC glitch + UNCLASSIFIED bits are
+    not published — those live on ``IterativeRepairResult`` for
+    internal diagnostic use.
+    """
+    mask = exposure.mask
+    for plane in ("DARK_DEFECT", "LINEARITY_DEFECT", "UNSTABLE"):
+        if plane not in mask.getMaskPlaneDict():
+            mask.addMaskPlane(plane)  # register the plane when this mask lacks it
+    bit = mask.getPlaneBitMask
+
+    darkDefect = (internalMask & h4Linearity.MASKED_BY_INPUT) != 0
+    linDefect = (internalMask & h4Linearity.DEAD) != 0
+    sat = (internalMask & h4Linearity.ABOVE_VALID_RANGE) != 0
+    unstable = (internalMask & h4Linearity.UNSTABLE) != 0
+    anyBad = internalMask != 0  # every internal bit, named or not, implies BAD
+
+    arr = mask.array  # all plane bits below are OR'd into this array
+    if darkDefect.any():
+        arr[darkDefect] |= bit("DARK_DEFECT")
+    if linDefect.any():
+        arr[linDefect] |= bit("LINEARITY_DEFECT")
+    if sat.any():
+        arr[sat] |= bit("SAT")
+    if unstable.any():
+        arr[unstable] |= bit("UNSTABLE")
+    if anyBad.any():
+        arr[anyBad] |= bit("BAD")
+    # NOTE(review): reads ``flagMask`` (not ``crFlagMask``); a result lacking it stamps no CR.
+    if crResult is not None:
+        crFlag = getattr(crResult, "flagMask", None)
+        if crFlag is not None and crFlag.any():
+            arr[crFlag] |= bit("CR")
+
+
+def _stampRampMetadata(exposure, *, r0, r1, nTotal, appliedUTR):
+    """Add H4 read-range keys (H4READ0/1, H4NREAD, H4NTOT, H4UTRWT) to ``exposure.metadata``.
+
+    Round-trips into the FITS header on persist, so downstream code (and
+    humans inspecting an exposure) can tell a partial-ramp postISRCCD
+    apart from a full-ramp one.
+
+    Parameters
+    ----------
+    exposure : `lsst.afw.image.Exposure`
+        Modified in place.
+    r0, r1 : int
+        First/last reads used, absolute 0-indexed, inclusive (H4NREAD = r1 - r0 + 1).
+    nTotal : int
+        Total read count of the original ramp on disk (``pfsRaw.getNumReads()``).
+    appliedUTR : bool
+        Whether UTR weights were actually applied to produce
+        ``exposure.image`` (True for the linearized + legacy-UTR arms,
+        False for quickCDS and the non-linearized non-UTR fallback).
+        Authoritative for downstream consumers — e.g. variance
+        estimation chooses between the CDS and UTR noise formulas on
+        this flag.
+    """
+    md = exposure.getMetadata()  # H4UTRWT / H4NREAD are read back in runH4RG for the variance
+    md.set('H4READ0', int(r0), 'First H4 read used (absolute, 0-indexed)')
+    md.set('H4READ1', int(r1), 'Last H4 read used (absolute, 0-indexed, inclusive)')
+    md.set('H4NREAD', int(r1 - r0 + 1), 'Number of H4 reads spanned by this exposure')
+    md.set('H4NTOT', int(nTotal), 'Total H4 reads in the original ramp')
+    md.set('H4UTRWT', bool(appliedUTR),
+           'UTR weights applied to exposure image (False=CDS)')
+
+
 def lookupDefects(datasetType, registry, dataId, collections):
     """Look up defects
 
@@ -824,12 +1052,10 @@ def runQuantum(self, butlerQC, inputRefs, outputRefs):
             inputs["ipcCoeffs"] = self.readIPC(raw.detector.getName())
 
         if self.config.doDark:
-            if isNir and self.config.h4.useDarkCube:
+            if isNir:
                 if inputs.get("nirDark") is None:
-                    raise RuntimeError(
-                        f"No NIR dark cube found for {raw.detector.getName()}; try h4.useDarkCube=False"
-                    )
-            elif inputs["dark"] is None:
+                    raise RuntimeError(f"No NIR dark cube found for {raw.detector.getName()}")
+            elif inputs.get("dark") is None:
                 raise RuntimeError(f"No dark frame found for {raw.detector.getName()}")
 
         outputs = self.run(**inputs)
@@ -1034,7 +1260,6 @@ def runCCD(self, ccdExposure, **kwargs):
     def runH4RG(
         self,
         pfsRaw,
-        dark=None,
         nirDark=None,
         flat=None,
         defects=None,
@@ -1050,11 +1275,8 @@ def runH4RG(
             The raw exposure that is to be run through ISR.  The
             exposure is modified by this method. With the PfsRaw we
             can get access the ramp cubes.
-        dark : `lsst.afw.image.Exposure`, optional
-            Dark exposure to subtract (if ``config.h4.useDarkCube`` is
-            ``False``).
         nirDark : `lsst.obs.pfs.imageCube.ImageCube`, optional
-            Dark cube to subtract (if ``config.h4.useDarkCube`` is ``True``).
+            Dark cube to subtract.
         flat : `lsst.afw.image.Exposure`, optional
             Flat-field exposure to divide by.
         defects : `lsst.ip.isr.Defects`, optional
@@ -1083,15 +1305,8 @@ def runH4RG(
                     self.log.warn("Unexpected argument for runH4RG: %s", k)
 
         if self.config.doDark:
-            if self.config.h4.useDarkCube:
-                if nirDark is None:
-                    raise RuntimeError(
-                        "Must supply a dark cube if config.doDark=True and config.h4.useDarkCube=True."
-                    )
-            elif dark is None:
-                raise RuntimeError(
-                    "Must supply a dark exposure if config.doDark=True and config.h4.useDarkCube=False."
-                )
+            if nirDark is None:
+                raise RuntimeError("Must supply a NIR dark cube if config.doDark=True.")
         else:
             nirDark = None
         if self.config.doFlat and flat is None:
@@ -1106,20 +1321,24 @@ def runH4RG(
         if self.config.h4.doLinearize:
             linearity = self.resolveNirLinearity(pfsRaw.detector.getName())
             if linearity is None:
-                raise RuntimeError("Linearity corrections must be available if config.h4.doLinearize=True.")
-            if defects is None:
+                self.log.warn(
+                    f'no usable linearity for {pfsRaw.detector.getName()}; '
+                    'proceeding without linearity correction for this exposure'
+                )
+            elif defects is None:
                 self.log.warn('You usually want to supply defects when linearizing. Will avoid worst pixels.')
-
+        else:
+            linearity = None
         # All 3-d ramp-based operations are hidden inside this call. After .makeNirExposure()
         # we operate on a 2-d image.
-        exposure, rawRamp = self.makeNirExposure(pfsRaw, nirDark, self.config.h4.doWriteRawCube)
+        exposure, rawRamp = self.makeNirExposure(pfsRaw, nirDark,
+                                                 linearity=linearity,
+                                                 defects=defects,
+                                                 doReturnRawCube=self.config.h4.doWriteRawCube)
 
         # We BAD mask the defects now, but do not interpolate.
         if defects is not None:
             super().maskDefect(exposure, defects)
-        if self.config.h4.doLinearize:
-            self.log.info("Correcting non-linearity.")
-            exposure = self.applyNirLinearity(pfsRaw, exposure, linearity)
 
         assert len(exposure.getDetector()) == 1, "Fix me now we have multiple channels"
 
@@ -1128,18 +1347,25 @@ def runH4RG(
 
         exposure.image *= gain  # convert to electrons
         var = exposure.image.array.copy()  # assumes photon noise -- not true for the persistence
-        var += 2*channel.getReadNoise()**2  # 2* comes from CDS
+        # CDS vs UTR variance. Driven by ``H4UTRWT`` (records whether
+        # UTR weights were actually applied during ``makeNirExposure``)
+        # and ``H4NREAD`` (the actual reads integrated after the
+        # firstRead/lastRead trim — *not* ``pfsRaw.getNumReads()``).
+        md = exposure.getMetadata()
+        if md.getScalar("H4UTRWT"):
+            nread = int(md.getScalar("H4NREAD"))
+            # UTR noise per RHL Eq. 4.45.
+            var *= 6 * (nread * nread + 1) / (5 * nread * (nread + 1))
+            var += (12 * (nread - 1) / (nread * (nread + 1))
+                    * channel.getReadNoise()**2)
+        else:
+            var += 2 * channel.getReadNoise()**2  # CDS
         exposure.variance.array[:] = var
 
         if rawRamp is not None:
             rawRamp *= gain
             rawRamp = imageCube.ImageCube.fromCube(rawRamp, exposure.getMetadata())
 
-        if self.config.doDark and not self.config.h4.useDarkCube:
-            self.log.info("Applying simple dark correction.")
-            super().darkCorrection(exposure, dark)
-            super().debugView(exposure, "doDark")
-
         # Any nquarter stuff should be removed after PIPE2D-1200
         nQuarter = exposure.getDetector().getOrientation().getNQuarter()
 
@@ -1149,8 +1375,6 @@ def runH4RG(
 
         if self.config.doDefect:
             self.log.info("Masking defects.")
-            if not self.config.h4.useDarkCube:
-                self.log.warning("Masking defects, but dark cube not used")
             super().maskAndInterpolateDefects(exposure, defects)
 
         if self.config.maskNegativeVariance:
@@ -1217,120 +1441,12 @@ def resolveNirLinearity(self, cam):
         if not os.path.exists(absFilename):
             self.log.warn(f'no linearity available for {cam}: {absFilename} not found')
             return None
-
-        return nirLinearity.NirLinearity.readFits(absFilename)
-
-    def nirLinearityChebyshev(self, exposure, linearity):
-        """Apply linearity corrections using numpy polynomials.
-
-        Parameters
-        ----------
-        exposure : `lsst.afw.image.Exposure`
-            exposure to correct.
-        linearity : `nirLinearity.NirLinearity`
-            all parts of the corrections.
-
-        Returns
-        -------
-        exposure : `lsst.afw.image.Exposure`
-           the input exposure, corrected in place.
-        """
-
-        limits = linearity.limits
-        coeffs = linearity.coeffs
-
-        # We only record the max valid range of the linearization.
-        # Construct the "domains" and "scales" which numpy needs to
-        # normalize the exposure data. [0..maxValid] -> [-1..1]
-        domains = np.zeros(shape=(2, 4096, 4096), dtype='f4')
-        scales = np.ones(shape=(2, 4096, 4096), dtype='f4')
-        domains[1, :, :] = limits
-        scales[0, :, :] = -1
-        off, scl = np.polynomial.polyutils.mapparms(domains, scales)
-        rawIm = exposure.image.array
-        normIm = off + scl*rawIm
-        corr = np.zeros_like(rawIm)
-        BAD = exposure.mask.getPlaneBitMask('BAD')
-
-        # If defects were supplied we have a BAD pixel mask. Use it to
-        # avoid linearizing known defects. But in any case add in and
-        # avoid correcting the worst unmasked pixels.
-        badMask = 0 != (exposure.mask.array & BAD)
-        startingBADpixels = badMask.sum()
-
-        # For the moment, do not linearize significantly negative
-        # values. We currently mask most of these pixels externally,
-        # but leaks are bad.
-        assert len(exposure.getDetector()) == 1, "Fix me now we have multiple channels"
-        amp = exposure.detector.getAmplifiers()[0]
-        tooLow = rawIm < -amp.getReadNoise() * 10  # If permanent logic, add to config...
-        badMask[tooLow] |= True
-        corr[tooLow] = rawIm[tooLow]
-
-        # Declare that all pixels above the available linearization
-        # limits are SATurated.  When we get real data at MKO this
-        # will actually be correct. Some lab flats from JHU mask too
-        # many pixels, but we don't really care about the brightest
-        # ones.  We set the values to the limits found on the
-        # unlinearized flats, which is wrong but we cannot trust any
-        # linearization. We need to replace the coefficients for these
-        # pixels with more sane estimates before we can do anyting
-        # better.
-        saturated = rawIm > limits
-        badMask[saturated] |= True
-        corr[saturated] = limits[saturated]
-        exposure.mask.array[saturated] |= (exposure.mask.getPlaneBitMask('SAT'))
-
-        # Corrections are only valid for flux >= 0. The chebyshevs are
-        # forced to 0 at flux=0, and are pretty low order, and the
-        # negative values should be pretty small (for good pixels). We
-        # should pretend that the correction is symmetric across 0, but
-        # let it extrapolate instead. Hmm, CPL.
-        goodMask = ~badMask
-        corr[goodMask] = np.polynomial.chebyshev.chebval(normIm[goodMask],
-                                                         coeffs[:, goodMask],
-                                                         tensor=False)
-        exposure.image.array[:] = corr
-        exposure.mask.array[badMask] |= BAD
-
-        endingBADpixels = badMask.sum()
-        self.log.info(f'linearization added {endingBADpixels - startingBADpixels} BAD pixels')
-
-        # variance is handled later...
-
-        return exposure
-
-    def applyNirLinearity(self, pfsRaw, exposure, linearity):
-        """Apply NIR linearity corrections of some defined kind.
-
-        Parameters
-        ----------
-        pfsRaw : `lsst.obs.pfs.PfsRaw`
-            The raw exposure that we ran through ISR.
-        exposure : `lsst.afw.image.Exposure`
-            exposure to correct..
-        linearity : `nirLinearity.NirLinearity`
-            all parts of the linearity corrections
-
-        Returns
-        -------
-        exposure : `lsst.afw.image.Exposure`
-            input exposure, corrected in place.
-        """
-        method = linearity.method
-
-        if method == 'identity':
-            return exposure
-        elif method == 'np.polynomial.chebyshev':
-            self.nirLinearityChebyshev(exposure, linearity)
-            exposure.metadata.set('PFS ISR LINEARITY METHOD', method)
-            # Need some SHA-like ID. And the filename.
-        else:
-            # blow up here?
-            self.log.warn(f'ignoring unknown linearity method: {method}')
-            return exposure
-
-        return exposure
+        if not h4Linearity.isH4LinearityFile(absFilename):
+            self.log.warn(
+                f'no usable linearity for {cam}: {absFilename} is not an h4Linearity-format file'
+            )
+            return None
+        return h4Linearity.loadFits(absFilename)
 
     def calcUTRWeights(self, nreads: int) -> np.ndarray:
         """Compute the weights for linear UTR, per Eq 4.24 in RHL
@@ -1352,6 +1468,39 @@ def calcUTRWeights(self, nreads: int) -> np.ndarray:
             w.append(k)
         return np.array(w)
 
+    def calcUTRrateFromDeltas(self, deltas: np.ndarray) -> np.ndarray:
+        """UTR-weighted per-pixel rate, computed directly from a delta
+        cube. Equivalent to :meth:`calcUTRrates` applied to the
+        reconstructed cumulative ramp ``read0 + cumsum(deltas, axis=-1)``,
+        but skips the cumulative reconstruction by using the closed-form
+        delta weights ``u[j] = 6(j+1)(N-1-j) / (N(N-1)(N+1))`` for
+        j = 0..N-2. These sum to 1 and reproduce the read-space UTR
+        weights ``w[i] = (12i - 6(N-1)) / (N(N²-1))`` after the
+        cumsum/diff change of variables.
+
+        Parameters
+        ----------
+        deltas : `np.ndarray`
+            ``(H, W, N-1)`` per-pixel delta cube with the time axis last.
+
+        Returns
+        -------
+        `np.ndarray`
+            ``(H, W)`` per-pixel rate (ADU/read).
+        """
+        nDeltas = deltas.shape[-1]
+        nReads = nDeltas + 1
+        ks = np.arange(nDeltas, dtype=np.float32)
+        weights = (
+            6.0 * (ks + 1.0) * (nReads - 1.0 - ks)
+            / (nReads * (nReads - 1.0) * (nReads + 1.0))
+        ).astype(np.float32, copy=False)
+        # Slice-wise accumulation avoids an (H, W, N-1) transient.
+        rate = np.zeros(deltas.shape[:-1], dtype=np.float32)
+        for k in range(nDeltas):
+            rate += weights[k] * deltas[..., k]
+        return rate
+
     def calcUTRrates(self, cube: np.ndarray, nreads: Optional[int] = None) -> np.ndarray:
         """Apply UTR weights to the ramp reads to estimate a per-pixel arrival rate.
 
@@ -1380,28 +1529,92 @@ def calcUTRrates(self, cube: np.ndarray, nreads: Optional[int] = None) -> np.nda
 
         return rate_sum
 
-    def getDarkCube(self, nirDark, nreads: Optional[int] = None) -> np.ndarray:
+    def subtractDarkCube(self, nirDark, cube: np.ndarray, r0: int = 0) -> None:
+        """Subtract the per-read dark frames from ``cube`` in place.
+
+        Iterates dark-cube indices ``[r0, r0 + cube.shape[0])`` and
+        subtracts each dark frame from the corresponding read of
+        ``cube``. No transient ramp-sized buffer: each ``(H, W)`` dark
+        frame is fetched via ``nirDark.getReadArray(...)``, gain-corrected,
+        and applied directly to ``cube[k]``. The per-read subtract is a
+        contiguous 2-D write because ``cube`` is ``(N, H, W)`` C-order,
+        which is cache-friendly and avoids the ~14 s / ~6.7 GB cost of
+        materializing-then-transposing a full dark cube via
+        :meth:`getDarkCube`.
+
+        Parameters
+        ----------
+        nirDark : `lsst.obs.pfs.imageCube.ImageCube`
+            The unloaded dark cube — frames live on disk and are
+            fetched on demand via ``nirDark.getReadArray``.
+            ``nirDark[i]`` is the dark for absolute read ``i + 1``
+            (the convention :meth:`getDarkRead` and :meth:`getDarkCube`
+            use).
+        cube : `np.ndarray`
+            ``(N, H, W)`` cumulative ramp to dark-subtract, modified in
+            place. ``cube[k]`` corresponds to absolute read ``r0 + k + 1``
+            of the original ramp.
+        r0 : `int`
+            Absolute index of the first read processed; ``nirDark[r0+k]``
+            is paired with ``cube[k]``.
+        """
+        gain = nirDark.metadata.get("GAIN", 1.0)
+        N = cube.shape[0]
+        for k in range(N):
+            darkFrame = nirDark.getReadArray(r0 + k)
+            if gain != 1.0:
+                # Back out the gain that ``ImageCube`` applied when
+                # writing the dark in electrons; the cube here is in ADU.
+                cube[k] -= darkFrame / gain
+            else:
+                cube[k] -= darkFrame
+
+    def getDarkCube(self, nirDark, nreads: Optional[int] = None, r0: int = 0) -> np.ndarray:
         """Get the dark cube for the NIR ramp.
 
+        .. note::
+
+           Production code uses :meth:`subtractDarkCube`, which fuses the
+           per-read fetch with an in-place subtract — no transient
+           dark-cube allocation, no transpose. ``getDarkCube`` is kept
+           for diagnostic paths (``validate.collectPixelRampData``'s
+           ``cubeDark`` and tests) that want to inspect the dark cube
+           in the project's ``(H, W, N)`` layout.
+
         Parameters
         ----------
         nirDark : `lsst.obs.pfs.imageCube.ImageCube`
-            The unprocessed to subtract.
+            The unloaded dark cube — frames live on disk and are
+            fetched on demand via ``nirDark.getReadArray``.
+        nreads : `int`, optional
+            Number of dark frames to return. If None, return all from ``r0``.
+        r0 : `int`
+            0-indexed offset into the dark cube. Returns ``dark[r0:r0+nreads]``.
+            Used when processing a sub-range of the data ramp; the dark slice
+            then aligns with the data reads being processed.
 
         Returns
         -------
         `np.ndarray`
-            The dark cube to subtract.
+            The dark cube to subtract, shape ``(H, W, nreads)`` — the
+            time axis is last to match the H4 ISR cube convention. The
+            underlying ``ImageCube`` stores frames per-read in
+            ``(N, H, W)`` form, so the transpose happens here at the
+            boundary.
         """
 
-        #
-        cube = nirDark.getImageCube(nreads=nreads)
+        if r0 > 0:
+            end = nirDark.nreads if nreads is None else r0 + nreads
+            full = nirDark.getImageCube(nreads=end)
+            cube = full[r0:end].copy()
+        else:
+            cube = nirDark.getImageCube(nreads=nreads)
         # If gain was applied, back it out. We used to apply darks to the
         # 2-d image in e-, but have switch to applying it to the raw rampin ADU.
         gain = nirDark.metadata.get("GAIN", 1.0)
         if gain != 1.0:
             cube /= gain
-        return cube
+        return np.ascontiguousarray(cube.transpose(1, 2, 0))
 
     def getDarkRead(self, nirDark, readNum) -> np.ndarray:
         """Get the dark read for the NIR ramp.
@@ -1412,7 +1625,8 @@ def getDarkRead(self, nirDark, readNum) -> np.ndarray:
         Parameters
         ----------
         nirDark : `lsst.obs.pfs.imageCube.ImageCube`
-            The unprocessed dark cube.
+            The unloaded dark cube — frames live on disk and are
+            fetched on demand via ``nirDark.getReadArray``.
         readNum : `int`
             The read number to get.
 
@@ -1430,63 +1644,510 @@ def getDarkRead(self, nirDark, readNum) -> np.ndarray:
             dark /= gain
         return dark
 
-    def makeNirExposure(self, pfsRaw, nirDark=None, doReturnRawCube=False):
-        """Construct a 2D image from the NIR ramp data.
+    def rampParams(self, pfsRaw):
+        """Ramp-processing parameters, dispatched by observation type.
+
+        Per-obstype defaults: arcs (``comparison``) and flats → full-ramp
+        CDS; darks → full-ramp UTR; science → UTR over reads ``1:-3``
+        (drop the shutter-closed read 0 and the trailing 3 shutter-closed
+        / transitional reads). Anything else falls back to full-ramp UTR.
+
+        Each `H4Config` field (``quickCDS`` / ``firstRead`` / ``lastRead``)
+        that is set to an explicit (non-``None``) value overrides the
+        dispatched default.
+
+        Returns
+        -------
+        quickCDS : `bool`
+        firstRead, lastRead : `int`
+            0-indexed inclusive read bounds (negative counts from the end).
+        """
+        obsType = (pfsRaw.obsInfo.observation_type or "").lower()
+        if obsType in ("comparison", "flat"):
+            # Arcs and flats use CDS. We may want to process longer
+            # flats UTR — deferred until we have a principled
+            # read-range choice.
+            quickCDS, firstRead, lastRead = True, 0, -1
+        elif obsType == "science":
+            # Shutter open/close FITS cards are not yet written, so the
+            # exact illuminated read range is unknown. Once those cards
+            # are available the range can be computed correctly; until
+            # then use reads 1:-3, which empirically drops the shutter-
+            # closed read 0 and the trailing 3 shutter-closed reads.
+            quickCDS, firstRead, lastRead = False, 1, -4
+        else:  # dark, unknown — full-ramp UTR
+            quickCDS, firstRead, lastRead = False, 0, -1
+
+        cfg = self.config.h4
+        if cfg.quickCDS is not None:
+            quickCDS = cfg.quickCDS
+        if cfg.firstRead is not None:
+            firstRead = cfg.firstRead
+        if cfg.lastRead is not None:
+            lastRead = cfg.lastRead
+        self.log.info(
+            f"ramp params for obsType={obsType!r}: quickCDS={quickCDS} "
+            f"firstRead={firstRead} lastRead={lastRead}"
+        )
+        return quickCDS, firstRead, lastRead
+
+    _INTERMEDIATE_KEYS = (
+        'raw', 'darkSubbed', 'linearized', 'crCorrected', 'crResult',
+    )
+
+    def makeNirExposure(self,
+                        pfsRaw,
+                        nirDark=None,
+                        linearity=None,
+                        defects=None,
+                        doReturnRawCube=False,
+                        intermediates=None):
+        """Build a 2-D image from the H4 ramp.
+
+        The read range and CDS-vs-UTR choice come from
+        :meth:`rampParams` (driven by ``config.h4`` and the observation
+        type). The CDS arm linearizes and differences two endpoint
+        frames; the UTR arm reads the cumulative ramp, subtracts the
+        dark cube, optionally linearizes the whole cube, and
+        — when ``config.h4.doCR`` is enabled — runs the iterative
+        UTR-rate CR / ASIC-glitch detector on the per-read deltas.
+        The returned exposure carries ``CR``, ``BAD``, ``SAT``,
+        ``DARK_DEFECT``, ``LINEARITY_DEFECT``, and ``UNSTABLE`` mask planes
+        as appropriate.
 
         Parameters
         ----------
         pfsRaw : `lsst.obs.pfs.PfsRaw`
-            Provides access to the ramp that is to be
-            run through ISR.
+            Raw H4 exposure giving access to the ramp.
         nirDark : `lsst.obs.pfs.imageCube.ImageCube`, optional
-            Dark cube to subtract (if ``config.h4.useDarkCube`` is ``True``).
+            Per-read dark cube to subtract from the ramp. Required when
+            ``config.doDark`` is True.
+        linearity : `lsst.obs.pfs.h4Linearity.LinearityCorrection`, optional
+            Linearity solution. Required when ``config.h4.doLinearize``
+            is True.
+        defects : `lsst.ip.isr.Defects`, optional
+            Known bad pixels; excluded from the CR/glitch detector's
+            statistics and folded into the linearity-time mask.
         doReturnRawCube : `bool`, optional
-            If True, return the raw ramp cube as well as the 2D image.
+            Reconstruct and return the (post-linearization, post-CR)
+            cumulative ramp cube alongside the 2-D image. Defaults to
+            False because the cube is large (~6.7 GB on a 4096²×100
+            ramp) and the production path is delta-only.
+        intermediates : `dict`, optional
+            If provided, the keys *already present* select which stages
+            to snapshot; each captured cube costs one full ``.copy()``.
+            Recognized keys:
+
+              ``'raw'``         pre-dark cumulative ramp
+              ``'darkSubbed'``  ramp after dark subtraction (input to
+                                linearity)
+              ``'linearized'``  post-linearization, pre-CR cumulative
+                                ramp
+              ``'crCorrected'`` post-CR cumulative ramp (equals
+                                ``'linearized'`` when
+                                ``config.h4.doCR`` is False)
+              ``'crResult'``    the iterative CR detector's
+                                ``IterativeRepairResult`` — per-pixel
+                                ``rate`` / ``sigma``, per-delta
+                                ``crFlagMask`` / ``glitchFlagMask`` /
+                                ``unclassifiedFlagMask``, per-pixel
+                                ``badPixelMask`` (UNSTABLE / RTS),
+                                iteration counts. None when
+                                ``config.h4.doCR`` is False.
+
+            Pass ``{'linearized': None}`` to capture just one stage;
+            pass ``dict.fromkeys(['raw', 'darkSubbed', 'linearized',
+            'crCorrected', 'crResult'])`` for everything. The values
+            are overwritten in place.
 
         Returns
         -------
         exposure : `lsst.afw.image.Exposure`
-            The 2-d image from the ramp.
+            The 2-D image with mask planes stamped.
+        rawCube : `numpy.ndarray` or `None`
+            The post-linearization, post-CR cumulative ramp when
+            ``doReturnRawCube`` is True; otherwise ``None``. This is
+            the same data the rate computation operates on — the
+            production path keeps it in delta form, but the cube is
+            rebuilt via ``cumsum`` for callers that need the
+            ``(N, H, W)`` cumulative layout (e.g. for persistence to
+            ``rawISRCube``).
         """
 
-        if self.config.h4.quickCDS:
-            self.log.info("creating quick CDS.")
-            nirImage = self.makeCDS(pfsRaw)
+        # Capture set is whichever recognized keys the caller pre-seeded
+        # in ``intermediates``. Skipping unrequested stages avoids their
+        # full-cube ``.copy()`` overhead.
+        if intermediates is None:
+            captureKeys = frozenset()
+        else:
+            captureKeys = frozenset(intermediates)
+            unknown = captureKeys - frozenset(self._INTERMEDIATE_KEYS)
+            if unknown:
+                raise ValueError(
+                    f"unknown intermediates keys: {sorted(unknown)}; "
+                    f"expected subset of {self._INTERMEDIATE_KEYS}."
+                )
+
+        # Dispatch CDS/UTR and the read range by observation type, then
+        # resolve to absolute, 0-indexed inclusive bounds.
+        quickCDS, firstRead, lastRead = self.rampParams(pfsRaw)
+        if quickCDS and self.config.h4.applyUTRWeights:
+            self.log.warn(
+                "applyUTRWeights=True but obstype logic switched that to CDS; "
+                "UTR weighting will not be applied for this exposure."
+            )
+        r0 = pfsRaw.positiveIndex(firstRead)
+        r1 = pfsRaw.positiveIndex(lastRead)
+        if r1 - r0 < 1:
+            raise ValueError(
+                f"firstRead={firstRead} (->{r0}) and "
+                f"lastRead={lastRead} (->{r1}) leave no readable range "
+                f"(need r1 > r0)."
+            )
+        # Seed the H4 internal mask once, before the CDS-vs-UTR
+        # dispatch — BORDER + DARK_DEFECT (input calib) + the linearity
+        # calib's fit-time bits are the universal first step, regardless
+        # of reduction path. Downstream stages (apply, the CR detector,
+        # the projection at the end) read this mask and OR in their own
+        # findings under the "first-reason-wins" rule.
+        detBBox = pfsRaw.detector.getBBox()
+        internalMask = _makeInternalMask(
+            (detBBox.getHeight(), detBBox.getWidth()),
+            linearity=linearity, defects=defects,
+        )
+
+        # Tracks whether ``exposure.image`` ends up UTR-weighted. The
+        # quickCDS branch and the non-linearized non-UTR fallback are
+        # the only paths that produce a CDS-style image; everything
+        # else applies UTR weights (the linearized arm via the closed-
+        # form delta weights, the legacy carve-out via
+        # ``calcUTRrates``). Set per-branch below and stamped into
+        # ``H4UTRWT`` by ``_stampRampMetadata``.
+        appliedUTR = False
+        if quickCDS:
+            # CDS from two reads, with two-read linearization: linearize
+            # the absolute cumulative frame at each endpoint and
+            # difference them (the model is nonlinear, so linearize then
+            # subtract — never the reverse). This still yields the
+            # linearity-derived mask planes (defects, bad fit,
+            # saturation). The endpoint frames are dark-subtracted first
+            # because the linearity model is calibrated on dark-
+            # subtracted cumulative ADU. CR/glitch detection needs the
+            # full ramp, so crResult stays None.
+            self.log.info(f"creating CDS over reads [{r0}, {r1}].")
+            frameR1 = self.makeCDS(pfsRaw, r0=0, r1=r1)
+            frameR0 = self.makeCDS(pfsRaw, r0=0, r1=r0) if r0 > 0 else None
+            if nirDark is not None:
+                frameR1 -= self.getDarkRead(nirDark, r1 - 1)
+                if frameR0 is not None:
+                    frameR0 -= self.getDarkRead(nirDark, r0 - 1)
+
             flux = None
+            crResult = None
+            if self.config.h4.doLinearize and linearity is not None:
+                self.log.info("Correcting non-linearity (two-read CDS).")
+                linR1, _ = h4Linearity.applyFrame(linearity, frameR1)
+                if frameR0 is not None:
+                    linR0, _ = h4Linearity.applyFrame(linearity, frameR0)
+                    nirImage = linR1 - linR0
+                else:
+                    nirImage = linR1
+                # Compute runtime range bits on the endpoint frames,
+                # gating on the "first-reason-wins" rule: only set
+                # ABOVE/BELOW range bits where the internal mask is
+                # otherwise empty.
+                goodPixels = internalMask == 0
+                frames = [frameR1] if frameR0 is None else [frameR0, frameR1]
+                for frame in frames:
+                    internalMask[(frame > linearity.fitMax)
+                                 & goodPixels] |= h4Linearity.ABOVE_VALID_RANGE
+                    internalMask[(frame < linearity.fitMin)
+                                 & goodPixels] |= h4Linearity.BELOW_VALID_RANGE
+            else:
+                nirImage = frameR1 if frameR0 is None else frameR1 - frameR0
         else:
-            # Too many interacting switches for CDS/UTR/darks. Especially note 2-d doDark is still possible.
-            self.log.info("reading full ramp...")
-            deltas = self.makeUTRdeltas(pfsRaw)
-
-            # We do not currently use the indices of the masked pixels,
-            # but do get them for when we put in the effort.
-            deltas, posIdx, negIdx = self.correctCRs(pfsRaw, deltas)
-
-            # Switch to accumulated flux for dark subtraction and
-            # UTR weighting. This is actually pretty expensive; should
-            # rethink.
-            # But do at least save memory by doing this in place.
-            flux = np.cumsum(deltas, axis=0, out=deltas)
-            del deltas  # Get rid of the name to avoid stupidities
-
-            if self.config.h4.applyUTRWeights:
-                if nirDark is not None:
-                    self.log.info("subtracting dark cube.")
-                    darkCube = self.getDarkCube(nirDark, len(flux))
-                    flux -= darkCube
+            self.log.info(f"reading ramp over reads [{r0}, {r1}]...")
+            # ``flux`` is the cumulative IRP-corrected ramp zero-anchored
+            # at r0, shape ``(N, H, W)`` — frames first so the per-read
+            # ``subtractDarkCube`` below writes contiguously. The single
+            # transpose to ``(H, W, N)`` happens at the ``apply()``
+            # boundary further down (in the linearization arm); the cube
+            # stays ``(N, H, W)`` all the way through the no-linearization
+            # fallback.
+            flux = self.makeUTRcumulative(pfsRaw, r0=r0, r1=r1)
+
+            # makeUTRcumulative zero-anchors at r0, so flux[i] = read[r0+i+1] - read[r0].
+            # Add the cumulative flux from read 0 to read r0 so flux is bias-relative
+            # absolute (read[r0+i+1] - read[0]). Linearity is calibrated on absolute
+            # cumulative ADU, so this anchoring is required for correctness when r0>0.
+            offsetRaw = None
+            if r0 > 0:
+                self.log.info(f"adding absolute baseline from reads [0, {r0}].")
+                offsetRaw = self.makeCDS(pfsRaw, r0=0, r1=r0).astype(flux.dtype, copy=False)
+                flux += offsetRaw[None]
+
+            if 'raw' in captureKeys:
+                # PixelRampData / the rest of the diagnostic chain wants
+                # the captured cubes in (H, W, N) form to match
+                # cubeLin / cubeCR. Transpose at capture time — paid
+                # only on the diagnostic path.
+                intermediates['raw'] = np.ascontiguousarray(flux.transpose(1, 2, 0))
+
+            if nirDark is not None:
+                self.log.info("subtracting dark cube (per-read, in place).")
+                self.subtractDarkCube(nirDark, flux, r0=r0)
+
+            if 'darkSubbed' in captureKeys:
+                intermediates['darkSubbed'] = np.ascontiguousarray(
+                    flux.transpose(1, 2, 0)
+                )
 
+            if self.config.h4.doLinearize and linearity is not None:
+                self.log.info("Correcting non-linearity.")
+                # h4Linearity.apply, the CR detector, the diff and the
+                # cumsum reconstruction all want the time axis last and
+                # contiguous — per-pixel Horner / partition / IQR /
+                # cumsum then stride 1 along reads instead of crossing
+                # the slowest axis. Single transpose here; everything
+                # downstream of this point stays ``(H, W, N)``.
+                flux = np.ascontiguousarray(flux.transpose(1, 2, 0))
+                # Hand the pre-seeded internal mask to apply() as
+                # ``validMask``; apply() OR's its own findings (range
+                # bits) back in under the first-reason-wins rule.
+                ramp = h4Linearity.Ramp(reads=flux, validMask=internalMask)
+                linearizedRamp = h4Linearity.apply(linearity, ramp)
+                flux = linearizedRamp.cumulativeLinear
+                internalMask = linearizedRamp.badPixelMask
+                self.log.info(
+                    f"   starting with {int((internalMask != 0).sum())} "
+                    f"already-flagged pixels (border + defects + fit-time bits)"
+                )
+                # The pre-linearization cube is now superseded by the
+                # linearized output. Drop the Ramp / LinearizedRamp
+                # holders so the input (~6.7 GB) can be GC'd before the
+                # CR/glitch detector copies a fresh ``cubeOriginal``.
+                del ramp, linearizedRamp
+
+                # Re-anchor the linearized cube at r0: subtract the linearized value
+                # at read r0 from every read so the output represents only the flux
+                # accumulated during (r0, r1]. Without this rebase, postISRCCD over
+                # disjoint sub-ranges would not sum to the full-ramp postISRCCD.
+                # No-op for r0=0 because the linearity model maps 0 -> 0.
+                if r0 > 0 and offsetRaw is not None:
+                    self.log.info(f"re-anchoring linearized cube at read {r0}.")
+                    if nirDark is not None:
+                        # Linearization saw flux already dark-subtracted; the rebase
+                        # offset must match. offsetRaw is the absolute cumulative
+                        # at read r0, so its dark is nirDark[r0-1] (nirDark[j] is
+                        # the dark for read j+1 — the same k->k-1 indexing
+                        # getDarkCube uses).
+                        offsetForLin = offsetRaw - self.getDarkRead(nirDark, r0 - 1)
+                    else:
+                        offsetForLin = offsetRaw
+                    linearizedOffset, _ = h4Linearity.applyFrame(linearity, offsetForLin)
+                    flux -= linearizedOffset[..., None]
+
+                if 'linearized' in captureKeys:
+                    intermediates['linearized'] = flux.copy()  # (H, W, N) post-lin, pre-CR
+
+                # Switch from cumulative flux to delta-space for everything
+                # downstream of linearization. The CR/glitch detector
+                # operates on deltas directly (no diff/cumsum dance inside).
+                # We only reconstruct the cumulative cube on demand
+                # (intermediates / doReturnRawCube).
+                read0 = flux[..., 0:1].copy()
+                deltas = np.diff(flux, axis=-1)
+                del flux
+                iterResult = None  # filled in if doCR runs
+
+                if self.config.h4.doCR:
+                    # First-reason-wins: CR detector only runs on
+                    # pixels with no pre-existing reason in the
+                    # internal mask.
+                    crGood = (internalMask == 0)
+
+                    if self.config.h4.doDeglitch:
+                        self.log.info("Correcting CRs and ASIC glitches.")
+                        # Glitch detection runs on all pixels. The matched-pair
+                        # cancellation criterion (opposite signs, sum within
+                        # threshold) is what discriminates a glitch from a CR;
+                        # restricting by ASIC channel misses glitches on
+                        # channels other than the historically-noted ones.
+                        glitchChanMask = np.ones(deltas.shape[:-1], dtype=bool)
+                    else:
+                        self.log.info("Correcting CRs (ASIC deglitching disabled).")
+                        glitchChanMask = None
+                    # deltas is already (H, W, N-1) — pass directly to
+                    # the CR detector; no boundary transpose here.
+                    iterResult = h4Linearity.cr.iterativeUtrDetectAndRepair(
+                        deltas,
+                        goodPixelMask=crGood,
+                        glitchPixelMask=glitchChanMask,
+                        sigmaFloorADU=self.config.h4.rateCRsigmaFloorADU,
+                        nSigma=self.config.h4.rateCRnSigma,
+                        maxIterations=self.config.h4.rateCRiterMax,
+                        repair=self.config.h4.repairCR,
+                        correctGlitches=self.config.h4.correctGlitches,
+                        glitchAmplitudeMinADU=self.config.h4.deglitchAmplitudeMinADU,
+                        maxDropFraction=self.config.h4.rateCRmaxDropFraction,
+                        nDropSigma=self.config.h4.rateCRnDropSigma,
+                        badPixelMinOutliers=self.config.h4.badPixelMinOutliers,
+                        badPixelOutlierSigma=self.config.h4.badPixelOutlierSigma,
+                    )
+                    # Suppress CR / glitch flags at pixels classified
+                    # BAD — RTS pixels often paint up as one or two CRs
+                    # by accident; downstream consumers should treat the
+                    # whole pixel as bad, not attempt CR repair.
+                    badPix2D = iterResult.badPixelMask
+                    if badPix2D.any():
+                        iterResult.crFlagMask[badPix2D] = False
+                        iterResult.glitchFlagMask[badPix2D] = False
+                    crFlagMask2D = iterResult.crFlagMask.any(axis=-1)
+                    glitchFlagMask2D = iterResult.glitchFlagMask.any(axis=-1)
+                    unclassMask2D = iterResult.unclassifiedFlagMask.any(axis=-1)
+                    crResult = SimpleNamespace(
+                        flagMask=crFlagMask2D,
+                        nFlagged=int(crFlagMask2D.sum()),
+                        glitchFlagMask=glitchFlagMask2D,
+                        nGlitchFlagged=int(glitchFlagMask2D.sum()),
+                        badPixelMask=badPix2D,
+                        nBadPixels=int(badPix2D.sum()),
+                    )
+                    # OR CR-stage findings back into the internal mask.
+                    # crResult.flagMask (CR-only) stays out of the
+                    # internal mask — projected directly to the CR
+                    # plane by _projectInternalMask.
+                    internalMask[badPix2D] |= h4Linearity.UNSTABLE
+                    internalMask[unclassMask2D] |= h4Linearity.UNCLASSIFIED
+                    # Promote ASIC-glitch pixels with at least one
+                    # pair height above ``asicGlitchHeightMaskADU`` to
+                    # BAD. Default threshold is 0 — every glitch-
+                    # flagged pixel is masked, matching the current
+                    # detect-but-don't-repair policy. Heights come
+                    # straight from the saved deltas because interior
+                    # pairs are not repaired (correctGlitches=False
+                    # by default).
+                    glitchMask3D = iterResult.glitchFlagMask
+                    if glitchMask3D.any():
+                        pairStart = (glitchMask3D[..., :-1]
+                                     & glitchMask3D[..., 1:])
+                        halfDiff = 0.5 * (deltas[..., :-1]
+                                          - deltas[..., 1:])
+                        maxHeight = np.where(
+                            pairStart, np.abs(halfDiff), 0.0,
+                        ).max(axis=-1)
+                        promote = (
+                            (maxHeight
+                             > self.config.h4.asicGlitchHeightMaskADU)
+                            & glitchMask3D.any(axis=-1)
+                        )
+                        if promote.any():
+                            internalMask[promote] |= h4Linearity.ASIC_GLITCH
+                    if 'crResult' in captureKeys:
+                        intermediates['crResult'] = iterResult
+                    self.log.info(
+                        f"iterative CR/glitch step: "
+                        f"{iterResult.nCRs} CR flag entries, "
+                        f"{iterResult.nGlitchPairs} glitch pairs, "
+                        f"{crResult.nFlagged} unique CR pixels, "
+                        f"{crResult.nGlitchFlagged} unique glitch pixels, "
+                        f"{crResult.nBadPixels} BAD (RTS) pixels "
+                        f"in {iterResult.nIterations} iterations."
+                    )
+                else:
+                    crResult = None
+
+                # Reconstruct the cumulative cube ONLY when a debug
+                # capture (``'crCorrected'``) or ``doReturnRawCube`` asks
+                # for it. The default production path is delta-only from
+                # here on. cumsum along the contiguous time axis is
+                # ~6 s vs ~40 s along the strided axis on a 4096²×100
+                # cube.
+                flux = None
+                if ('crCorrected' in captureKeys) or doReturnRawCube:
+                    fluxHWN = np.empty(
+                        deltas.shape[:-1] + (deltas.shape[-1] + 1,),
+                        dtype=deltas.dtype,
+                    )
+                    fluxHWN[..., 0:1] = read0
+                    np.cumsum(deltas, axis=-1, out=fluxHWN[..., 1:])
+                    fluxHWN[..., 1:] += read0
+                    if 'crCorrected' in captureKeys:
+                        intermediates['crCorrected'] = fluxHWN.copy()  # (H, W, N)
+                    if doReturnRawCube:
+                        # ``doReturnRawCube`` is consumed by ``runH4RG``
+                        # → ``ImageCube.fromCube``, which iterates the
+                        # cube frame-by-frame and so still needs the
+                        # ``(N, H, W)`` layout. Transpose only here.
+                        flux = np.ascontiguousarray(fluxHWN.transpose(2, 0, 1))
+                    del fluxHWN
+            else:
+                # No linearization path: the pre-seeded internalMask
+                # (BORDER + DARK_DEFECT only — linearity bits were skipped
+                # since no calib was loaded) carries through unchanged.
+                crResult = None
+                # No linearization → no apply-boundary transpose ran;
+                # flux is still ``(N, H, W)`` from makeUTRcumulative,
+                # which is the form the legacy ``calcUTRrates`` and
+                # ``flux[-1] - flux[0]`` paths below expect.
+                deltas = None
+                read0 = None
+
+            if deltas is not None:
+                # Delta-space science image: UTR-weighted rate × nReads,
+                # equivalent to ``calcUTRrates`` on the reconstructed
+                # (CR-corrected) flux ramp ``read0 + cumsum(deltas)``.
+                # The CR detector already computes this for us in
+                # ``iterResult.rate`` (delta-space closed form of the
+                # read-space UTR weights). When CR is disabled, apply
+                # the same delta-space weights directly. ``deltas`` is
+                # (H, W, N-1) here — the time axis is last.
+                if self.config.h4.doCR and iterResult is not None:
+                    pixelRate = iterResult.rate
+                else:
+                    pixelRate = self.calcUTRrateFromDeltas(deltas)
+                nirImage = pixelRate * deltas.shape[-1]
+                appliedUTR = True
+                del deltas, read0
+            elif self.config.h4.applyUTRWeights:
+                # Carve-out for non-linearizable detectors: they have no
+                # linearity solution but still need a UTR-weighted rate
+                # path. The linearized arm above is the long-term home
+                # for applyUTRWeights once a delta-form calcUTRrates
+                # exists, but for cameras without a linearity curve this
+                # branch stays load-bearing.
                 self.log.info("applying UTR weights.")
                 rates = self.calcUTRrates(flux)
                 nirImage = rates * len(flux)
+                appliedUTR = True
             else:
-                # CDS, basically.
-                if nirDark is not None:
-                    nirImage = ((flux[-1] - self.getDarkRead(nirDark, len(flux)-1)) -
-                                (flux[0] - self.getDarkRead(nirDark, 0)))
-                else:
-                    nirImage = flux[-1] - flux[0]
+                # No linearization and no UTR weighting: two-read
+                # difference on the already dark-subtracted ramp.
+                nirImage = flux[-1] - flux[0]
 
         exposure = self._makeExposure(pfsRaw, nirImage)
+        _stampRampMetadata(
+            exposure, r0=r0, r1=r1,
+            nTotal=int(pfsRaw.getNumReads()),
+            appliedUTR=appliedUTR,
+        )
+        # Single projection point: lift the internal mask + CR result
+        # into Exposure.mask planes (DARK_DEFECT, LINEARITY_DEFECT, SAT,
+        # UNSTABLE, BAD, CR). See ``_projectInternalMask``.
+        _projectInternalMask(exposure, internalMask, crResult=crResult)
+
+        nCR = crResult.nFlagged if crResult is not None else 0
+        self.log.info(
+            f"nSat={int(((internalMask & h4Linearity.ABOVE_VALID_RANGE) != 0).sum())} "
+            f"nLow={int(((internalMask & h4Linearity.BELOW_VALID_RANGE) != 0).sum())} "
+            f"nDefects={int(((internalMask & h4Linearity.MASKED_BY_INPUT) != 0).sum())} "
+            f"nLinDefects={int(((internalMask & h4Linearity.DEAD) != 0).sum())} "
+            f"nUnstable={int(((internalMask & h4Linearity.UNSTABLE) != 0).sum())} "
+            f"nUnclassified={int(((internalMask & h4Linearity.UNCLASSIFIED) != 0).sum())} "
+            f"nGlitchMasked={int(((internalMask & h4Linearity.ASIC_GLITCH) != 0).sum())} "
+            f"nCR={nCR}"
+        )
+
         return exposure, flux if doReturnRawCube else None
 
     def makeRawDataArray(self, pfsRaw, readNum, fromArray=None) -> np.ndarray:
@@ -1623,141 +2284,6 @@ def makeRawIrpNcube(self, pfsRaw,
                                     r0=r0, r1=r1, nreads=nreads, bbox=bbox,
                                     doDeltas=False)
 
-    def repairWithWindow(self, deltas, badPixels,
-                         corrRad, corrMin, corrMax,
-                         repairMask=None,
-                         otherIgnore=None) -> np.ndarray:
-        """Replace single pixels with a local correction
-
-        Parameters
-        ----------
-        deltas : `np.ndarray`
-           The 3-d cube of the ramp
-        badPixels : `np.ndarray`
-           A 2-d array of the indices of the reads to replace
-        corrRad : `int`
-           The width of the window (along reads) to use for replacement values
-        corrMin : `int`
-           In the deltas cube, the first read we can use for replacements
-        corrMax : `int`
-           In the deltas cube, the last read we can use for replacements
-        repairMask : `np.ndarray`, optional
-           If set, only apply corrections to these pixels.
-        otherIgnore : `np.ndarray`, optional
-           If set, array of read indices we cannot use for replacements.
-
-        Returns
-        -------
-        correctionWindow : `np.ndarray`
-           The windows we use to make replacements. NaNs ignored.
-           For engineering. Drop once we believe or replace this.
-        """
-        # Use neighboring reads, clipped by the ends of the ramp
-        lowIdx0 = badPixels - corrRad
-        lowIdx = np.maximum(lowIdx0, np.zeros_like(badPixels)+corrMin)
-
-        # If our window hits the end of the valid window, move the
-        # start of the window down.
-        highIdx0 = badPixels + corrRad
-        highIdx = np.minimum(highIdx0, np.zeros_like(badPixels)+corrMax-1)
-        highClip = np.where(highIdx != highIdx0)
-        lowIdx[highClip] = highIdx[highClip] - 2*corrRad+1
-
-        # Average the nearby reads, ignoring any we are correcting.
-        correctionWindow = np.zeros(shape=(2*corrRad+1, deltas.shape[1], deltas.shape[2]))
-        for i in range(2*corrRad + 1):
-            idx_i = lowIdx + i
-            corr1 = np.take_along_axis(deltas, idx_i, axis=0)
-            corr1[idx_i == badPixels] = np.nan
-            if otherIgnore is not None:
-                corr1[idx_i == otherIgnore] = np.nan
-            correctionWindow[i, :, :] = corr1
-        corrections = np.nanmean(correctionWindow, axis=0)
-
-        # This is not the cheapest way to handle repairMask, but re-writing to
-        # make that efficient would be error-prone work.
-        if repairMask is not None:
-            badPixels[~repairMask] = 0
-            corrections[~repairMask] = deltas[0][~repairMask]
-        np.put_along_axis(deltas, badPixels, corrections, axis=0)
-        return correctionWindow
-
-    def correctCRs(self, pfsRaw, deltas: np.ndarray):
-        """Correct cosmic ray hits using the array of flux deltas.
-
-        "Correct" means to detect both the maximum and the minimum increments for each
-        pixel, and replace both of those by a local estimate of the rate.
-
-        Since we are already paying the cost of detecting hot pixels, fold correction
-        of ASIC-injected bad pixels in here. Currently only n3, channel 24. In the
-        delta ramps these appear as paired high/low pixels. We do need to calculate a
-        robust sigma, which is expensive enough that we want to skip it if possible.
-
-        Parameters
-        ----------
-        pfsRaw : `lsst.obs.pfs.PfsRaw`
-          Access to the raw ramp
-        deltas : `np.ndarray`
-           Corrected in place.
-
-        Returns
-        -------
-        deltas : `np.array`
-           The corrected ramp
-        posIdx, negIdx : `np.array`
-           The per-pixel indexes into deltas of the corrected reads.
-        """
-
-        if not self.config.h4.doCR:
-            return deltas, None, None
-
-        # Oh, ultra gross. Just realized that for normal exposures the shutter is
-        # always closed for the first read and at least the last two reads, so
-        # we do not want to use those for interpolation.
-        # Worse, Ar and Ne exposures are illuminated for just one read, so *all*
-        # illuminated pixels would be rejected! If we start taking shorter quartz
-        # exposures those will lose significant flux.
-        # Finally, we do not want to use the closed-shutter reads to identify the
-        # minimum pixels: arc lines, etc. always get dinged. This is pretty dangerous
-        # for short exposures.
-        #
-        # Hack that following logic in for now, but need to somehow fix correctly -- CPL.
-        #  - if nread is "short", apply no correction: must use CR splits for Ar/Ne
-        #  - if exptype is not DARK, do not use first or two last reads
-        #
-        corrRad, corrMin, corrMax = self.calcCorrectionWindow(pfsRaw, len(deltas))
-        if corrMax - corrMin < self.config.h4.crMinReads:
-            self.log.warn(f'ramp is too short to correct CRs (need {self.config.h4.crMinReads}, '
-                          f'have {corrMax}-{corrMin}): you must use splits!')
-            return deltas, None, None
-
-        # Identify both min and max reads for each pixel. This
-        # is fairly expensive...
-        posIdx = np.argmax(deltas, axis=0, keepdims=True)
-        negIdx = np.argmin(deltas, axis=0, keepdims=True)
-
-        if False:  # Instead of repairing pixels, mask them for later processing.
-            np.put_along_axis(deltas, posIdx, axis=0, values=np.nan)
-            np.put_along_axis(deltas, negIdx, axis=0, values=np.nan)
-        else:
-            self.repairWithWindow(deltas, posIdx,
-                                  corrRad, corrMin, corrMax,
-                                  otherIgnore=negIdx)
-
-            # Need to correct for masking every max read. One way is to mask every
-            # min read. I hate this.
-            if True:
-                self.repairWithWindow(deltas, negIdx,
-                                      corrRad, corrMin, corrMax,
-                                      otherIgnore=posIdx)
-            else:
-                negIdx *= 0
-                self.log.warn('NOT accounting for min pixels in CR step.')
-
-        # we mask/replace two reads for every single pixel. Let the caller decide
-        # what to do about that, but return the indices of both.
-        return deltas, posIdx, negIdx
-
     def loadBadIRPpixels(self, detectorName: str) -> np.ndarray:
         """Return the bad IRP row pixel list."""
 
@@ -1816,6 +2342,38 @@ def loadBadAsicChannels(self, detectorName: str) -> tuple:
         badAsicChannels = dict(n3=(24,))
         return badAsicChannels.get(detectorName, ())
 
+    def asicBadChannelMask(self, detectorName: str, shape: tuple,
+                           nChannels: int = 32) -> np.ndarray:
+        """Return a ``(H, W)`` bool mask True at rows in known-bad ASIC channels.
+
+        H4 ASIC channels run along the y (rows) axis; each channel
+        spans ``H // nChannels`` rows. Useful as the ``glitchPixelMask``
+        argument to ``cr.iterativeUtrDetectAndRepair``: glitch detection
+        is restricted to the rows where ASIC glitches are known to occur.
+
+        Parameters
+        ----------
+        detectorName : str
+            Name passed to ``loadBadAsicChannels``, e.g. ``"n3"``.
+        shape : tuple
+            ``(H, W)`` detector dimensions.
+        nChannels : int
+            Number of horizontal ASIC channels stacked along Y; 32 for H4.
+
+        Returns
+        -------
+        mask : np.ndarray
+            ``(H, W)`` bool. All-False if the detector has no known-bad
+            channels.
+        """
+        channels = self.loadBadAsicChannels(detectorName)
+        H, W = shape
+        channelHeight = H // nChannels  # floor division; remainder rows fall in no channel
+        mask = np.zeros((H, W), dtype=bool)
+        for ch in channels:  # each bad channel is one contiguous band of rows
+            mask[ch * channelHeight:(ch + 1) * channelHeight, :] = True
+        return mask
+
     def getSimpleDiffIrp(self, pfsRaw, rawDiffIrp) -> np.ndarray:
         """Return a diff IRP image which is just the median across the channel columns"""
         nchan = pfsRaw.nchan
@@ -2153,117 +2711,9 @@ def makeCDS(self, pfsRaw, r0=0, r1=-1):
 
         return image1
 
-    def calcCorrectionWindow(self, pfsRaw, nread, corrRad=2):
-        """Figure out the bounds of any interpolation/correction window for this ramp
-
-        If illuminated, we cannot use the reads when the shutter is closed;
-        currently (2025-01) the first and the last two.
-
-        Parameters
-        ----------
-        pfsRaw : `lsst.obs.pfs.PfsRaw`
-          Access to the raw ramp
-        nread : `int`
-          The number of actual reads we have kept.
-        corrRad : `int`
-          How many reads on either side we want to use
-
-        Returns
-        -------
-        corrRad : `int`
-           The radius of the window we keep.
-        corrMin, corrMax : `int`
-           The first and last reads we can use
-        """
-
-        exptype = pfsRaw.visitInfo.observationType
-
-        if exptype == 'dark' or nread < pfsRaw.getNumReads():
-            corrMin = 0
-            corrMax = nread-1
-        else:
-            # TODO: figure the shutter-closed reads out from the header. Need to add cards for that.
-            # This is correct for 2024-ish.
-            corrMin = 1
-            corrMax = nread-3
-
-        return corrRad, corrMin, corrMax
-
-    def repairAsicSpikes(self, pfsRaw, cube: np.ndarray, channel: int, sigClip=None, doTest=False):
-        """Replace bad pixels from bad ASIC channels
-
-        The bad pixels are positive going spikes from the ASIC, which can hit
-        both data and IRP pixels. We correct the pixels early, on the separate
-        data and IRP cubes, before even being converted to IRP1 or incremental UTR.
-
-        We expect to only run this for a few (1?) channel.
-
-        Need to iterate until no more spikes are found.....
-        Need to handle spikes at the ends of the ramp.
-
-        Parameters
-        ----------
-        pfsRaw : `lsst.obs.pfs.PfsRaw`
-          Access to the raw ramp
-        cube : `np.ndarray`
-           The 3-d cube of the ramp. Corrected in place.
-        channel : `int`
-           The index of the (32-channel read) channel we are correcting
-        """
-
-        if not self.config.h4.repairAsicSpikes:
-            return
-
-        if pfsRaw.nchan != 32:
-            self.log.warn(f'nchan ({pfsRaw.nchan}) != 32, so not (yet) correcting ASIC glitches')
-            return
-
-        if sigClip is None:
-            sigClip = self.config.h4.repairAsicSpikesSigma
-        badChan = cube[:, channel*128:(channel+1)*128, :]
-
-        diffChan = np.diff(badChan, axis=0, prepend=np.zeros_like(badChan[0:1]))
-        p25, meds, p75 = np.percentile(diffChan, [25, 50, 75], axis=0)
-        iqrSig = 0.741*(p75 - p25)
-
-        # the reads we are correcting have a single hot pixel, just like CRs. But the flux
-        # levels return on the following read, unlike CRs. So, in delta stacks look for
-        # positive spikes just before a matching negative spike. Or in cumulative stacks,
-        # look for a positive spike between two "normal" reads
-        brightMask = np.abs(diffChan) > (meds + sigClip*iqrSig)
-        bright_w = np.where(brightMask)
-
-        # Todo: Handle spikes at the ends of the ramp separately -- CPL
-        atEnd = (bright_w[0] >= len(diffChan)-1 | (bright_w[0] == 0))
-        bright_w1 = bright_w[0][~atEnd], bright_w[1][~atEnd], bright_w[2][~atEnd]
-        next_w1 = (bright_w1[0]+1), bright_w1[1], bright_w1[2]
-
-        peaks1 = diffChan[bright_w1]
-        next1 = diffChan[next_w1]
-        diffs = peaks1 + next1
-
-        meds1 = meds[bright_w1[1:]]
-        iqrSig1 = iqrSig[bright_w1[1:]]
-
-        fix_i = np.abs(diffs) < (meds1 + sigClip*iqrSig1)
-        fix_w = tuple([w1[fix_i] for w1 in bright_w1])
-        if True:  # Use the average of the two neighboring reads
-            fix_wm1 = (fix_w[0]-1, fix_w[1], fix_w[2])
-            fix_wp1 = (fix_w[0]+1, fix_w[1], fix_w[2])
-            repairVals = (badChan[fix_wm1] + badChan[fix_wp1]) / 2
-        else:     # Use the median of the entire ramp
-            repairVals = meds1[fix_w[1:]]
-        badChan[fix_w] = repairVals
-
-        # Return for testing
-        if doTest:
-            return fix_w, badChan, diffChan, bright_w1, iqrSig, meds, repairVals
-        else:
-            return fix_w
-
-    def makeUTRdeltas(self, pfsRaw, r0=0, r1=-1, nreads=None, bbox=None,
-                      showTimes=False) -> np.ndarray:
-        """Return all the fully IRP-corrected frames in a single 3d stack.
+    def makeUTRcumulative(self, pfsRaw, r0=0, r1=-1, nreads=None, bbox=None,
+                          showTimes=False) -> np.ndarray:
+        """Return the IRP-corrected cumulative ramp as a single 3-D stack.
 
         Given two raw data images d0 and d1, and two raw IRP images i0 and i1, the net CDS image
         can be either (d1 - i1) - (d0 - i0), or (d1 - d0) - (i1 - i0). The IRP row has various
@@ -2272,7 +2722,9 @@ def makeUTRdeltas(self, pfsRaw, r0=0, r1=-1, nreads=None, bbox=None,
         In particular there are:
           - bad IRP row pixels
           - fixed pixel-to-pixel offsets
-          - bad ASIC channels
+
+        ASIC glitches are handled later by the iterative CR/glitch detector
+        operating on the linearized cube, not here.
 
         Note that there is also up to ~1% printthrough from data->IRP, and presumably from IRP->data
 
@@ -2293,9 +2745,15 @@ def makeUTRdeltas(self, pfsRaw, r0=0, r1=-1, nreads=None, bbox=None,
 
         Returns
         -------
-        deltas : 3-d float32 numpy array
-           the UTR stack, with axis 0 being the reads up the ramp. We return
-           the flux increments between reads, not the actual fluxes.
+        flux : 3-d float32 numpy array
+           Cumulative IRP-corrected ADU, zero-anchored at read ``r0``,
+           shape ``(nreads-1, H, W)``. ``flux[k]`` is read ``r0+k+1``
+           minus read ``r0`` minus the per-channel IRP-filtered diff.
+           The frame-first layout is what the production caller wants
+           for the in-place ``subtractDarkCube`` step; a single
+           ``ascontiguousarray(flux.transpose(1, 2, 0))`` at the
+           ``apply()`` boundary is the only transpose between here and
+           the linearization step.
         """
 
         r0 = pfsRaw.positiveIndex(r0)
@@ -2306,36 +2764,12 @@ def makeUTRdeltas(self, pfsRaw, r0=0, r1=-1, nreads=None, bbox=None,
             nreads = r1 - r0 + 1
         reads = np.linspace(r0, r1, nreads, dtype='i2')
 
-        # n3/18321 channel 24 is covered with pixels which have spikes from the ASIC.
-        # We need to repair both the data and IRP pixels separately, and furthermore
-        # want to repair the non-interpolated IRP images. So read both in first and
-        # correct them before building the proper deltas. Since I/O is significant,
-        # we keep the raw data and IRP cubes, then re-read from those instead of
-        # from disk.
-        #
-        badChans = self.loadBadAsicChannels(pfsRaw.detector.getName())
-        if badChans is not None:
-            rawData = self.makeRawDataCube(pfsRaw=pfsRaw, r0=r0, r1=r1, nreads=nreads)
-            rawIrps = self.makeRawIrpNcube(pfsRaw=pfsRaw, r0=r0, r1=r1, nreads=nreads)
-
-            for badChan in badChans:
-                fixedData_w = self.repairAsicSpikes(pfsRaw, rawData, badChan)
-                fixedIrp_w = self.repairAsicSpikes(pfsRaw, rawIrps, badChan)
-                self.log.info(f'repaired {len(fixedData_w[0])} data and {len(fixedIrp_w[0])} IRP '
-                              f'ASIC pixels in channel {badChan}')
-        else:
-            rawData = rawIrps = None
-
-        # Grab the components of read 0, which we will subtract from all the others.
-        if rawData is not None:
-            data0 = self.makeRawDataArray(pfsRaw, 0, fromArray=rawData)
-            irp0 = self.makeRawIrpArray(pfsRaw, 0, fromArray=rawIrps)
-        else:
-            data0 = self.makeRawDataArray(pfsRaw, r0, fromArray=rawData)
-            irp0 = self.makeRawIrpArray(pfsRaw, r0, fromArray=rawIrps)
+        # Grab the components of read r0, which we will subtract from all the others.
+        data0 = self.makeRawDataArray(pfsRaw, r0)
+        irp0 = self.makeRawIrpArray(pfsRaw, r0)
         self.applyIRPcrosstalk(pfsRaw, irp0, data0)
 
-        # We are not squirreling away the bbox, but really should for the final Exposure
+        # We are not squirreling away the bbox, but really should for the final Exposure.
         if bbox is None:
             stackShape = (nreads-1, *data0.shape)
         else:
@@ -2345,12 +2779,8 @@ def makeUTRdeltas(self, pfsRaw, r0=0, r1=-1, nreads=None, bbox=None,
             if r_idx == 0:
                 continue
             t0 = time.time()
-            if rawData is not None:
-                data1 = self.makeRawDataArray(pfsRaw, r_idx, fromArray=rawData)
-                irp1 = self.makeRawIrpArray(pfsRaw, r_idx, fromArray=rawIrps)
-            else:
-                data1 = self.makeRawDataArray(pfsRaw, r_i, fromArray=rawData)
-                irp1 = self.makeRawIrpArray(pfsRaw, r_i, fromArray=rawIrps)
+            data1 = self.makeRawDataArray(pfsRaw, r_i)
+            irp1 = self.makeRawIrpArray(pfsRaw, r_i)
             t1 = time.time()
             self.applyIRPcrosstalk(pfsRaw, irp1, data1)
             dirp = irp1 - irp0
@@ -2365,9 +2795,7 @@ def makeUTRdeltas(self, pfsRaw, r0=0, r1=-1, nreads=None, bbox=None,
             if showTimes:
                 print(f'cds {r_i} io1={t1-t0:0.3f} proc={t2-t1:0.3f}')
 
-        # Warning: prepend=0 causes promotion to float64
-        # This should arguably be constructed on-the-fly, read by read.
-        return np.diff(stack, axis=0, prepend=np.zeros_like(stack[0:1]))
+        return stack
 
     def readIPC(self, detectorName: str) -> dict[int, dict[int, float]]:
         """Read IPC coefficients from file
@@ -2573,49 +3001,5 @@ def roughZeroPoint(self, exposure):
         """
         pass
 
-    def nirDarkCorrection(self, pfsRaw: "PfsRaw", exposure: "ExposureF", dark: "ImageCube") -> None:
-        """Apply NIR dark correction to the exposure
-
-        The NIR dark correction is applied to the exposure in place.
-
-        Parameters
-        ----------
-        pfsRaw : `lsst.obs.pfs.PfsRaw`
-            Access to the raw ramp
-        exposure : `lsst.afw.image.ExposureF`
-            Exposure to correct.
-        dark : `lsst.obs.pfs.dark.ImageCube`
-            Dark correction to apply.
-        """
-
-        nObjRead = pfsRaw.getNumReads()
-        nDarkRead = dark.getNumReads()
-        if nObjRead > nDarkRead:
-            darkScale = nObjRead/nDarkRead
-            self.log.warn(f"More reads in object ({nObjRead}) than dark ({nDarkRead}); "
-                          f"scaling dark by {darkScale:0.3f}")
-        else:
-            darkScale = 1.0
-            nDarkRead = nObjRead
-        try:
-            darkImage = dark[nDarkRead-1]
-        except Exception as e:
-            self.log.warn(f"No dark image available for NIR dark correction of read {nDarkRead}: {e}")
-            return
-
-        darkArray = darkImage.array.copy()
-        darkArray *= darkScale
-
-        # This should be pulled out into some NirDark class
-        # The dark imageCube itself has no variance, so we need to add it
-        # Readnoise needs to be worked out: the dark cube is from a stack of darks, and we use UTR to
-        #    get to our selected read. But as of 2025-03 there is observably more read noise than expected.
-        readNoise = dark.metadata["READNOISE"]
-        varArray = darkArray.copy()  # assumes photon noise
-        varArray += (readNoise/np.sqrt(nDarkRead))**2
-        darkImage = afwImage.makeMaskedImageFromArrays(darkArray, None, varArray)
-
-        exposure.maskedImage -= darkImage
-
     def _getMetadataName(self):
         return None                     # don't write metadata; requires fix to ip_isr
diff --git a/python/lsst/obs/pfs/nirLinearity.py b/python/lsst/obs/pfs/nirLinearity.py
deleted file mode 100644
index e9beb10f..00000000
--- a/python/lsst/obs/pfs/nirLinearity.py
+++ /dev/null
@@ -1,152 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING  # noqa: F401
-
-import numpy as np
-import astropy.io.fits
-
-from lsst.daf.base import PropertyList
-from astro_metadata_translator import fix_header
-
-from pfs.datamodel.utils import astropyHeaderFromDict
-
-from .translator import PfsTranslator
-
-__all__ = ("NirLinearity",)
-
-
-class NirLinearity:
-    """NIR Linearity correction data
-
-    Consists of the following:
-    - Primary HDU
-    - Image HDU named ``LIMITS`` contains the per-pixel "full-well", as a
-      detector-sized float array.
-    - Image HDU named ``COEFFS`` contains the per-pixel polynomial coefficients
-        for the linearity correction, as a cube of shape
-        ``(NCOEFF, height, width)``.
-
-    Parameters
-    ----------
-    limits : `np.ndarray`, shape ``(height, width)``
-        The per-pixel "full-well".
-    coeffs : `np.ndarray`, shape ``(NCOEFF, height, width)`
-        The per-pixel polynomial coefficients for the linearity correction.
-    metadata : `lsst.daf.base.PropertyList`
-        Metadata (FITS header) for the images.
-    method : `str`
-        The method used to derive the linearity correction.
-    """
-
-    DEFAULT_METHOD = "np.polynomial.chebyshev"
-
-    def __init__(
-        self,
-        limits: np.ndarray,
-        coeffs: np.ndarray,
-        metadata: "PropertyList",
-        method: str | None = None,
-    ) -> None:
-        self.height, self.width = limits.shape
-        if coeffs.ndim != 3 or coeffs.shape[1:] != limits.shape:
-            raise ValueError(
-                f"coeffs must have shape (NCOEFF, {self.height}, {self.width}), not {coeffs.shape}"
-            )
-        self.numCoeff = coeffs.shape[0]
-
-        self.limits = limits
-        self.coeffs = coeffs
-        self.metadata = metadata
-        self.method = method if method is not None else self.DEFAULT_METHOD
-
-    @classmethod
-    def empty(
-        cls, shape: tuple, numCoeffs: int, metadata: "PropertyList", method: str | None = None
-    ) -> "NirLinearity":
-        """Construct an empty linearity correction
-
-        Parameters
-        ----------
-        shape : `tuple`
-            The shape of the detector, ``(height, width)``.
-        numCoeffs : `int`
-            The number of coefficients for the linearity correction.
-        metadata : `lsst.daf.base.PropertyList`
-            Metadata (FITS header) for the images.
-        method : `str`, optional
-            The method used to derive the linearity correction.
-
-        Returns
-        -------
-        self : `NirLinearity`
-            An empty linearity correction.
-        """
-        limits = np.zeros(shape, dtype=np.float32)
-        coeffs = np.zeros((numCoeffs, shape[0], shape[1]), dtype=np.float32)
-        return cls(limits, coeffs, metadata, method)
-
-    @classmethod
-    def fromFits(cls, fits: astropy.io.fits.HDUList) -> "NirLinearity":
-        """Construct from a FITS file
-
-        Parameters
-        ----------
-        fits : `astropy.io.fits.HDUList`
-            The FITS file.
-
-        Returns
-        -------
-        self : `NirLinearity`
-            The linearity correction.
-        """
-        metadata = PropertyList.from_mapping(fits[0].header)
-        fix_header(metadata, translator_class=PfsTranslator)
-        limits = fits["LIMITS"].data.astype(np.float32, copy=False)
-        coeffs = fits["COEFFS"].data.astype(np.float32, copy=False)
-        method = fits[0].header.get("METHOD", cls.DEFAULT_METHOD)
-        return cls(limits, coeffs, metadata, method)
-
-    def toFits(self) -> astropy.io.fits.HDUList:
-        """Write to a FITS file
-
-        Returns
-        -------
-        fits : `astropy.io.fits.HDUList`
-            The FITS file.
-        """
-        fits = astropy.io.fits.HDUList()
-
-        header = astropyHeaderFromDict(self.metadata)
-        header["METHOD"] = self.method
-        fits.append(astropy.io.fits.PrimaryHDU(data=None, header=header))
-        fits.append(astropy.io.fits.ImageHDU(self.limits.astype(np.float32), name="LIMITS"))
-        fits.append(astropy.io.fits.ImageHDU(self.coeffs.astype(np.float32), name="COEFFS"))
-        return fits
-
-    def writeFits(self, path: str) -> None:
-        """Write the linearity correction to a FITS file
-
-        Parameters
-        ----------
-        path : `str`
-            Path to the output FITS file.
-        """
-        with open(path, "wb") as fd:
-            self.toFits().writeto(fd)
-
-    @classmethod
-    def readFits(cls, path: str) -> "NirLinearity":
-        """Read the linearity correction from a FITS file
-
-        Parameters
-        ----------
-        path : `str`
-            Path to the FITS file.
-
-        Returns
-        -------
-        self : `NirLinearity`
-            The linearity correction.
-        """
-        with astropy.io.fits.open(path) as fits:
-            return cls.fromFits(fits)
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 00000000..1b3908f7
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,108 @@
+"""Shared pytest fixtures for h4Linearity tests."""
+
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+from lsst.obs.pfs.h4Linearity.types import Ramp
+
+
+def _buildSyntheticReads(
+    H: int,
+    W: int,
+    N: int,
+    c0: np.ndarray,
+    c1: np.ndarray,
+    c2: np.ndarray,
+    c3: np.ndarray,
+    c4: np.ndarray,
+    rate: float,
+) -> tuple[np.ndarray, dict[str, np.ndarray]]:
+    """Construct ``(reads, trueCoeffs)`` such that for each pixel::
+
+        t[n] = rate * n                       # the linearization target
+        t[n] = c0 + c1*m[n] + c2*m[n]^2
+               + c3*m[n]^3 + c4*m[n]^4        # the per-pixel nonlinearity
+        m[n] = reads[n, h, w]                 # cumulative flux at read n
+
+    Read 0 is the implicit zero read (m[0] = 0, t[0] = 0); reads 1..N-1
+    are solved in float64 by Newton inversion of the polynomial for
+    t[n] = rate * n. Every returned array is cast to float32.
+    """
+    # Target at each read (including implicit zero read at n=0).
+    t = rate * np.arange(N, dtype=np.float64)  # (N,)
+
+    # Solve t[n] = c0 + c1 m + c2 m^2 + c3 m^3 + c4 m^4 for each pixel & read
+    # via Newton's method, starting from m ≈ (t - c0) / c1. The n=0 row is
+    # the implicit zero read.
+    m = np.empty((N, H, W), dtype=np.float64)
+    m[0] = 0.0  # pinned to zero rather than solved for
+    for n in range(1, N):
+        mGuess = (t[n] - c0) / np.where(c1 != 0, c1, 1.0)  # c1 == 0 divides by 1 instead
+        for _ in range(50):  # Newton iterations, capped at 50
+            pVal = c0 + c1 * mGuess + c2 * mGuess**2 + c3 * mGuess**3 + c4 * mGuess**4
+            pPrime = c1 + 2 * c2 * mGuess + 3 * c3 * mGuess**2 + 4 * c4 * mGuess**3
+            step = (pVal - t[n]) / np.where(pPrime != 0, pPrime, 1.0)  # zero slope divides by 1
+            mGuess = mGuess - step
+            if np.max(np.abs(step)) < 1e-10:  # largest step over all pixels is negligible
+                break
+        m[n] = mGuess
+
+    trueCoeffs = {
+        "c0": c0.astype(np.float32),
+        "c1": c1.astype(np.float32),
+        "c2": c2.astype(np.float32),
+        "c3": c3.astype(np.float32),
+        "c4": c4.astype(np.float32),
+        "targetRate": float(rate),  # plain float, unlike the array-valued entries
+        "target": t.astype(np.float32),
+        "mTrue": m.astype(np.float32),
+    }
+    return m.astype(np.float32), trueCoeffs
+
+
+@pytest.fixture
+def smallSyntheticRamp():
+    """A 30-read 4x5 ramp with spatially-varying polynomial coefficients.
+
+    Read 0 is the implicit zero read (``reads[0] == 0``); reads 1..29
+    follow the per-pixel polynomial inverse of the linear target
+    ``t[n] = rate * n``. Returns ``(Ramp, trueCoeffs)``.
+    """
+    rng = np.random.default_rng(seed=42)  # fixed seed: identical ramp on every run
+    H, W, N = 4, 5, 30
+    rate = 1000.0  # DN per read
+
+    # Per-pixel polynomial coefficients: mostly-linear with small higher-order terms.
+    c0 = rng.normal(0.0, 1.0, size=(H, W)).astype(np.float64)
+    c1 = np.full((H, W), 1.0, dtype=np.float64) + rng.normal(0.0, 1e-3, size=(H, W))
+    c2 = rng.normal(0.0, 1e-7, size=(H, W)).astype(np.float64)
+    c3 = rng.normal(0.0, 1e-11, size=(H, W)).astype(np.float64)
+    c4 = rng.normal(0.0, 1e-15, size=(H, W)).astype(np.float64)
+
+    reads, trueCoeffs = _buildSyntheticReads(H, W, N, c0, c1, c2, c3, c4, rate)
+    return Ramp(reads=reads), trueCoeffs  # (Ramp, ground-truth dict)
+
+
+@pytest.fixture
+def tinyLinearRamp():
+    """A 7-read 2x3 ramp where every pixel is perfectly linear: m = t * pixelScale.
+
+    Read 0 is the implicit zero read; reads 1..6 grow linearly:
+    ``reads[n] == pixelScale * rate * n``.
+    """
+    N = 7
+    rate = 500.0
+    # Pixel-scale factor — varies per pixel so the PRNU is non-trivial
+    pixelScale = np.array(
+        [[1.0, 1.1, 0.9], [0.95, 1.05, 1.0]], dtype=np.float32
+    )
+    perRead = rate * pixelScale  # constant increment per read
+    reads = perRead[None, :, :] * np.arange(N, dtype=np.float32)[:, None, None]  # (N, 2, 3)
+    target = rate * np.arange(N, dtype=np.float32)  # t[n] = rate * n, shared by all pixels
+    return Ramp(reads=reads.astype(np.float32)), {
+        "pixelScale": pixelScale,
+        "targetRate": rate,
+        "target": target,
+    }
diff --git a/tests/test_calcUTRrates.py b/tests/test_calcUTRrates.py
new file mode 100644
index 00000000..9fbce0bc
--- /dev/null
+++ b/tests/test_calcUTRrates.py
@@ -0,0 +1,170 @@
+"""Tests for ``PfsIsrTask.calcUTRrates`` on an asymmetric ramp with
+position-dependent per-pixel slopes.
+
+The UTR weights are the standard linear-LS slope estimator on evenly
+spaced reads: with ``cube[i, y, x] = (i + 1) * slope[y, x]`` (no
+offset) and exact arithmetic, ``calcUTRrates(cube)[y, x] == slope[y, x]``.
+We use a position-encoded slope ``slope[y, x] = y * 10 + x`` and
+asymmetric ``(N=15, H=4, W=7)`` so that:
+
+- An ``H <-> W`` swap inside ``calcUTRrates`` would either raise (shape
+  mismatch between cube[0] and the rate buffer) or produce a transposed
+  rate plane whose values are observably wrong at every asymmetric
+  ``(y, x)``.
+- An axis-0 vs axis-1 swap (treating reads as height) would produce a
+  shape mismatch immediately.
+- A read-direction swap (cube[N-1-i] iterated instead of cube[i]) would
+  invert the sign of the recovered slope and is also caught.
+"""
+import unittest
+
+import numpy as np
+
+import lsst.utils.tests
+from lsst.obs.pfs import isrTask as pfsIsrTask
+from lsst.obs.pfs.h4Linearity import cr
+
+
+N_READS = 15
+H = 4
+W = 7
+
+
+def _slopePerPixel(H, W, dtype=np.float32):
+    y = np.arange(H, dtype=dtype)[:, None]
+    x = np.arange(W, dtype=dtype)[None, :]
+    return y * 10.0 + x
+
+
+def _linearRamp(nReads, H, W, dtype=np.float32):
+    """``cube[i, y, x] = (i + 1) * slope[y, x]``."""
+    slope = _slopePerPixel(H, W, dtype=dtype)
+    i = np.arange(1, nReads + 1, dtype=dtype)[:, None, None]
+    return (i * slope[None, :, :]).astype(dtype)
+
+
+def _makeIsrTask():
+    config = pfsIsrTask.PfsIsrTask.ConfigClass()
+    config.doFlat = False
+    config.doDark = True
+    config.doDefect = True
+    config.doSaturationInterpolation = False
+    config.h4.quickCDS = False
+    config.h4.doIPC = False
+    config.h4.doWriteRawCube = True
+    config.h4.doLinearize = False
+    config.h4.doCR = False
+    config.validate()
+    return pfsIsrTask.PfsIsrTask(config=config)
+
+
+class CalcUTRRatesTestCase(lsst.utils.tests.TestCase):
+
+    def setUp(self):
+        self.task = _makeIsrTask()
+
+    def testRecoversPerPixelSlope(self):
+        cube = _linearRamp(N_READS, H, W)
+        rates = self.task.calcUTRrates(cube)
+        expected = _slopePerPixel(H, W)
+        self.assertEqual(rates.shape, (H, W))
+        np.testing.assert_allclose(
+            rates, expected, atol=1e-3,
+            err_msg="calcUTRrates must recover the per-pixel linear slope",
+        )
+
+    def testValuesAtAsymmetricPixels(self):
+        # Spot-check a few asymmetric (y, x) so a silent H<->W swap shows
+        # up as a value mismatch even if the rate plane shape "looks
+        # right" after the swap.
+        cube = _linearRamp(N_READS, H, W)
+        rates = self.task.calcUTRrates(cube)
+        for y, x in [(0, 0), (3, 0), (0, 6), (2, 5), (3, 6)]:
+            self.assertAlmostEqual(
+                float(rates[y, x]),
+                y * 10.0 + x,
+                places=3,
+                msg=f"rate at (y={y}, x={x}) must equal y*10 + x",
+            )
+
+    def testReadOrderMatters(self):
+        # The UTR weights are anti-symmetric about the ramp midpoint:
+        # reversing the read order along axis 0 negates the recovered
+        # slope. Confirms axis 0 is actually being treated as the read
+        # index (not the spatial axis).
+        cube = _linearRamp(N_READS, H, W)
+        cubeRev = cube[::-1].copy()
+        rates = self.task.calcUTRrates(cube)
+        ratesRev = self.task.calcUTRrates(cubeRev)
+        np.testing.assert_allclose(
+            ratesRev, -rates, atol=1e-3,
+            err_msg="reversing reads must negate the recovered slope",
+        )
+
+    def testNreadsTruncatesAxisZero(self):
+        # ``nreads=k`` should only use the first k reads (axis 0). With
+        # cube[i, y, x] = (i+1) * slope, the first k reads still satisfy
+        # the linear model exactly, so the recovered slope is unchanged.
+        cube = _linearRamp(N_READS, H, W)
+        rates_full = self.task.calcUTRrates(cube)
+        rates_truncated = self.task.calcUTRrates(cube, nreads=N_READS - 5)
+        np.testing.assert_allclose(
+            rates_truncated, rates_full, atol=1e-3,
+            err_msg="truncating reads (still a linear ramp) must give "
+                    "the same slope",
+        )
+
+    def testShapeMatchesSpatialPlane(self):
+        # The output shape must equal cube[0].shape (i.e. (H, W)) — not
+        # some transposition. If axes 1 and 2 were swapped inside the
+        # function the shape would be (W, H) here.
+        cube = _linearRamp(N_READS, H, W)
+        rates = self.task.calcUTRrates(cube)
+        self.assertEqual(rates.shape, cube[0].shape)
+        self.assertEqual(rates.shape, (H, W))
+
+    def testThreeRateFunctionsAgreeOnCleanRamp(self):
+        """Cross-check: ``calcUTRrates`` (read-space),
+        ``calcUTRrateFromDeltas`` (delta-space closed form), and the CR
+        detector's ``IterativeRepairResult.rate`` are mathematically tied —
+        ``u[j] = Σ_{i>j} w[i] = 6(j+1)(N-1-j) / (N(N-1)(N+1))``. On a
+        clean ramp with no flags they must produce identical per-pixel
+        rates, within float32 round-off. Catches silent drift if anyone
+        re-derives one of the formulas.
+        """
+        cube = _linearRamp(N_READS, H, W)
+        cubeHWN = np.ascontiguousarray(cube.transpose(1, 2, 0))
+        deltas = np.diff(cubeHWN, axis=-1)
+
+        rRead = self.task.calcUTRrates(cube)
+        rDelta = self.task.calcUTRrateFromDeltas(deltas)
+        result = cr.iterativeUtrDetectAndRepair(
+            deltas.copy(),
+            goodPixelMask=np.ones(cubeHWN.shape[:-1], dtype=bool),
+            glitchPixelMask=None,
+        )
+        rCR = result.rate
+
+        np.testing.assert_allclose(
+            rDelta, rRead, rtol=1e-5,
+            err_msg="calcUTRrateFromDeltas must match calcUTRrates on a "
+                    "clean linear ramp",
+        )
+        np.testing.assert_allclose(
+            rCR, rRead, rtol=1e-5,
+            err_msg="iterResult.rate must match calcUTRrates on a clean "
+                    "linear ramp (no flags fired)",
+        )
+
+
+class TestMemory(lsst.utils.tests.MemoryTestCase):
+    pass
+
+
+def setup_module(module):
+    lsst.utils.tests.init()
+
+
+if __name__ == "__main__":
+    lsst.utils.tests.init()
+    unittest.main()
diff --git a/tests/test_getDarkCube.py b/tests/test_getDarkCube.py
new file mode 100644
index 00000000..a2b5e09e
--- /dev/null
+++ b/tests/test_getDarkCube.py
@@ -0,0 +1,143 @@
+"""Tests for ``PfsIsrTask.getDarkCube`` axis order and read-indexing.
+
+Uses a fake ``ImageCube`` whose ``getImageCube`` returns a
+position-encoded ``(N, H, W)`` cube where each sample's value is
+``k * 10_000 + y * 100 + x``. Any axis swap or off-by-one in the
+read index produces an observably wrong sample value at every
+returned position.
+"""
+import unittest
+
+import numpy as np
+
+import lsst.utils.tests
+from lsst.obs.pfs import isrTask as pfsIsrTask
+
+
+N_READS = 12
+H = 4
+W = 7
+
+
+K_FACTOR = 10_000
+Y_FACTOR = 100
+
+
+def _encodedCube(N, H, W, dtype=np.float32):
+    k = np.arange(N, dtype=dtype)[:, None, None]
+    y = np.arange(H, dtype=dtype)[None, :, None]
+    x = np.arange(W, dtype=dtype)[None, None, :]
+    return k * K_FACTOR + y * Y_FACTOR + x
+
+
+class _FakeImageCube:
+    """Minimum surface PfsIsrTask.getDarkCube needs:
+
+    - ``nreads``: total reads in the cube.
+    - ``getImageCube(nreads=k)``: returns ``cube[:k]`` (a fresh array).
+    - ``metadata``: mapping with optional ``"GAIN"``.
+    """
+
+    def __init__(self, cube, *, gain=1.0):
+        self._cube = cube
+        self.nreads = cube.shape[0]
+        self.metadata = {"GAIN": gain} if gain is not None else {}
+
+    def getImageCube(self, nreads=None):
+        n = self.nreads if nreads is None else nreads
+        return self._cube[:n].copy()
+
+
+def _makeIsrTask():
+    config = pfsIsrTask.PfsIsrTask.ConfigClass()
+    config.doFlat = False
+    config.doDark = True
+    config.doDefect = True
+    config.doSaturationInterpolation = False
+    config.h4.quickCDS = False
+    config.h4.doIPC = False
+    config.h4.doWriteRawCube = True
+    config.h4.doLinearize = False
+    config.h4.doCR = False
+    config.validate()
+    return pfsIsrTask.PfsIsrTask(config=config)
+
+
+class GetDarkCubeTestCase(lsst.utils.tests.TestCase):
+    """The returned slice's last axis corresponds to the requested
+    reads; axes 0 and 1 are the (H, W) spatial plane in that order
+    (time axis last, matching the H4 ISR cube convention)."""
+
+    def setUp(self):
+        self.task = _makeIsrTask()
+        self.encoded = _encodedCube(N_READS, H, W)
+        # The fake ImageCube still serves (N, H, W) — getDarkCube
+        # transposes at the boundary.
+        self.imageCube = _FakeImageCube(self.encoded)
+        # Reference slice in (H, W, N) form to compare against.
+        self.encodedHWN = np.ascontiguousarray(self.encoded.transpose(1, 2, 0))
+
+    def testFullCubeRoundTrip(self):
+        out = self.task.getDarkCube(self.imageCube)
+        self.assertEqual(out.shape, (H, W, N_READS))
+        np.testing.assert_array_equal(out, self.encodedHWN)
+
+    def testNreadsTruncatesFromLastAxis(self):
+        # r0=0 (default), nreads=k → returned[:, :, :k] = encoded[:k] transposed.
+        out = self.task.getDarkCube(self.imageCube, nreads=5)
+        self.assertEqual(out.shape, (H, W, 5))
+        np.testing.assert_array_equal(out, self.encodedHWN[:, :, :5])
+
+    def testR0OffsetReturnsSliceFromR0(self):
+        # r0=3, nreads=5 → encoded[3:8] transposed.
+        out = self.task.getDarkCube(self.imageCube, nreads=5, r0=3)
+        self.assertEqual(out.shape, (H, W, 5))
+        np.testing.assert_array_equal(out, self.encodedHWN[:, :, 3:8])
+
+    def testR0OffsetWithoutNreadsReturnsToEnd(self):
+        # r0=4, nreads=None → encoded[4:nreads_total] transposed.
+        out = self.task.getDarkCube(self.imageCube, r0=4)
+        self.assertEqual(out.shape, (H, W, N_READS - 4))
+        np.testing.assert_array_equal(out, self.encodedHWN[:, :, 4:])
+
+    def testPositionEncodedSpotChecks(self):
+        # Pin specific samples so an axis swap shows up as a value error
+        # rather than a shape error.
+        out = self.task.getDarkCube(self.imageCube, nreads=8, r0=2)
+        # Returned out[..., 0] is encoded[2]; out[y, x, k] == (k+2)*K + y*Y + x.
+        self.assertEqual(
+            float(out[0, 0, 0]), 2 * K_FACTOR,
+        )
+        self.assertEqual(
+            float(out[2, 5, 3]),
+            (3 + 2) * K_FACTOR + 2 * Y_FACTOR + 5,
+        )
+        self.assertEqual(
+            float(out[3, 6, 7]),
+            (7 + 2) * K_FACTOR + 3 * Y_FACTOR + 6,
+        )
+
+    def testGainBackoutDivides(self):
+        # If the dark cube was stored as electrons (gain applied), the
+        # method divides by ``GAIN`` to back out the conversion.
+        gain = 2.5
+        imageCube = _FakeImageCube(self.encoded.copy(), gain=gain)
+        out = self.task.getDarkCube(imageCube)
+        np.testing.assert_allclose(out, self.encodedHWN / gain, atol=1e-3)
+
+    def testGainOneNoChange(self):
+        out = self.task.getDarkCube(self.imageCube)
+        np.testing.assert_array_equal(out, self.encodedHWN)
+
+
+class TestMemory(lsst.utils.tests.MemoryTestCase):
+    pass
+
+
+def setup_module(module):
+    lsst.utils.tests.init()
+
+
+if __name__ == "__main__":
+    lsst.utils.tests.init()
+    unittest.main()
diff --git a/tests/test_h4CR.py b/tests/test_h4CR.py
new file mode 100644
index 00000000..4dac56d6
--- /dev/null
+++ b/tests/test_h4CR.py
@@ -0,0 +1,590 @@
+import unittest
+
+import numpy as np
+
+import lsst.utils.tests
+from lsst.obs.pfs.h4Linearity import cr
+
+
+def _flatRamp(nReads=16, H=11, W=13, rate=10.0, dtype=np.float32):
+    """Build a noiseless cumulative ramp: cumulative[k, y, x] = k * rate.
+
+    Default ``(H, W) = (11, 13)`` is deliberately asymmetric so any
+    accidental ``H <-> W`` swap in the CR/glitch detector or its
+    callers surfaces as a shape mismatch rather than passing silently.
+    """
+    return (
+        np.arange(nReads, dtype=np.float64)[:, None, None]
+        * rate
+        * np.ones((1, H, W), dtype=np.float64)
+    ).astype(dtype)
+
+
+def _runCR(flux, **kwargs):
+    """Test helper: run the deltas-based detector on a cumulative cube.
+
+    ``cr.iterativeUtrDetectAndRepair`` takes a ``(H, W, N-1)`` delta cube
+    — the production caller in ``isrTask.makeNirExposure`` does the
+    ``np.diff``/transpose once and the reconstructing ``np.cumsum`` once
+    (only when needed). Tests construct cumulative ``(N, H, W)`` cubes
+    (the natural way to express "inject a CR at read k") and check the
+    post-repair ``(N, H, W)`` cube and the ``(N-1, H, W)`` flag masks,
+    so we wrap the diff + transpose + result re-transpose here. The
+    re-transposed result exposes ``crFlagMask`` and ``glitchFlagMask``
+    in ``(N-1, H, W)`` so the test assertions can index them as
+    ``mask[k, y, x]``.
+
+    Modifies ``flux`` in place when ``repair=True`` (default).
+    """
+    repair = kwargs.get("repair", True)
+    deltas = np.diff(flux, axis=0)
+    read0 = flux[0:1].copy() if repair else None
+    deltasHWN = np.ascontiguousarray(deltas.transpose(1, 2, 0))
+    result = cr.iterativeUtrDetectAndRepair(deltasHWN, **kwargs)
+    if repair:
+        deltas = np.ascontiguousarray(deltasHWN.transpose(2, 0, 1))
+        flux[0:1] = read0
+        np.cumsum(deltas, axis=0, out=flux[1:])
+        flux[1:] += read0
+    # Re-transpose flag masks for backward-compat test indexing
+    # (mask[k, y, x] form).
+    result.crFlagMask = np.ascontiguousarray(result.crFlagMask.transpose(2, 0, 1))
+    result.glitchFlagMask = np.ascontiguousarray(
+        result.glitchFlagMask.transpose(2, 0, 1)
+    )
+    return result
+
+
+def _injectCR(flux, y, x, k, amount):
+    """Add a CR contribution at delta index k: cumulative reads >= k+1 jump by amount."""
+    flux[k + 1:, y, x] += amount
+
+
+def _injectGlitchPair(flux, y, x, k, amount):
+    """Add a single-read digital glitch: read k is offset by `amount`.
+
+    In delta space this creates an up/down pair at delta indices k-1 and k:
+    delta[k-1] = rate + amount, delta[k] = rate - amount.
+    """
+    flux[k, y, x] += amount
+
+
+class IterativeUtrDetectAndRepairTestCase(lsst.utils.tests.TestCase):
+
+    def testNoDefectsReturnsClean(self):
+        flux = _flatRamp(nReads=20, rate=10.0)
+        good = np.ones(flux.shape[1:], dtype=bool)
+        glitchMask = np.zeros_like(good)
+        before = flux.copy()
+
+        result = _runCR(
+            flux, goodPixelMask=good, glitchPixelMask=glitchMask,
+        )
+
+        self.assertEqual(result.nCRs, 0)
+        self.assertEqual(result.nGlitchPairs, 0)
+        self.assertFalse(result.crFlagMask.any())
+        self.assertFalse(result.glitchFlagMask.any())
+        np.testing.assert_array_equal(flux, before)
+
+    def testSingleCRDetectedAndRepaired(self):
+        flux = _flatRamp(nReads=20, rate=10.0)
+        _injectCR(flux, y=3, x=4, k=7, amount=300.0)
+        good = np.ones(flux.shape[1:], dtype=bool)
+
+        result = _runCR(
+            flux, goodPixelMask=good, glitchPixelMask=None,
+        )
+
+        self.assertEqual(result.nCRs, 1)
+        self.assertEqual(result.nGlitchPairs, 0)
+        self.assertTrue(result.crFlagMask[7, 3, 4])
+        # Repair should restore the ramp to the clean linear line.
+        np.testing.assert_allclose(flux, _flatRamp(nReads=20, rate=10.0), atol=1.0)
+
+    def testGlitchOnActiveChannelClassifiedAsGlitch(self):
+        flux = _flatRamp(nReads=20, rate=10.0)
+        # ASIC glitch on (y=2, x=2): read 6 is offset by +400. Delta 5
+        # jumps up, delta 6 jumps down by the same amount.
+        _injectGlitchPair(flux, y=2, x=2, k=6, amount=400.0)
+        good = np.ones(flux.shape[1:], dtype=bool)
+        glitchMask = np.zeros_like(good)
+        glitchMask[2, :] = True   # row 2 is glitch-active
+
+        result = _runCR(
+            flux, goodPixelMask=good, glitchPixelMask=glitchMask,
+            correctGlitches=True,
+        )
+
+        self.assertEqual(result.nCRs, 0)
+        self.assertEqual(result.nGlitchPairs, 1)
+        self.assertTrue(result.glitchFlagMask[5, 2, 2])
+        self.assertTrue(result.glitchFlagMask[6, 2, 2])
+        # With correctGlitches=True the digital offset is removed.
+        np.testing.assert_allclose(flux, _flatRamp(nReads=20, rate=10.0), atol=1.0)
+
+    def testGlitchOnInactiveChannelClassifiedAsCR(self):
+        """Same digital pair as above but on a row NOT in glitchPixelMask.
+
+        With no glitch classification active, both deltas pass the
+        single-read check; only the positive-residual one is flagged as
+        a CR. Persistence check disabled so this test exercises the
+        glitch-mask fallback in isolation — with the default check the
+        cumulative drop one read later would correctly reject the
+        up-spike as a transient.
+        """
+        flux = _flatRamp(nReads=20, rate=10.0)
+        _injectGlitchPair(flux, y=2, x=2, k=6, amount=400.0)
+        good = np.ones(flux.shape[1:], dtype=bool)
+        glitchMask = np.zeros_like(good)   # row 2 is NOT glitch-active
+
+        result = _runCR(
+            flux, goodPixelMask=good, glitchPixelMask=glitchMask,
+            maxDropFraction=float('inf'),
+        )
+
+        self.assertEqual(result.nGlitchPairs, 0)
+        self.assertGreaterEqual(result.nCRs, 1)
+        # The positive-residual delta gets the CR flag.
+        self.assertTrue(result.crFlagMask[5, 2, 2])
+
+    def testGlitchMaskNoneDisablesGlitchDetection(self):
+        flux = _flatRamp(nReads=20, rate=10.0)
+        _injectGlitchPair(flux, y=2, x=2, k=6, amount=400.0)
+        good = np.ones(flux.shape[1:], dtype=bool)
+
+        result = _runCR(
+            flux, goodPixelMask=good, glitchPixelMask=None,
+        )
+
+        self.assertEqual(result.nGlitchPairs, 0)
+
+    def testRepairFalseLeavesCubeUnchanged(self):
+        flux = _flatRamp(nReads=20, rate=10.0)
+        _injectCR(flux, y=3, x=4, k=7, amount=300.0)
+        good = np.ones(flux.shape[1:], dtype=bool)
+        before = flux.copy()
+
+        result = _runCR(
+            flux, goodPixelMask=good, glitchPixelMask=None, repair=False,
+        )
+
+        # Still flagged ...
+        self.assertEqual(result.nCRs, 1)
+        # ... but cube untouched.
+        np.testing.assert_array_equal(flux, before)
+
+    def testIterationsConverge(self):
+        """nByIteration should monotonically diminish and stop before maxIterations."""
+        flux = _flatRamp(nReads=20, rate=10.0)
+        _injectCR(flux, y=3, x=4, k=7, amount=300.0)
+        _injectGlitchPair(flux, y=5, x=6, k=10, amount=400.0)
+        good = np.ones(flux.shape[1:], dtype=bool)
+        glitchMask = np.zeros_like(good)
+        glitchMask[5, :] = True
+
+        result = _runCR(
+            flux, goodPixelMask=good, glitchPixelMask=glitchMask,
+            maxIterations=10,
+        )
+
+        self.assertGreaterEqual(result.nIterations, 1)
+        self.assertLessEqual(result.nIterations, 10)
+        self.assertEqual(len(result.nByIteration), result.nIterations)
+        # All defects accounted for in iter 1 on a clean synthetic ramp.
+        nc1, ng1 = result.nByIteration[0]
+        self.assertGreaterEqual(nc1, 1)
+        self.assertGreaterEqual(ng1, 1)
+        # Subsequent iterations add nothing.
+        for nc, ng in result.nByIteration[1:]:
+            self.assertEqual(nc, 0)
+            self.assertEqual(ng, 0)
+
+    def testAsymmetricGlitchPairPaired(self):
+        """Glitch where the recovery delta is just under threshold still pairs.
+
+        The matched-pair criterion only requires ONE of the adjacent
+        deltas to be independently flagged, plus opposite-sign residuals
+        that cancel within threshold.
+
+        Threshold here = nSigma * sigmaFloor = 5 * 8 = 40 ADU. Construct
+        a pair with delta[5] residual = +50 (flagged) and delta[6]
+        residual = -35 (NOT flagged), opposite signs, sum = +15 < 40
+        (cancels). This must be classified as a glitch, not a CR.
+        """
+        flux = _flatRamp(nReads=20, rate=10.0)
+        # Read 6 offset by +50 → delta[5] = rate+50, delta[6] = rate-50.
+        # Bring reads 7+ back by 15 → delta[6] = rate-35.
+        flux[6, 4, 4] += 50.0
+        flux[7:, 4, 4] += 15.0
+        good = np.ones(flux.shape[1:], dtype=bool)
+        glitchMask = np.ones_like(good)
+
+        result = _runCR(
+            flux, goodPixelMask=good, glitchPixelMask=glitchMask,
+        )
+
+        self.assertEqual(result.nCRs, 0)
+        self.assertEqual(result.nGlitchPairs, 1)
+        self.assertTrue(result.glitchFlagMask[5, 4, 4])
+        self.assertTrue(result.glitchFlagMask[6, 4, 4])
+
+    def testShortRampGlitchDoesNotDriveRateRunaway(self):
+        """A single big glitch on a short (22-read) ramp must NOT bias
+        the rate estimator into the runaway regime that false-flags
+        every other delta.
+
+        With LSQ slope, a single −5000 ADU outlier near the center of a
+        22-read ramp drags the slope by ~−10 ADU/read, which is enough
+        to push typical (rate≈0) deltas to near the 5σ threshold,
+        triggering iter-2 false flags and a divergent rate. Median is
+        robust to a few outliers; rate stays near 0.
+        """
+        flux = _flatRamp(nReads=22, rate=0.0)
+        flux[11:, 4, 4] -= 5000.0  # big downward glitch at read 11
+        flux[12:, 4, 4] += 5000.0  # recovery at read 12
+        good = np.ones(flux.shape[1:], dtype=bool)
+        glitchMask = np.ones_like(good)
+
+        result = _runCR(
+            flux, goodPixelMask=good, glitchPixelMask=glitchMask,
+        )
+
+        # Final rate must be near the true 0, not the runaway-divergent
+        # value the LSQ slope would produce.
+        self.assertLess(abs(float(result.rate[4, 4])), 5.0)
+        # Exactly one glitch pair detected; no false CRs.
+        self.assertEqual(result.nGlitchPairs, 1)
+        self.assertEqual(result.nCRs, 0)
+
+    def testGlitchRepairPreservesPostGlitchCumulative(self):
+        """Repair of a glitch pair must leave cube[k+2] (and beyond) unchanged.
+
+        A digital ASIC glitch corrupts only ONE cumulative value (cube[k+1]);
+        the recovery delta has restored the true cumulative at read k+2.
+        Replacing both deltas with `rate` (naive repair) erases real signal
+        in the second delta and shifts everything that follows. The correct
+        repair fixes only cube[k+1]. Exercises ``correctGlitches=True`` —
+        the default policy is detect-but-not-repair.
+        """
+        flux = _flatRamp(nReads=20, rate=5.0)
+        # Inject some real signal evolution after the glitch — a real flux
+        # event at reads 10..14 — that the repair must preserve.
+        flux[10:15, 4, 4] += np.arange(5, dtype=np.float32) * 3.0
+        flux[15:, 4, 4] += 12.0
+        # Inject a downward glitch at read 6 (large offset −500):
+        flux[6, 4, 4] -= 500.0
+        expectedAfterGlitch = flux[7:, 4, 4].copy()
+        # Repaired cube[6] should be cube[5] + rate (smooth interpolation).
+        expectedRead6 = flux[5, 4, 4] + 5.0
+
+        good = np.ones(flux.shape[1:], dtype=bool)
+        glitchMask = np.ones_like(good)
+
+        result = _runCR(
+            flux, goodPixelMask=good, glitchPixelMask=glitchMask,
+            correctGlitches=True,
+        )
+
+        self.assertEqual(result.nGlitchPairs, 1)
+        # The single corrupted read is restored to the smooth ramp.
+        self.assertAlmostEqual(float(flux[6, 4, 4]), float(expectedRead6), delta=1.0)
+        # Reads after the glitch are byte-preserved (the recovery delta
+        # already restored the true cumulative).
+        np.testing.assert_allclose(
+            flux[7:, 4, 4], expectedAfterGlitch, atol=0.01,
+            err_msg="Reads after a glitch pair must match the original cube.",
+        )
+
+    def testGlitchAmplitudeMinADUFloor(self):
+        """A small glitch pair stops being classified as glitch when the
+        amplitude floor exceeds its residual size.
+
+        With default (floor=0), a small but still-above-threshold pair
+        IS classified as glitch. Raising the floor above its amplitude
+        suppresses the glitch classification (the positive-residual
+        delta falls back to CR).
+        """
+        # Small symmetric glitch: read 6 offset by +60 → delta[5] resid +60,
+        # delta[6] resid −60. Threshold (5*sigmaFloor=40) is comfortably
+        # exceeded, sum cancels to 0.
+        flux = _flatRamp(nReads=20, rate=10.0)
+        good = np.ones(flux.shape[1:], dtype=bool)
+        glitchMask = np.ones_like(good)
+        flux[6, 1, 1] += 60.0
+        # Floor=0: classified as glitch.
+        result = _runCR(
+            flux.copy(), goodPixelMask=good, glitchPixelMask=glitchMask,
+        )
+        self.assertEqual(result.nGlitchPairs, 1)
+        self.assertEqual(result.nCRs, 0)
+
+        # Floor=80 ADU (above the 60-ADU residual): the pair is no longer
+        # classified, so the positive delta falls back to CR. Disable
+        # the cumulative-drop check (``maxDropFraction=inf``) — this test
+        # exercises the amplitude-floor fallback path, and the
+        # cumulative-drop check would otherwise correctly reject this
+        # transient as not-a-CR (the cumulative dips by the full
+        # amplitude one read later).
+        result = _runCR(
+            flux.copy(), goodPixelMask=good, glitchPixelMask=glitchMask,
+            glitchAmplitudeMinADU=80.0,
+            maxDropFraction=float('inf'),
+        )
+        self.assertEqual(result.nGlitchPairs, 0)
+        self.assertGreaterEqual(result.nCRs, 1)
+
+    def testBoundaryGlitchClassifiedAsGlitch(self):
+        """A flagged delta at index 0 or N-2 (no neighbor to pair with)
+        should be marked as ASIC_GLITCH, not CR or unclassified.
+
+        Digital glitches that hit the first or last read look like a
+        single huge delta at the ramp boundary and have no pair partner.
+        The boundary heuristic catches them anyway.
+        """
+        # Glitch at FIRST read: corrupt cube[0] by +200. delta[0] becomes
+        # rate - 200 → huge negative residual.
+        flux = _flatRamp(nReads=20, rate=5.0)
+        good = np.ones(flux.shape[1:], dtype=bool)
+        glitchMask = np.ones_like(good)
+        flux[0, 1, 1] += 200.0
+        result = _runCR(
+            flux, goodPixelMask=good, glitchPixelMask=glitchMask,
+        )
+        self.assertTrue(result.glitchFlagMask[0, 1, 1],
+                        'first-read glitch should hit the ASIC_GLITCH plane')
+        self.assertFalse(result.crFlagMask[0, 1, 1],
+                         'first-read glitch must NOT also be CR-flagged')
+
+        # Glitch at LAST read: corrupt cube[N-1] by -200. delta[N-2]
+        # becomes rate - 200 → huge negative residual at the very last
+        # delta index.
+        flux = _flatRamp(nReads=20, rate=5.0)
+        flux[-1, 2, 2] -= 200.0
+        result = _runCR(
+            flux, goodPixelMask=good, glitchPixelMask=glitchMask,
+        )
+        self.assertTrue(result.glitchFlagMask[-1, 2, 2],
+                        'last-read glitch should hit the ASIC_GLITCH plane')
+        self.assertFalse(result.crFlagMask[-1, 2, 2],
+                         'last-read glitch must NOT also be CR-flagged')
+
+    def testGoodPixelMaskExcludesPixels(self):
+        flux = _flatRamp(nReads=20, rate=10.0)
+        _injectCR(flux, y=3, x=4, k=7, amount=300.0)
+        good = np.ones(flux.shape[1:], dtype=bool)
+        good[3, 4] = False   # exclude the CR pixel
+
+        result = _runCR(
+            flux, goodPixelMask=good, glitchPixelMask=None,
+        )
+
+        self.assertEqual(result.nCRs, 0)
+
+    def testPersistenceRejectsTransientUpSpike(self):
+        """A positive single-delta spike whose charge does NOT persist
+        is downgraded from CR. The cumulative jumps by +A at delta k,
+        then linearly decays back to baseline over M subsequent reads
+        — no net charge deposited, so the candidate is rejected.
+
+        With ``maxDropFraction=inf`` (off) the up-spike falls into
+        the CR bucket; with the default 0.5 it is rejected.
+        """
+        flux = _flatRamp(nReads=24, rate=10.0)
+        good = np.ones(flux.shape[1:], dtype=bool)
+
+        # Spike at delta k=8 (read 9 jumps by +200); linear decay back
+        # to baseline by read 14 (5 decay steps of -40 each in delta
+        # space). The down half is spread across multiple deltas so it
+        # is NOT a glitch pair — the existing pair classifier never
+        # catches this; only the cumulative-drop check does.
+        k = 8
+        amount = 200.0
+        decayReads = 5
+        # Cumulative at read k+1 = baseline + amount; linearly down to
+        # baseline by read k+1+decayReads.
+        for j in range(1, decayReads + 1):
+            level = amount * (1.0 - j / decayReads)
+            flux[k + 1 + j, 2, 5] += level
+        flux[k + 1, 2, 5] += amount
+
+        # Default (cumulative-drop check on): up-spike rejected.
+        result = _runCR(
+            flux.copy(), goodPixelMask=good, glitchPixelMask=None,
+        )
+        self.assertFalse(
+            bool(result.crFlagMask[k, 2, 5]),
+            "cumulative-drop check should reject a transient up-spike that "
+            "decays back to baseline",
+        )
+
+        # With the cumulative-drop check disabled: same fixture, the up-spike is
+        # flagged as CR (the pair classifier doesn't catch a multi-read
+        # decay).
+        result = _runCR(
+            flux.copy(), goodPixelMask=good, glitchPixelMask=None,
+            maxDropFraction=float('inf'),
+        )
+        self.assertTrue(
+            bool(result.crFlagMask[k, 2, 5]),
+            "without the cumulative-drop check the up-spike falls into CR",
+        )
+
+    def testBadPixelMaskCatchesRTSPixel(self):
+        """An RTS / telegraph-noise pixel — several large delta
+        excursions across the ramp — is classified BAD by the outlier
+        count gate. A clean single-CR pixel elsewhere on the cube is NOT.
+        """
+        flux = _flatRamp(nReads=40, rate=0.0)
+
+        # RTS pixel at (5, 7): isolated single-read ±60 ADU pulses.
+        # 4 of each sign clears the default ``badPixelMinOutliers=4``
+        # gate (well above the binomial tail of Gaussian noise at
+        # production σ). Pulses are spaced ≥ 4 reads apart so the
+        # +60/-60 in-delta pair from one pulse does not overlap
+        # another pulse, and the baseline still dominates the IQR.
+        y, x = 5, 7
+        for k in (3, 11, 19, 27):
+            flux[k + 1, y, x] += 60.0
+        for k in (7, 15, 23, 31):
+            flux[k + 1, y, x] -= 60.0
+
+        # Clean single CR at a different pixel — should be CR, not BAD.
+        _injectCR(flux, y=3, x=4, k=10, amount=300.0)
+
+        result = _runCR(
+            flux.copy(), goodPixelMask=np.ones(flux.shape[1:], dtype=bool),
+            glitchPixelMask=None,
+        )
+
+        self.assertTrue(bool(result.crFlagMask[10, 3, 4]))
+        self.assertFalse(bool(result.badPixelMask[3, 4]))
+        self.assertTrue(
+            bool(result.badPixelMask[y, x]),
+            "RTS pixel with many ±60 ADU spikes should land in "
+            "badPixelMask",
+        )
+
+    def testBadPixelGateDoesNotMaskSimpleNoisyPixels(self):
+        """Plain Gaussian read noise must not trip the BAD-pixel gate.
+
+        The cube is a pure random walk with σ = 8 ADU per read — stated
+        to be near the production ``sigmaFloorADU`` — with no telegraph
+        behaviour, so at default knobs no pixel should be flagged. (The
+        author notes a small leak at larger σ, accepted because real H4
+        read noise sits around 8 ADU.)
+        """
+        sigma = 8.0
+        nReads, H, W = 40, 80, 80   # 6400 pixels, larger than any test patch.
+        rng = np.random.default_rng(seed=42)
+        # Cumulative sum of independent per-read draws: a zero-rate ramp.
+        noise = rng.normal(loc=0.0, scale=sigma, size=(nReads, H, W))
+        flux = np.cumsum(noise, axis=0).astype(np.float32)
+
+        everyPixelGood = np.ones(flux.shape[1:], dtype=bool)
+        result = _runCR(
+            flux.copy(), goodPixelMask=everyPixelGood,
+            glitchPixelMask=None,
+        )
+
+        # Not a single pixel may be classified BAD.
+        nBad = int(result.badPixelMask.sum())
+        message = (
+            f"BAD gate misfired on {nBad} simply-noisy pixels "
+            f"out of {H * W} (σ={sigma} Gaussian noise; no RTS)."
+        )
+        self.assertEqual(nBad, 0, message)
+
+    def testBadPixelGateDisabledWhenMinOutliersZero(self):
+        """Passing ``badPixelMinOutliers=0`` must switch the BAD-pixel
+        pass off: an obvious RTS pixel then leaves ``badPixelMask`` empty.
+        """
+        flux = _flatRamp(nReads=40, rate=0.0)
+        rtsY, rtsX = 5, 7
+        for amplitude, reads in ((60.0, (3, 11, 19, 27)), (-60.0, (7, 15, 23, 31))):
+            for k in reads:
+                flux[k + 1, rtsY, rtsX] += amplitude
+
+        everyPixelGood = np.ones(flux.shape[1:], dtype=bool)
+        result = _runCR(
+            flux.copy(), goodPixelMask=everyPixelGood,
+            glitchPixelMask=None, badPixelMinOutliers=0,
+        )
+        self.assertFalse(result.badPixelMask.any())
+
+
+class RampQRTestCase(lsst.utils.tests.TestCase):
+    """Sanity tests for ``cr._rampQR``.
+
+    The contract is equivalence with
+    ``np.percentile(deltas, [25, 50, 75], axis=-1)`` (method='linear');
+    any drift would silently move the CR detector's threshold. Fixtures
+    are ``(H, W, N)`` arrays, matching the time-axis-last layout.
+    """
+
+    def _checkMatchesNumpy(self, deltas):
+        lower, median, upper = cr._rampQR(deltas)
+        expected = np.percentile(deltas, [25, 50, 75], axis=-1)
+        tolerances = dict(rtol=1e-6, atol=1e-6)
+        np.testing.assert_allclose(lower, expected[0], **tolerances)
+        np.testing.assert_allclose(median, expected[1].astype(np.float32), **tolerances)
+        np.testing.assert_allclose(upper, expected[2], **tolerances)
+        self.assertEqual(median.dtype, np.float32)
+
+    def testOddNMatchesNumpy(self):
+        # Production case: an 88-read dark yields 87 deltas, and with odd
+        # N the median is an exact sample.
+        deltas = np.random.default_rng(0).standard_normal((4, 5, 87))
+        self._checkMatchesNumpy(deltas.astype(np.float32))
+
+    def testEvenNMatchesNumpy(self):
+        # Even N: the linear-method median falls between two samples.
+        deltas = np.random.default_rng(1).standard_normal((4, 5, 88))
+        self._checkMatchesNumpy(deltas.astype(np.float32))
+
+    def testSmallNMatchesNumpy(self):
+        # Edge cases: N=2 is described as the shortest input the CR loop
+        # accepts (3 reads -> 2 deltas); the other small N are where the
+        # rank set collapses.
+        rng = np.random.default_rng(2)
+        for nDeltas in (2, 3, 4, 5):
+            sample = rng.standard_normal((3, 3, nDeltas)).astype(np.float32)
+            with self.subTest(N=nDeltas):
+                self._checkMatchesNumpy(sample)
+
+    def testSubsetShapeMatchesNumpy(self):
+        # Mirrors the (1, M, N-1) shape that the iteration-2+ subset path
+        # is said to hand to _rampQR.
+        deltas = np.random.default_rng(3).standard_normal((1, 17, 87))
+        self._checkMatchesNumpy(deltas.astype(np.float32))
+
+    def testConstantArrayIsExact(self):
+        # With every sample equal, all three quantiles must be exactly
+        # that constant.
+        deltas = np.full((3, 3, 87), 7.5, dtype=np.float32)
+        lower, median, upper = cr._rampQR(deltas)
+        for quantile in (lower, median, upper):
+            np.testing.assert_array_equal(quantile, 7.5)
+
+    def testDoesNotMutateInput(self):
+        # The detector's residual pass reads ``deltas`` again after
+        # _rampQR returns, so _rampQR must partition a scratch copy and
+        # leave the caller's array in its original order.
+        deltas = np.random.default_rng(4).standard_normal((4, 5, 87))
+        deltas = deltas.astype(np.float32)
+        before = deltas.copy()
+        cr._rampQR(deltas)
+        np.testing.assert_array_equal(deltas, before)
+
+
+class TestMemory(lsst.utils.tests.MemoryTestCase):
+    """Run the checks inherited from ``lsst.utils.tests.MemoryTestCase`` as-is."""
+
+
+def setup_module(module):
+    lsst.utils.tests.init()  # same initialisation the __main__ path performs before unittest.main()
+
+
+if __name__ == "__main__":
+    lsst.utils.tests.init()  # mirror setup_module when the file is run as a script
+    unittest.main()
diff --git a/tests/test_h4LinearityFit.py b/tests/test_h4LinearityFit.py
new file mode 100644
index 00000000..6d3b8805
--- /dev/null
+++ b/tests/test_h4LinearityFit.py
@@ -0,0 +1,193 @@
+"""Tests for the top-level fit() function."""
+
+from __future__ import annotations
+
+import numpy as np
+
+from lsst.obs.pfs.h4Linearity.fit import fit
+from lsst.obs.pfs.h4Linearity.models import PolynomialModel
+from lsst.obs.pfs.h4Linearity.types import MASKED_BY_INPUT, Ramp
+
+
+def test_fitSingleRampRecoversTarget(smallSyntheticRamp):
+    """Fit one synthetic ramp and check the model maps its reads onto a linear target."""
+    ramp, _truth = smallSyntheticRamp
+    # HIGH_FIT_RESIDUAL flagging is off: on a fixture this small the noise floor
+    # between otherwise-identical pixels can trip the 5×median rule.
+    correction = fit([ramp], badLinearityMedianMultiplier=None)
+    assert correction.coefficients.shape == (5, 4, 5)
+    assert (correction.badPixelMask == 0).all()
+    # Rescale the measured reads m onto the fit domain x before evaluating.
+    measured = ramp.reads.astype(np.float32)
+    span = correction.fitMax - correction.fitMin
+    span = np.where(span > 0, span, 1.0)
+    x = 2.0 * (measured - correction.fitMin[None]) / span[None] - 1.0
+    # evaluate() expects the spatial axes first, (H, W, ...): move the read axis
+    # to the back for the call and restore it afterwards (PIPE2D-1843 bridge).
+    xSpatialFirst = np.moveaxis(x, 0, -1)
+    tPred = np.moveaxis(
+        correction.model.evaluate(correction.coefficients, xSpatialFirst), -1, 0
+    )
+    nReads = ramp.reads.shape[0]
+    fitRate = float(np.median(ramp.reads[2] - ramp.reads[1]))
+    target = fitRate * np.arange(nReads, dtype=np.float32)
+    target = np.broadcast_to(target[:, None, None], tPred.shape)
+    np.testing.assert_allclose(tPred, target, rtol=1e-3, atol=1.0)
+    # The diagnostics summary must carry the residual-RMS percentiles.
+    for key in ("residualRmsP50", "residualRmsP95", "residualRmsP99"):
+        assert key in correction.diagnostics.summary
+
+
+def test_fitTilingIsDeterministic(smallSyntheticRamp):
+    """Coefficients must not depend on the tile size used to walk the
+    frame (equal to within float32 precision)."""
+    ramp, _ = smallSyntheticRamp
+    whole, tiled = (fit([ramp], blockSize=size, badLinearityMedianMultiplier=None).coefficients
+                    for size in ((4, 5), (2, 3)))
+    np.testing.assert_allclose(whole, tiled, rtol=1e-5, atol=1e-5)
+
+
+def test_fitPropagatesInputMask(tinyLinearRamp):
+    """A pixel flagged in ``Ramp.validMask`` must come back MASKED_BY_INPUT."""
+    ramp, _ = tinyLinearRamp
+    inputMask = np.zeros(ramp.reads.shape[1:], dtype=np.uint8)
+    inputMask[0, 0] = 1  # non-zero marks pixel (0, 0) as invalid
+    maskedRamp = Ramp(reads=ramp.reads, validMask=inputMask)
+    correction = fit([maskedRamp], model=PolynomialModel(order=1), badLinearityMedianMultiplier=None)
+    assert correction.badPixelMask[0, 0] & MASKED_BY_INPUT
+    # The neighbouring, unflagged pixel must stay clean.
+    assert correction.badPixelMask[0, 1] == 0
+
+
+def test_fitMultipleRampsConcatenates():
+    """Ramps of different lengths are pooled per pixel into a single fit."""
+    H, W = 3, 4
+    rate1, rate2 = 100.0, 200.0
+    # Both ramps are perfectly linear (t = m for every pixel):
+    #   ramp 1 —  9 reads (1 implicit zero + 8) at rate 100;
+    #   ramp 2 — 13 reads (1 implicit zero + 12) at rate 200.
+    N1, N2 = 9, 13
+
+    def linearReads(rate, nReads):
+        readIndex = np.arange(nReads, dtype=np.float32)[:, None, None]
+        return np.full((1, H, W), rate, dtype=np.float32) * readIndex
+
+    reads1, reads2 = linearReads(rate1, N1), linearReads(rate2, N2)
+    correction = fit(
+        [Ramp(reads=reads1), Ramp(reads=reads2)],
+        model=PolynomialModel(order=2), badLinearityMedianMultiplier=None,
+    )
+
+    # Evaluate the fitted model at ramp 1's own reads; it should return ramp 1's targets.
+    span = correction.fitMax - correction.fitMin
+    span = np.where(span > 0, span, 1.0)
+    x1 = np.moveaxis(2.0 * (reads1 - correction.fitMin[None]) / span[None] - 1.0, 0, -1)
+    tPred = np.moveaxis(correction.model.evaluate(correction.coefficients, x1), -1, 0)
+    # Ramp 1's target is rate1 * n, with target[0] = 0.
+    target1 = rate1 * np.arange(N1, dtype=np.float32)
+    np.testing.assert_allclose(
+        tPred, np.broadcast_to(target1[:, None, None], tPred.shape), rtol=1e-4, atol=1e-2
+    )
+    # Every pixel should have consumed the reads of both ramps.
+    assert (correction.diagnostics.nPointsUsed == N1 + N2).all()
+
+
+def test_fitEmptyRampListRaises():
+    """``fit`` must reject an empty list of ramps with ``ValueError``."""
+    import pytest
+    pytest.raises(ValueError, fit, [])
+
+
+# --- New behaviour: saturation-knee + HIGH_FIT_RESIDUAL flag ---------------
+
+def _buildSaturatingRamp():
+    """Build a 30-read 4x5 ramp in which pixel (2, 2) saturates early.
+
+    Every pixel ramps linearly at 1000 DN/read. Pixel (2, 2) follows
+    that rate up to read 5; from read 6 onward each of its deltas is
+    only 1% of the rate (1000 -> 10 DN per read), i.e. the first five
+    deltas are healthy and every later one has collapsed.
+    Shared by the saturation-knee and HIGH_FIT_RESIDUAL tests.
+    """
+    H, W, N = 4, 5, 30
+    rate = 1000.0
+    readIndex = np.arange(N, dtype=np.float32)[:, None, None]
+    reads = rate * readIndex * np.ones((H, W), dtype=np.float32)
+    # Saturating pixel: after the last healthy read the signal only creeps
+    # up by 1% of the rate per read.
+    pr, pc = 2, 2
+    lastHealthy = 5
+    for n in range(lastHealthy + 1, N):
+        reads[n, pr, pc] = reads[n - 1, pr, pc] + 0.01 * rate
+    return Ramp(reads=reads)
+
+
+def test_saturationKneeFlagsSaturatingPixelByShrinkingFitWindow():
+    """``saturationKnee`` drops the collapsed reads of a saturating pixel."""
+    ramp = _buildSaturatingRamp()
+
+    def pointsUsed(saturationKnee):
+        correction = fit(
+            [ramp],
+            model=PolynomialModel(order=2),
+            saturationKnee=saturationKnee,
+            badLinearityMedianMultiplier=None,
+        )
+        return correction.diagnostics.nPointsUsed
+
+    # A knee of 0.5 must catch pixel (2, 2): once its delta falls to 1% of
+    # the rate (far below 0.5 × refDelta) the read is masked.
+    withKnee = pointsUsed(0.5)
+    nUsedSat = int(withKnee[2, 2])
+    # The saturator loses its late reads; a healthy pixel keeps everything.
+    assert nUsedSat <= 7, f"expected <= 7 reads kept on saturator, got {nUsedSat}"
+    assert int(withKnee[0, 0]) == 30
+
+    # With the knee disabled the same ramp keeps every read on every
+    # pixel (the deviationLimit-only path is disabled by default).
+    assert int(pointsUsed(None)[2, 2]) == 30
+
+
+def test_badLinearityMultiplierFlagsHighResidualPixel():
+    """With the multiplier set to 5.0, only the saturating pixel gets HIGH_FIT_RESIDUAL."""
+    from lsst.obs.pfs.h4Linearity.types import HIGH_FIT_RESIDUAL
+
+    # The saturation knee is disabled so the saturating pixel reaches the
+    # fit with a bad residual, giving the residual-based flag something
+    # to catch.
+    correction = fit(
+        [_buildSaturatingRamp()],
+        model=PolynomialModel(order=2),
+        saturationKnee=None,
+        badLinearityMedianMultiplier=5.0,
+    )
+    mask = correction.badPixelMask
+    assert mask[2, 2] & HIGH_FIT_RESIDUAL, (
+        f"expected HIGH_FIT_RESIDUAL at (2,2); got mask = "
+        f"{mask[2, 2]:#06x}"
+    )
+
+    # Every other pixel of the 4x5 frame must be free of the flag.
+    others = [(r, c) for r in range(4) for c in range(5) if (r, c) != (2, 2)]
+    for r, c in others:
+        assert (
+            mask[r, c] & HIGH_FIT_RESIDUAL
+        ) == 0, f"unexpected HIGH_FIT_RESIDUAL at ({r}, {c})"
+    # The summary must expose the new fields.
+    summary = correction.diagnostics.summary
+    assert "badPixelFraction_highFitResidual" in summary
+    assert summary["badPixelFraction_highFitResidual"] > 0
+    assert "highFitResidualThresholdDN" in summary
+
+
+def test_badLinearityMultiplierDisabledByPassingNone():
+    """``badLinearityMedianMultiplier=None`` must leave HIGH_FIT_RESIDUAL unset everywhere."""
+    from lsst.obs.pfs.h4Linearity.types import HIGH_FIT_RESIDUAL
+
+    correction = fit(
+        [_buildSaturatingRamp()],
+        model=PolynomialModel(order=2),
+        saturationKnee=None,
+        badLinearityMedianMultiplier=None,
+    )
+    assert not (correction.badPixelMask & HIGH_FIT_RESIDUAL).any()
diff --git a/tests/test_h4LinearityFitThreading.py b/tests/test_h4LinearityFitThreading.py
new file mode 100644
index 00000000..96431f2d
--- /dev/null
+++ b/tests/test_h4LinearityFitThreading.py
@@ -0,0 +1,203 @@
+"""Tests for fit() threading: heuristic, parallel path, errors, determinism."""
+
+from __future__ import annotations
+
+import sys
+import threading
+
+import numpy as np
+import pytest
+
+from lsst.obs.pfs.h4Linearity.fit import _resolveWorkerCount, fit
+from lsst.obs.pfs.h4Linearity.models import PolynomialModel
+
+
+def test_resolveWorkerCountExplicitIntIsReturnedAsIs():
+    """An explicit ``workers`` value is returned unchanged."""
+    # It beats the heuristic outright: no clamping, no frame-size check.
+    for explicit in (1, 4, 16):
+        assert _resolveWorkerCount(explicit, 10, 10) == explicit
+    # Even a "large" frame does not override an explicit 1.
+    assert _resolveWorkerCount(1, 5000, 5000) == 1
+
+
+def test_resolveWorkerCountSmallFrameDefaultsToOne():
+    # Below _SMALL_FRAME_PIXEL_LIMIT (1_000_000 pixels) the automatic
+    # choice (workers=None) is 1, whatever os.cpu_count() reports.
+    for height, width in ((100, 100), (1000, 999)):  # 999_000 < 1_000_000
+        assert _resolveWorkerCount(None, height, width) == 1
+
+
+def test_resolveWorkerCountLargeFrameCapsAtEight(monkeypatch):
+    # With H*W >= 1_000_000 and more than 8 CPUs the count is capped at 8.
+    osModule = sys.modules["lsst.obs.pfs.h4Linearity.fit"].os
+    monkeypatch.setattr(osModule, "cpu_count", lambda: 16)
+    for height, width in ((2000, 500), (4096, 4096)):  # first is exactly 1_000_000
+        assert _resolveWorkerCount(None, height, width) == 8
+
+
+def test_resolveWorkerCountLargeFrameUncappedBelowEight(monkeypatch):
+    """On a large frame with fewer than 8 CPUs the CPU count is used as-is."""
+    osModule = sys.modules["lsst.obs.pfs.h4Linearity.fit"].os
+    monkeypatch.setattr(osModule, "cpu_count", lambda: 4)
+    assert _resolveWorkerCount(None, 4096, 4096) == 4
+
+
+def test_resolveWorkerCountHandlesNoneCpuCount(monkeypatch):
+    """``os.cpu_count()`` may return None; the heuristic must then fall back to 1."""
+    osModule = sys.modules["lsst.obs.pfs.h4Linearity.fit"].os
+    monkeypatch.setattr(osModule, "cpu_count", lambda: None)
+    assert _resolveWorkerCount(None, 4096, 4096) == 1
+
+
+def test_resolveWorkerCountInvalidRaises():
+    """Zero and negative worker counts are rejected with a ``workers`` ValueError."""
+    for invalid in (0, -3):
+        with pytest.raises(ValueError, match="workers"):
+            _resolveWorkerCount(invalid, 10, 10)
+
+
+def _arraysEqual(a, b, label):
+    """Assert equal shape, dtype and values (NaN == NaN); ``label`` prefixes the message."""
+    assert a.shape == b.shape, f"{label}: shape {a.shape} != {b.shape}"
+    assert a.dtype == b.dtype, f"{label}: dtype {a.dtype} != {b.dtype}"
+    assert np.array_equal(a, b, equal_nan=True), f"{label}: arrays differ"
+
+
+def test_fitWorkers4ProducesByteIdenticalOutputToWorkers1(smallSyntheticRamp):
+    """Threaded and sequential fits must agree bit for bit.
+
+    Stated rationale: each tile is written to disjoint output slices on
+    the main thread, and both tile assembly and the fit arithmetic are
+    deterministic.
+    """
+    ramp, _ = smallSyntheticRamp
+    # A (2, 3) block on the 4x5 frame gives 4 tiles — enough to exercise
+    # parallelism when workers=4.
+    serial = fit([ramp], blockSize=(2, 3), workers=1)
+    threaded = fit([ramp], blockSize=(2, 3), workers=4)
+
+    # Arrays stored directly on the correction.
+    correctionFields = (
+        "coefficients",
+        "fitMin",
+        "fitMax",
+        "badPixelMask",
+    )
+    for name in correctionFields:
+        _arraysEqual(getattr(threaded, name), getattr(serial, name), name)
+
+    # Per-pixel diagnostic arrays.
+    diagnosticFields = (
+        "residualRms",
+        "maxAbsResidual",
+        "nPointsUsed",
+        "monotonic",
+        "conditionNumber",
+    )
+    for name in diagnosticFields:
+        _arraysEqual(
+            getattr(threaded.diagnostics, name),
+            getattr(serial.diagnostics, name),
+            name,
+        )
+
+    # The summary dicts must match as well: same keys and the same
+    # float values.
+    assert threaded.diagnostics.summary == serial.diagnostics.summary
+
+
+def test_fitWorkers1DoesNotConstructExecutor(monkeypatch, smallSyntheticRamp):
+    """With ``workers=1`` the sequential path must never call the executor factory."""
+    fitModule = sys.modules["lsst.obs.pfs.h4Linearity.fit"]
+    ramp, _ = smallSyntheticRamp
+
+    def _forbiddenFactory(*args, **kwargs):
+        message = (
+            f"_executorFactory was called for workers=1 path: "
+            f"args={args} kwargs={kwargs}"
+        )
+        raise AssertionError(message)
+    monkeypatch.setattr(fitModule, "_executorFactory", _forbiddenFactory)
+    fit([ramp], blockSize=(2, 3), workers=1)  # must not raise
+
+
+def test_fitWorkers4ConstructsExecutorWithMaxWorkers4(
+    monkeypatch, smallSyntheticRamp
+):
+    """``workers=4`` must reach the executor factory as ``max_workers=4``."""
+    from concurrent.futures import ThreadPoolExecutor
+
+    ramp, _ = smallSyntheticRamp
+    fitModule = sys.modules["lsst.obs.pfs.h4Linearity.fit"]
+    calls = {}
+
+    def _spyFactory(*args, **kwargs):
+        # Record the call, then hand back a real executor so the fit
+        # still runs to completion.
+        calls.update(args=args, kwargs=dict(kwargs))
+        return ThreadPoolExecutor(*args, **kwargs)
+
+    monkeypatch.setattr(fitModule, "_executorFactory", _spyFactory)
+    fit([ramp], blockSize=(2, 3), workers=4)
+    assert calls["kwargs"].get("max_workers") == 4
+
+
+def test_fitAutoWorkersUsesResolvedCount(monkeypatch, smallSyntheticRamp):
+    """Default ``workers=None`` on a small frame resolves to 1 worker.
+
+    Same check as the explicit ``workers=1`` test, but reached through the
+    default argument: the executor factory must never be called.
+    """
+    ramp, _ = smallSyntheticRamp  # H=4 W=5, well below 1_000_000.
+    fitModule = sys.modules["lsst.obs.pfs.h4Linearity.fit"]
+    factoryCalls = []
+    monkeypatch.setattr(fitModule, "_executorFactory", lambda *a, **k: factoryCalls.append(1))
+
+    fit([ramp], blockSize=(2, 3))  # workers=None
+    assert not factoryCalls
+
+
+def test_fitInvalidWorkersRaises(smallSyntheticRamp):
+    """``fit`` itself must reject non-positive ``workers`` values."""
+    ramp, _ = smallSyntheticRamp
+    for invalid in (0, -1):
+        with pytest.raises(ValueError, match="workers"):
+            fit([ramp], workers=invalid)
+
+
+def test_fitWorkerExceptionIncludesTileCoords(smallSyntheticRamp):
+    """A ``fitBlock`` failure on a worker thread must surface as a
+    RuntimeError naming the tile's row/col slice, with the original
+    exception attached as ``__cause__``."""
+    ramp, _ = smallSyntheticRamp
+    model = PolynomialModel(order=2)
+    realFitBlock = model.fitBlock
+    guard = threading.Lock()
+    alreadyFailed = threading.Event()
+
+    def failingFitBlock(m, t, valid, conditionNumberLimit):
+        # The check-and-set runs under the lock so that exactly one thread
+        # takes the failure branch; unguarded, two concurrent workers could
+        # both observe the event as unset and both raise.
+        with guard:
+            firstCall = not alreadyFailed.is_set()
+            alreadyFailed.set()
+        if firstCall:
+            raise RuntimeError("injected failure")
+        return realFitBlock(
+            m=m, t=t, valid=valid, conditionNumberLimit=conditionNumberLimit
+        )
+
+    # PolynomialModel is a frozen dataclass, so the bound method is shadowed
+    # with an instance attribute via object.__setattr__.
+    object.__setattr__(model, "fitBlock", failingFitBlock)
+
+    tilePattern = r"fitBlock failed at tile \[rows \d+:\d+, cols \d+:\d+\]"
+    with pytest.raises(RuntimeError, match=tilePattern) as excInfo:
+        fit([ramp], model=model, blockSize=(2, 3), workers=2)
+
+    # The injected exception must be chained as the cause.
+    cause = excInfo.value.__cause__
+    assert isinstance(cause, RuntimeError)
+    assert str(cause) == "injected failure"
diff --git a/tests/test_h4LinearityIo.py b/tests/test_h4LinearityIo.py
new file mode 100644
index 00000000..3aaca5c0
--- /dev/null
+++ b/tests/test_h4LinearityIo.py
@@ -0,0 +1,64 @@
+import os
+import tempfile
+import unittest
+
+from astropy.io import fits
+
+import lsst.utils.tests
+from lsst.obs.pfs.h4Linearity.io import isH4LinearityFile
+from lsst.obs.pfs.h4Linearity.models import MODEL_REGISTRY
+
+
+class IsH4LinearityFileTestCase(lsst.utils.tests.TestCase):
+    """``isH4LinearityFile`` must accept only FITS files whose ``MODEL``
+    keyword names a registered model, rejecting other FITS files and
+    nonexistent or unreadable paths."""
+
+    def _writeFits(self, header_pairs, dirpath):
+        """Write a header-only ``test.fits`` into ``dirpath``; return its path."""
+        header = fits.Header()
+        for keyword, value in header_pairs:
+            header[keyword] = value
+        path = os.path.join(dirpath, "test.fits")
+        fits.PrimaryHDU(header=header).writeto(path)
+        return path
+
+    def testKnownModelReturnsTrue(self):
+        registeredName = next(iter(MODEL_REGISTRY))
+        with tempfile.TemporaryDirectory() as tmpDir:
+            fitsPath = self._writeFits([("MODEL", registeredName)], tmpDir)
+            self.assertTrue(isH4LinearityFile(fitsPath))
+
+    def testUnknownModelReturnsFalse(self):
+        with tempfile.TemporaryDirectory() as tmpDir:
+            fitsPath = self._writeFits([("MODEL", "not-a-model-name-XYZ")], tmpDir)
+            self.assertFalse(isH4LinearityFile(fitsPath))
+
+    def testMissingModelKeyReturnsFalse(self):
+        """No MODEL keyword at all, as in a legacy nirLinearity-style file."""
+        with tempfile.TemporaryDirectory() as tmpDir:
+            fitsPath = self._writeFits([("OBJECT", "anything")], tmpDir)
+            self.assertFalse(isH4LinearityFile(fitsPath))
+
+    def testNonexistentPathReturnsFalse(self):
+        self.assertFalse(isH4LinearityFile("/nonexistent/path/that/does/not/exist.fits"))
+
+    def testNonFitsFileReturnsFalse(self):
+        with tempfile.TemporaryDirectory() as tmpDir:
+            bogusPath = os.path.join(tmpDir, "garbage.fits")
+            with open(bogusPath, "wb") as stream:
+                stream.write(b"this is not a FITS file")
+            self.assertFalse(isH4LinearityFile(bogusPath))
+
+
+class TestMemory(lsst.utils.tests.MemoryTestCase):
+    """Run the checks inherited from ``lsst.utils.tests.MemoryTestCase`` as-is."""
+
+
+def setup_module(module):
+    lsst.utils.tests.init()  # same initialisation the __main__ path performs before unittest.main()
+
+
+if __name__ == "__main__":
+    lsst.utils.tests.init()  # mirror setup_module when the file is run as a script
+    unittest.main()
diff --git a/tests/test_h4Linearity_apply.py b/tests/test_h4Linearity_apply.py
new file mode 100644
index 00000000..57396ac7
--- /dev/null
+++ b/tests/test_h4Linearity_apply.py
@@ -0,0 +1,284 @@
+"""End-to-end integration tests for ``h4Linearity.apply`` with an
+asymmetric, position-encoded ramp.
+
+The polynomial models themselves are exercised in ``test_polynomial.py``;
+this file's job is to confirm the cube-level orchestration in ``apply``
+preserves axis order and pixel identity. Each per-pixel Chebyshev
+coefficient depends on ``(y, x)`` so an axis swap would yield a
+visibly wrong output value at any pixel, not just at the corners.
+
+Conventions verified here:
+- Input ``Ramp.reads`` is ``(H, W, N)`` with the time axis last;
+  output ``cumulativeLinear`` is the same shape.
+- Output ``badPixelMask`` is ``(H, W)`` with axes matching the spatial
+  plane of ``reads``.
+- ``BELOW_VALID_RANGE`` / ``ABOVE_VALID_RANGE`` bits fire on the
+  per-pixel ``fitMin`` / ``fitMax`` interval, not a global one.
+- Bad pixels (``correction.badPixelMask != 0`` or ``ramp.validMask``)
+  pass their input value through unchanged.
+"""
+import unittest
+
+import numpy as np
+
+import lsst.utils.tests
+from lsst.obs.pfs.h4Linearity.apply import apply
+from lsst.obs.pfs.h4Linearity.models.polynomial import PolynomialModel
+from lsst.obs.pfs.h4Linearity.types import (
+    ABOVE_VALID_RANGE,
+    BELOW_VALID_RANGE,
+    Diagnostics,
+    LinearityCorrection,
+    MASKED_BY_INPUT,
+    Ramp,
+)
+
+
+N_READS = 11
+H = 5
+W = 7
+FIT_MIN_VALUE = 0.0
+FIT_MAX_VALUE = 100.0
+
+
+# Per-pixel linear-in-Chebyshev-domain slope. With c1[y, x] = y * 10 + x
+# and c0 = 0, the linearized t = c1[y, x] * cheb_x; mismatched axes
+# produce a different value at every pixel.
+def _expectedC1(H, W):
+    """Return the ``(H, W)`` float32 slope plane ``c1[y, x] = 10 * y + x``."""
+    rowTerm = np.arange(H, dtype=np.float32) * 10.0
+    return np.add.outer(rowTerm, np.arange(W, dtype=np.float32))
+
+
+def _makeCorrection(H, W, fitMin=FIT_MIN_VALUE, fitMax=FIT_MAX_VALUE,
+                    badPixelMask=None):
+    """Return an order-1 polynomial ``LinearityCorrection`` for an ``H x W`` frame.
+
+    The offset plane ``coefs[0]`` is zero and the slope plane ``coefs[1]``
+    is ``y * 10 + x``; ``fitMin`` / ``fitMax`` are applied uniformly and
+    ``badPixelMask`` defaults to an all-clear uint8 mask.
+    """
+    shape = (H, W)
+    coefs = np.zeros((2,) + shape, dtype=np.float32)
+    coefs[1] = _expectedC1(H, W)
+    mask = np.zeros(shape, dtype=np.uint8) if badPixelMask is None else badPixelMask
+    # Placeholder diagnostics: zero residuals, every read counted as used.
+    diag = Diagnostics(
+        residualRms=np.zeros(shape, dtype=np.float32),
+        maxAbsResidual=np.zeros(shape, dtype=np.float32),
+        nPointsUsed=np.full(shape, N_READS, dtype=np.int32),
+        monotonic=np.ones(shape, dtype=bool),
+        conditionNumber=np.ones(shape, dtype=np.float32),
+        summary={},
+    )
+    return LinearityCorrection(
+        model=PolynomialModel(order=1), coefficients=coefs,
+        fitMin=np.full(shape, fitMin, dtype=np.float32),
+        fitMax=np.full(shape, fitMax, dtype=np.float32),
+        badPixelMask=mask, diagnostics=diag,
+    )
+
+
+RAMP_RATE = 5.0
+# cumulative[k] = (k+1)*RAMP_RATE; chosen so even N=11 stays inside the
+# default [FIT_MIN_VALUE, FIT_MAX_VALUE] window.
+
+
+def _makeRamp(N, H, W, *, fillValue=None):
+    """Return a float32 ``(H, W, N)`` cumulative ramp.
+
+    By default read ``k`` equals ``(k + 1) * RAMP_RATE`` at every pixel,
+    which is easy to predict in the Chebyshev-rescaled domain. A
+    non-None ``fillValue`` instead fills the whole cube with that value.
+    """
+    if fillValue is not None:
+        return np.full((H, W, N), float(fillValue), dtype=np.float32)
+    levels = np.arange(1, N + 1, dtype=np.float32) * RAMP_RATE
+    return np.broadcast_to(levels, (H, W, N)).astype(np.float32, copy=True)
+
+
+def _expectedLinearized(reads, c1, fitMin, fitMax):
+    """Reference linearization ``t = c1 * cheb_x`` for an order-1 model.
+
+    ``cheb_x = 2 * (m - fitMin) / (fitMax - fitMin) - 1`` rescales the
+    ``(H, W, N)`` reads; the ``(H, W)`` slope plane ``c1`` is broadcast
+    along the trailing time axis. ``fitMin`` / ``fitMax`` are scalars here.
+    """
+    span = fitMax - fitMin
+    return (c1[..., None] * (2.0 * (reads - fitMin) / span - 1.0)).astype(np.float32)
+
+
+class ApplyAxisOrderTestCase(lsst.utils.tests.TestCase):
+    """``apply`` must hand the cube back in the axis order it received, with
+    every pixel linearized through its own coefficients."""
+
+    def testOutputShapeMatchesInput(self):
+        result = apply(_makeCorrection(H, W), Ramp(reads=_makeRamp(N_READS, H, W)))
+        # The time axis stays last on the cube; the mask covers only the
+        # spatial plane.
+        self.assertEqual(result.cumulativeLinear.shape, (H, W, N_READS))
+        self.assertEqual(result.badPixelMask.shape, (H, W))
+
+    def testValuesMatchPerPixelCoefficients(self):
+        pristine = _makeRamp(N_READS, H, W)
+        # apply() mutates ramp.reads, so it is handed a copy.
+        result = apply(_makeCorrection(H, W), Ramp(reads=pristine.copy()))
+        reference = _expectedLinearized(
+            pristine, _expectedC1(H, W), FIT_MIN_VALUE, FIT_MAX_VALUE
+        )
+        np.testing.assert_allclose(
+            result.cumulativeLinear, reference,
+            atol=1e-4,
+            err_msg="apply() must linearize each pixel through its own coefficients",
+        )
+
+    def testPositionAsymmetricSpotChecks(self):
+        # Three asymmetric (k, y, x) samples are pinned by value so that a
+        # (y, x) transposition — invisible to any H == W fixture — fails
+        # a direct comparison.
+        result = apply(_makeCorrection(H, W), Ramp(reads=_makeRamp(N_READS, H, W)))
+        # At read k, m = (k+1)*RAMP_RATE gives cheb_x = 2*m/100 - 1
+        # = (k+1)*0.1 - 1, hence t(y, x) = (y*10 + x) * ((k+1)*0.1 - 1).
+        spotChecks = [(0, 2, 3), (5, 0, 6), (10, 4, 1)]
+        for k, y, x in spotChecks:
+            slope = y * 10.0 + x
+            chebX = (k + 1) * 0.1 - 1.0
+            expected = slope * chebX
+            self.assertAlmostEqual(
+                float(result.cumulativeLinear[y, x, k]),
+                float(expected),
+                places=4,
+                msg=f"apply mismatch at (y={y}, x={x}, k={k})",
+            )
+
+
+class ApplyBadPixelTestCase(lsst.utils.tests.TestCase):
+    """Pixels flagged at fit time or by the caller's ``validMask`` keep
+    their input values, and the merged output mask reports them."""
+
+    def testFitTimeBadPixelsPassThrough(self):
+        fitTimeBit = 0x04  # arbitrary fit-time bad bit
+        fitMask = np.zeros((H, W), dtype=np.uint8)
+        fitMask[2, 3] = fitTimeBit
+        pristine = _makeRamp(N_READS, H, W)
+        result = apply(
+            _makeCorrection(H, W, badPixelMask=fitMask), Ramp(reads=pristine.copy())
+        )
+        # Every read of the bad pixel must come back untouched.
+        np.testing.assert_array_equal(
+            result.cumulativeLinear[2, 3, :], pristine[2, 3, :],
+            err_msg="fit-time bad pixel must pass through unchanged",
+        )
+        # Its fit-time bit must survive into the output mask ...
+        self.assertEqual(int(result.badPixelMask[2, 3]) & fitTimeBit, fitTimeBit)
+        # ... while untouched pixels stay clean.
+        for y, x in ((0, 0), (4, 6)):
+            self.assertEqual(int(result.badPixelMask[y, x]), 0)
+
+    def testValidMaskMergedAsMaskedByInput(self):
+        callerMask = np.zeros((H, W), dtype=np.uint8)
+        callerMask[1, 5] = 1  # caller-supplied defect
+        pristine = _makeRamp(N_READS, H, W)
+        ramp = Ramp(reads=pristine.copy(), validMask=callerMask)
+        result = apply(_makeCorrection(H, W), ramp)
+        # The flagged pixel must pass through unchanged and carry the
+        # MASKED_BY_INPUT bit in the merged mask.
+        np.testing.assert_array_equal(
+            result.cumulativeLinear[1, 5, :],
+            pristine[1, 5, :],
+            err_msg="caller-flagged defect must pass through unchanged",
+        )
+        flagged = int(result.badPixelMask[1, 5]) & MASKED_BY_INPUT
+        self.assertEqual(flagged, MASKED_BY_INPUT)
+
+
+class ApplyRangeFlagsTestCase(lsst.utils.tests.TestCase):
+    """Per-pixel ``fitMin`` / ``fitMax`` range checks must set the matching mask bits."""
+
+    def testAboveRangeFlagged(self):
+        # The final read of pixel (3, 4) is pushed just past fitMax (100).
+        reads = _makeRamp(N_READS, H, W)
+        reads[3, 4, -1] = FIT_MAX_VALUE + 1.0
+        result = apply(_makeCorrection(H, W), Ramp(reads=reads.copy()))
+        mask = result.badPixelMask
+        self.assertEqual(
+            int(mask[3, 4]) & ABOVE_VALID_RANGE,
+            ABOVE_VALID_RANGE,
+        )
+        # Its neighbours must not pick up the flag.
+        for y, x in ((3, 3), (4, 4)):
+            self.assertEqual(int(mask[y, x]) & ABOVE_VALID_RANGE, 0)
+
+    def testBelowRangeFlagged(self):
+        # The first read of pixel (1, 2) is pushed just below fitMin (0).
+        reads = _makeRamp(N_READS, H, W)
+        reads[1, 2, 0] = FIT_MIN_VALUE - 1.0
+        result = apply(_makeCorrection(H, W), Ramp(reads=reads.copy()))
+        self.assertEqual(
+            int(result.badPixelMask[1, 2]) & BELOW_VALID_RANGE,
+            BELOW_VALID_RANGE,
+        )
+
+    def testPerPixelFitRangeNotGlobal(self):
+        # Pixel (2, 5) gets a tighter fitMax than the rest of the frame;
+        # reads inside the global range but beyond that limit must flag.
+        base = _makeCorrection(H, W)
+        tightFitMax = base.fitMax.copy()
+        tightFitMax[2, 5] = 50.0
+        # LinearityCorrection is frozen, so a modified copy is rebuilt
+        # field by field rather than mutated.
+        correction = LinearityCorrection(
+            model=base.model,
+            coefficients=base.coefficients,
+            fitMin=base.fitMin,
+            fitMax=tightFitMax,
+            badPixelMask=base.badPixelMask,
+            diagnostics=base.diagnostics,
+        )
+        # The default ramp climbs to N_READS * RAMP_RATE = 55, above the
+        # per-pixel fitMax of 50 at (2, 5).
+        reads = _makeRamp(N_READS, H, W)
+        result = apply(correction, Ramp(reads=reads.copy()))
+        mask = result.badPixelMask
+        self.assertEqual(
+            int(mask[2, 5]) & ABOVE_VALID_RANGE,
+            ABOVE_VALID_RANGE,
+        )
+        # A neighbour still on the default fitMax=100 stays clean.
+        self.assertEqual(int(mask[2, 4]) & ABOVE_VALID_RANGE, 0)
+
+
+class ApplyInputValidationTestCase(lsst.utils.tests.TestCase):
+    """Badly shaped ``Ramp.reads`` must surface as ``ValueError``."""
+
+    def _assertRejected(self, readsShape):
+        """Expect ValueError for an all-zero float32 cube of ``readsShape``."""
+        correction = _makeCorrection(H, W)
+        with self.assertRaises(ValueError):
+            apply(correction, Ramp(reads=np.zeros(readsShape, dtype=np.float32)))
+
+    def testRejectsNon3D(self):
+        # A bare (H, W) plane has no time axis.
+        self._assertRejected((H, W))
+
+    def testRejectsZeroReads(self):
+        # Three axes, but the time axis is empty.
+        self._assertRejected((H, W, 0))
+
+    def testRejectsHWMismatch(self):
+        # One extra row: the (H + 1, W) spatial plane must be rejected
+        # rather than silently broadcast against the (H, W) correction.
+        self._assertRejected((H + 1, W, N_READS))
+
+
+class TestMemory(lsst.utils.tests.MemoryTestCase):
+    """Run the checks inherited from ``lsst.utils.tests.MemoryTestCase`` as-is."""
+
+
+def setup_module(module):
+    lsst.utils.tests.init()  # same initialisation the __main__ path performs before unittest.main()
+
+
+if __name__ == "__main__":
+    lsst.utils.tests.init()  # mirror setup_module when the file is run as a script
+    unittest.main()
diff --git a/tests/test_h4_cube_semantics.py b/tests/test_h4_cube_semantics.py
new file mode 100644
index 00000000..4113f2a2
--- /dev/null
+++ b/tests/test_h4_cube_semantics.py
@@ -0,0 +1,111 @@
+"""Sanity tests for the (N, H, W) ramp cube axis convention.
+
+These tests are a canary for any change that accidentally swaps axes,
+permutes a layout, or otherwise scrambles the convention used across
+the H4 ISR pipeline: axis 0 is the read index, axes 1 and 2 are the
+spatial (y, x) plane. They use a value-encoded cube where each sample
+encodes its own ``(k, y, x)`` position, so any axis permutation yields
+a measurably different (and diagnosable) value at any sample.
+
+Tests in other files import ``valueEncodedRamp`` from here as a
+fixture for integration tests that need to track per-sample identity
+across pipeline stages.
+"""
+import unittest
+
+import numpy as np
+
+import lsst.utils.tests
+
+
+K_FACTOR = 10_000  # multiplier applied to the read index k by valueEncodedRamp()
+Y_FACTOR = 100  # multiplier applied to the row index y by valueEncodedRamp()
+# x has factor 1 (no separate constant). With H <= 99 and W <= 99,
+# the contribution from each axis is unambiguous: the floor-divide by
+# 10_000 recovers k, by 100 (after subtracting k * K_FACTOR) recovers y,
+# and the remainder is x. Any axis swap or permutation in pipeline code
+# yields a value with the wrong axis's contribution in the wrong place.
+
+
+def valueEncodedRamp(nReads, H, W, *, dtype=np.float32):
+    """Build a cube where ``cube[k, y, x] = k*K_FACTOR + y*Y_FACTOR + x``.
+
+    Returns a C-contiguous ``(nReads, H, W)`` array. Designed for tests
+    that need to verify axis order is preserved across a transformation:
+    if a function accidentally permutes axes, the value at any returned
+    sample will not match the expected encoding.
+    """
+    k = np.arange(nReads, dtype=dtype)[:, None, None]  # read index, shape (nReads, 1, 1)
+    y = np.arange(H, dtype=dtype)[None, :, None]  # row index, shape (1, H, 1)
+    x = np.arange(W, dtype=dtype)[None, None, :]  # column index, shape (1, 1, W)
+    return k * K_FACTOR + y * Y_FACTOR + x  # broadcasting yields (nReads, H, W)
+
+
+class CubeSemanticsTestCase(lsst.utils.tests.TestCase):
+    """Pin down what ``cube[k, y, x]`` means under the project convention."""
+
+    def setUp(self):
+        # Asymmetric N != H != W so axis swaps would change the shape.
+        self.N = 11
+        self.H = 5
+        self.W = 7
+        self.cube = valueEncodedRamp(self.N, self.H, self.W)  # default dtype (float32)
+
+    def testShape(self):
+        self.assertEqual(self.cube.shape, (self.N, self.H, self.W))
+
+    def testEncoding(self):
+        # cube[k, y, x] == k * K + y * Y + x. Spot-check asymmetric coords
+        # so a (y, x) transposition would be loud.
+        self.assertEqual(self.cube[5, 2, 3], 5 * K_FACTOR + 2 * Y_FACTOR + 3)
+        self.assertEqual(self.cube[3, 4, 6], 3 * K_FACTOR + 4 * Y_FACTOR + 6)
+        self.assertEqual(self.cube[10, 0, 0], 10 * K_FACTOR)  # last read, origin pixel: k term only
+        self.assertEqual(self.cube[0, 4, 6], 4 * Y_FACTOR + 6)  # read 0, last pixel: y and x terms only
+
+    def testPerPixelRamp(self):
+        # cube[:, y, x] is the per-pixel time series at (y, x).
+        ramp = self.cube[:, 2, 3]
+        self.assertEqual(ramp.shape, (self.N,))
+        # Steps along the ramp: cube[k+1, y, x] - cube[k, y, x] = K_FACTOR.
+        np.testing.assert_array_equal(
+            np.diff(ramp), K_FACTOR * np.ones(self.N - 1, dtype=ramp.dtype)
+        )
+
+    def testPerReadFrame(self):
+        # cube[k, :, :] is the 2-D frame for read k.
+        frame = self.cube[5, :, :]
+        self.assertEqual(frame.shape, (self.H, self.W))
+        for y in range(self.H):
+            for x in range(self.W):  # exhaustive check of every pixel in the frame
+                self.assertEqual(frame[y, x], 5 * K_FACTOR + y * Y_FACTOR + x)
+
+    def testDiffAxis0(self):
+        # np.diff(axis=0) computes per-read deltas; shape (N-1, H, W).
+        deltas = np.diff(self.cube, axis=0)
+        self.assertEqual(deltas.shape, (self.N - 1, self.H, self.W))
+        # Every delta equals K_FACTOR (the per-read step).
+        np.testing.assert_array_equal(
+            deltas, K_FACTOR * np.ones_like(deltas)
+        )
+
+    def testCumsumAxis0Roundtrip(self):
+        # diff -> cumsum reconstructs the cube once read 0 is added back.
+        deltas = np.diff(self.cube, axis=0)
+        reconstructed = np.empty_like(self.cube)
+        reconstructed[0:1] = self.cube[0:1]  # read 0 is copied verbatim
+        np.cumsum(deltas, axis=0, out=reconstructed[1:])  # running sum written into reads 1..N-1
+        reconstructed[1:] += self.cube[0:1]  # re-anchor the running sum on read 0
+        np.testing.assert_array_equal(reconstructed, self.cube)  # exact: all values are integers < 2**24
+
+
+class TestMemory(lsst.utils.tests.MemoryTestCase):
+    pass  # adds nothing of its own; all behaviour is inherited from MemoryTestCase
+
+
+def setup_module(module):
+    lsst.utils.tests.init()  # the ``module`` argument is accepted but not used
+
+
+if __name__ == "__main__":
+    lsst.utils.tests.init()  # same initialisation call that setup_module() makes
+    unittest.main()  # direct execution: run this file's tests via unittest
diff --git a/tests/test_isrHeaderKeys.py b/tests/test_isrHeaderKeys.py
new file mode 100644
index 00000000..3602c620
--- /dev/null
+++ b/tests/test_isrHeaderKeys.py
@@ -0,0 +1,195 @@
+import unittest
+
+import numpy as np
+
+import lsst.afw.image as afwImage
+import lsst.geom as geom
+import lsst.utils.tests
+from lsst.obs.pfs import h4Linearity
+from lsst.obs.pfs.isrTask import (
+    _makeInternalMask, _projectInternalMask, _stampRampMetadata,
+)
+
+
+class StampRampMetadataTestCase(lsst.utils.tests.TestCase):
+    """`_stampRampMetadata` writes the expected H4* keys on the
+    exposure's metadata so downstream consumers can tell partial-ramp
+    products apart from a full-ramp postISRCCD.
+    """
+
+    def _makeExposure(self):
+        return afwImage.ExposureF(geom.Extent2I(4, 4))  # small 4x4 exposure; only its metadata is inspected
+
+    def testFullRampWithUTR(self):
+        exp = self._makeExposure()
+        _stampRampMetadata(exp, r0=0, r1=39, nTotal=40, appliedUTR=True)
+        md = exp.getMetadata()
+        self.assertEqual(int(md.getScalar('H4READ0')), 0)  # expected: r0
+        self.assertEqual(int(md.getScalar('H4READ1')), 39)  # expected: r1
+        self.assertEqual(int(md.getScalar('H4NREAD')), 40)  # consistent with r1 - r0 + 1
+        self.assertEqual(int(md.getScalar('H4NTOT')), 40)  # expected: nTotal
+        self.assertTrue(bool(md.getScalar('H4UTRWT')))  # expected: appliedUTR
+
+    def testPartialRampMatchesFirstHalfRange(self):
+        """The first-half slice carries the trimmed range, not the full ramp's."""
+        exp = self._makeExposure()
+        _stampRampMetadata(exp, r0=1, r1=22, nTotal=40, appliedUTR=True)
+        md = exp.getMetadata()
+        self.assertEqual(int(md.getScalar('H4READ0')), 1)
+        self.assertEqual(int(md.getScalar('H4READ1')), 22)
+        self.assertEqual(int(md.getScalar('H4NREAD')), 22)  # reads in the slice (r1 - r0 + 1), not nTotal
+        self.assertEqual(int(md.getScalar('H4NTOT')), 40)  # full-ramp length is still reported
+
+    def testCdsLayout(self):
+        """A CDS (non-UTR-weighted) product marks itself via H4UTRWT=False."""
+        exp = self._makeExposure()
+        _stampRampMetadata(exp, r0=0, r1=39, nTotal=40, appliedUTR=False)
+        self.assertFalse(bool(exp.getMetadata().getScalar('H4UTRWT')))
+
+    def testCommentsArePresent(self):
+        """Each key carries a one-line comment for the FITS header."""
+        exp = self._makeExposure()
+        _stampRampMetadata(exp, r0=0, r1=39, nTotal=40, appliedUTR=True)
+        md = exp.getMetadata()
+        for key in ('H4READ0', 'H4READ1', 'H4NREAD', 'H4NTOT', 'H4UTRWT'):
+            self.assertTrue(md.getComment(key), msg=f'{key} should carry a comment')  # checks truthiness only
+
+
+class MakeInternalMaskTestCase(lsst.utils.tests.TestCase):
+    """`_makeInternalMask` seeds the H4 ISR internal mask with
+    BORDER_PIX on the outer ring plus MASKED_BY_INPUT from the defects
+    calib and whatever bits the linearity calib's badPixelMask carries.
+    """  # NOTE(review): only the border ring and the dtype are exercised by the tests below.
+
+    def testBorderRingSet(self):
+        m = _makeInternalMask((20, 24))  # no borderWidth passed; the slices below assume a default of 4
+        # Outer 4 rows/cols carry BORDER_PIX; interior is zero.
+        for region in (m[:4, :], m[-4:, :], m[:, :4], m[:, -4:]):
+            self.assertTrue(((region & h4Linearity.BORDER_PIX)
+                             == h4Linearity.BORDER_PIX).all())
+        self.assertEqual(int(m[4:-4, 4:-4].sum()), 0)  # zero sum: no interior pixel has any bit set
+
+    def testBorderWidthRespected(self):
+        m = _makeInternalMask((16, 16), borderWidth=2)
+        self.assertTrue(((m[:2, :] & h4Linearity.BORDER_PIX)  # only the top edge is checked here
+                         == h4Linearity.BORDER_PIX).all())
+        self.assertEqual(int(m[2:-2, 2:-2].sum()), 0)  # everything inside the 2-pixel ring is clean
+
+    def testInternalMaskDtypeUint16(self):
+        m = _makeInternalMask((16, 16))
+        self.assertEqual(m.dtype, np.uint16)
+
+
+class ProjectInternalMaskTestCase(lsst.utils.tests.TestCase):
+    """`_projectInternalMask` lifts internal-mask bits onto Exposure.mask
+    planes following the canonical projection rule.
+    """
+
+    def _makeExposure(self, H=16, W=16):
+        return afwImage.ExposureF(geom.Extent2I(W, H))  # Extent2I takes (width, height)
+
+    def testBorderProjectsToBADOnly(self):
+        """BORDER_PIX → BAD (no separate BORDER plane published)."""
+        exp = self._makeExposure(H=12, W=12)
+        internal = _makeInternalMask((12, 12))  # default border; the slices below assume width 4
+        _projectInternalMask(exp, internal)
+        badBit = exp.mask.getPlaneBitMask('BAD')
+        # Outer ring all BAD; interior clean.
+        for region in (exp.mask.array[:4, :], exp.mask.array[-4:, :],
+                       exp.mask.array[:, :4], exp.mask.array[:, -4:]):
+            self.assertTrue(((region & badBit) == badBit).all())
+        self.assertEqual(int(exp.mask.array[4:-4, 4:-4].sum()), 0)
+        # No BORDER plane registered.
+        self.assertNotIn('BORDER', exp.mask.getMaskPlaneDict())
+
+    def testDarkDefectProjection(self):
+        """MASKED_BY_INPUT → DARK_DEFECT + BAD."""
+        exp = self._makeExposure(H=8, W=8)
+        internal = np.zeros((8, 8), dtype=np.uint16)
+        internal[3, 4] |= h4Linearity.MASKED_BY_INPUT  # flag exactly one pixel
+        _projectInternalMask(exp, internal)
+        darkDefectBit = exp.mask.getPlaneBitMask('DARK_DEFECT')
+        badBit = exp.mask.getPlaneBitMask('BAD')
+        self.assertEqual(exp.mask.array[3, 4] & darkDefectBit, darkDefectBit)
+        self.assertEqual(exp.mask.array[3, 4] & badBit, badBit)
+
+    def testDeadGroupProjection(self):
+        """Any of INSUFFICIENT_POINTS / FIT_FAILED / NON_MONOTONIC →
+        LINEARITY_DEFECT + BAD.
+        """
+        exp = self._makeExposure(H=8, W=8)
+        internal = np.zeros((8, 8), dtype=np.uint16)
+        internal[1, 1] |= h4Linearity.INSUFFICIENT_POINTS
+        internal[2, 2] |= h4Linearity.FIT_FAILED
+        internal[3, 3] |= h4Linearity.NON_MONOTONIC
+        _projectInternalMask(exp, internal)
+        want = exp.mask.getPlaneBitMask('LINEARITY_DEFECT') | exp.mask.getPlaneBitMask('BAD')
+        for y, x in [(1, 1), (2, 2), (3, 3)]:  # one pixel per internal bit; each needs both planes
+            self.assertEqual(exp.mask.array[y, x] & want, want)
+
+    def testSatProjection(self):
+        """ABOVE_VALID_RANGE → SAT + BAD."""
+        exp = self._makeExposure(H=8, W=8)
+        internal = np.zeros((8, 8), dtype=np.uint16)
+        internal[4, 5] |= h4Linearity.ABOVE_VALID_RANGE
+        _projectInternalMask(exp, internal)
+        satBit = exp.mask.getPlaneBitMask('SAT')
+        badBit = exp.mask.getPlaneBitMask('BAD')
+        self.assertEqual(exp.mask.array[4, 5] & satBit, satBit)
+        self.assertEqual(exp.mask.array[4, 5] & badBit, badBit)
+
+    def testUnstableProjection(self):
+        """UNSTABLE bit → UNSTABLE plane + BAD."""
+        exp = self._makeExposure(H=8, W=8)
+        internal = np.zeros((8, 8), dtype=np.uint16)
+        internal[5, 2] |= h4Linearity.UNSTABLE
+        _projectInternalMask(exp, internal)
+        unstBit = exp.mask.getPlaneBitMask('UNSTABLE')
+        badBit = exp.mask.getPlaneBitMask('BAD')
+        self.assertEqual(exp.mask.array[5, 2] & unstBit, unstBit)
+        self.assertEqual(exp.mask.array[5, 2] & badBit, badBit)
+
+    def testUnclassifiedNotPublished(self):
+        """UNCLASSIFIED bit folds into BAD but not into any standalone
+        external plane.
+        """
+        exp = self._makeExposure(H=8, W=8)
+        internal = np.zeros((8, 8), dtype=np.uint16)
+        internal[6, 6] |= h4Linearity.UNCLASSIFIED
+        _projectInternalMask(exp, internal)
+        badBit = exp.mask.getPlaneBitMask('BAD')
+        self.assertEqual(exp.mask.array[6, 6] & badBit, badBit)  # NOTE(review): "no own plane" is not asserted
+
+    def testNoAsicGlitchPlaneCreated(self):
+        """The refactor drops ASIC_GLITCH from the published mask."""
+        exp = self._makeExposure(H=8, W=8)
+        _projectInternalMask(exp, np.zeros((8, 8), dtype=np.uint16))  # all-zero internal mask
+        self.assertNotIn('ASIC_GLITCH', exp.mask.getMaskPlaneDict())
+
+    def testAsicGlitchInternalBitFoldsIntoBADOnly(self):
+        """The ASIC_GLITCH internal bit (set by ``makeNirExposure``
+        when a glitch-pair height clears
+        ``asicGlitchHeightMaskADU``) lifts to BAD via the
+        any-bit-set catch-all, without registering its own external
+        plane.
+        """
+        exp = self._makeExposure(H=8, W=8)
+        internal = np.zeros((8, 8), dtype=np.uint16)
+        internal[2, 5] |= h4Linearity.ASIC_GLITCH
+        _projectInternalMask(exp, internal)
+        badBit = exp.mask.getPlaneBitMask('BAD')
+        self.assertEqual(exp.mask.array[2, 5] & badBit, badBit)
+        self.assertNotIn('ASIC_GLITCH', exp.mask.getMaskPlaneDict())
+
+
+class TestMemory(lsst.utils.tests.MemoryTestCase):
+    pass  # adds nothing of its own; all behaviour is inherited from MemoryTestCase
+
+
+def setup_module(module):
+    lsst.utils.tests.init()  # the ``module`` argument is accepted but not used
+
+
+if __name__ == "__main__":
+    lsst.utils.tests.init()  # same initialisation call that setup_module() makes
+    unittest.main()  # direct execution: run this file's tests via unittest
diff --git a/tests/test_polynomial.py b/tests/test_polynomial.py
new file mode 100644
index 00000000..3330d165
--- /dev/null
+++ b/tests/test_polynomial.py
@@ -0,0 +1,116 @@
+"""Sanity tests for ``PolynomialModel.{chebToMonomial,evaluateMonomial,evaluate}``.
+
+Production reads Chebyshev coefficients from disk and evaluates them
+per pixel inside ``h4Linearity.apply``. ``apply`` calls
+``chebToMonomial`` once and ``evaluateMonomial`` per chunk — a
+Horner-on-monomial that needs one fewer (N, H, W) buffer than direct
+Clenshaw evaluation on the Chebyshev coefficients would. The trade-off
+is numerical (monomial form is ill-conditioned at high order). Order
+<= 5 is fine; these tests cover that range and assert agreement with
+``numpy.polynomial.chebyshev.chebval`` to float32 precision.
+"""
+import unittest
+
+import numpy as np
+
+import lsst.utils.tests
+from lsst.obs.pfs.h4Linearity.models.polynomial import PolynomialModel
+
+
+def _refChebval(coefs, x):
+    """Per-pixel reference: numpy's chebval with coefficients on axis 0.
+
+    ``x`` has spatial axes leading and trailing axes broadcastable over
+    each pixel's per-time samples (production layout: ``(H, W, N)``).
+    """
+    H, W = coefs.shape[1], coefs.shape[2]  # coefs is indexed as (coefficient, h, w)
+    out = np.empty(x.shape, dtype=x.dtype)  # result has the same shape and dtype as x
+    for h in range(H):
+        for w in range(W):  # one independent chebval call per pixel
+            out[h, w, ...] = np.polynomial.chebyshev.chebval(
+                x[h, w, ...], coefs[:, h, w]
+            )
+    return out
+
+
+class ChebToMonomialTestCase(lsst.utils.tests.TestCase):
+
+    def _matchesReference(self, order):
+        rng = np.random.default_rng(order)  # seeded with the order: fixed data per order
+        H, W = 3, 4
+        cheb = rng.standard_normal((order + 1, H, W)).astype(np.float32)  # order + 1 coefficients on axis 0
+        mon = PolynomialModel(order=order).chebToMonomial(cheb)
+
+        # mon should match per-pixel cheb2poly applied 1-D.
+        for h in range(H):
+            for w in range(W):
+                refMon = np.polynomial.chebyshev.cheb2poly(cheb[:, h, w])
+                actMon = mon[:len(refMon), h, w]  # compare only as many terms as the reference has
+                np.testing.assert_allclose(actMon, refMon, rtol=1e-5, atol=1e-5)
+                # Any trailing entries (when cheb2poly trims) should be zero.
+                if mon.shape[0] > len(refMon):
+                    np.testing.assert_array_equal(
+                        mon[len(refMon):, h, w],
+                        np.zeros(mon.shape[0] - len(refMon), dtype=mon.dtype),
+                    )
+
+    def testOrders1To5MatchNumpy(self):
+        for order in (1, 2, 3, 4, 5):
+            with self.subTest(order=order):  # each order is reported as its own sub-test
+                self._matchesReference(order)
+
+    def testOrder1IsIdentityForLinearTerm(self):
+        # T_0 = 1, T_1 = x → for order=1, mon[0]=cheb[0], mon[1]=cheb[1].
+        cheb = np.array(
+            [[[1.5, 2.5]], [[3.0, 4.0]]], dtype=np.float32
+        )  # (2, 1, 2)
+        mon = PolynomialModel(order=1).chebToMonomial(cheb)
+        np.testing.assert_array_equal(mon, cheb)  # exact equality, values included
+
+
+class EvaluateMonomialTestCase(lsst.utils.tests.TestCase):
+
+    def _matchesChebvalAt(self, order, xShape):
+        rng = np.random.default_rng(100 + order)  # fixed seed per order
+        H, W = 3, 4
+        cheb = rng.standard_normal((order + 1, H, W)).astype(np.float32)
+        x = rng.uniform(-1.0, 1.0, size=(H, W) + xShape).astype(np.float32)  # xShape is the trailing per-pixel shape
+        model = PolynomialModel(order=order)
+        t = model.evaluate(cheb, x)  # exercised through evaluate(), fed Chebyshev coefficients
+        tRef = _refChebval(cheb, x)
+        np.testing.assert_allclose(t, tRef, rtol=1e-3, atol=1e-3)
+
+    def testOrders1To5(self):
+        for order in (1, 2, 3, 4, 5):
+            with self.subTest(order=order):
+                self._matchesChebvalAt(order, xShape=(7,))  # 7 samples per pixel
+
+    def testWorksOnSingleFrame(self):
+        # x shape (H, W) — what apply()'s applyFrame uses.
+        self._matchesChebvalAt(order=4, xShape=())  # empty trailing shape -> one sample per pixel
+
+    def testEvaluateMonomialMatchesEvaluate(self):
+        """evaluate() == evaluateMonomial(chebToMonomial(c), x)."""
+        rng = np.random.default_rng(7)
+        order = 4
+        H, W = 3, 4
+        cheb = rng.standard_normal((order + 1, H, W)).astype(np.float32)
+        x = rng.uniform(-1.0, 1.0, size=(H, W, 5)).astype(np.float32)
+        model = PolynomialModel(order=order)
+        mon = model.chebToMonomial(cheb).astype(np.float32, copy=False)  # cast to float32 only if needed
+        t1 = model.evaluate(cheb, x)
+        t2 = model.evaluateMonomial(mon, x)
+        np.testing.assert_array_equal(t1, t2)  # bit-for-bit equality, not allclose
+
+
+class TestMemory(lsst.utils.tests.MemoryTestCase):
+    pass  # adds nothing of its own; all behaviour is inherited from MemoryTestCase
+
+
+def setup_module(module):
+    lsst.utils.tests.init()  # the ``module`` argument is accepted but not used
+
+
+if __name__ == "__main__":
+    lsst.utils.tests.init()  # same initialisation call that setup_module() makes
+    unittest.main()  # direct execution: run this file's tests via unittest
diff --git a/tests/test_sim.py b/tests/test_sim.py
new file mode 100644
index 00000000..5565dfd4
--- /dev/null
+++ b/tests/test_sim.py
@@ -0,0 +1,653 @@
+import unittest
+
+import numpy as np
+
+import lsst.utils.tests
+from lsst.obs.pfs.h4Linearity import cr, sim
+
+
+class RampGenerationTestCase(lsst.utils.tests.TestCase):
+
+    def testCleanRampIsLinearOnAverage(self):
+        params = sim.RampParams(nReads=30, H=8, W=8, rate=100.0,
+                                readNoise=1.0, poisson=False)
+        cube = sim.makeRamp(params, rng=0)
+        self.assertEqual(cube.shape, (30, 8, 8))  # (nReads, H, W)
+        self.assertEqual(cube.dtype, np.float32)
+        # Per-pixel slope ~ rate (low-noise case).
+        for y in range(8):
+            for x in range(8):
+                slope = float(np.polyfit(np.arange(30), cube[:, y, x], 1)[0])  # [0] = degree-1 coefficient
+                self.assertAlmostEqual(slope, 100.0, delta=0.3)
+
+    def testNoisePropagates(self):
+        params = sim.RampParams(nReads=50, H=32, W=32, rate=0.0,
+                                readNoise=5.0, poisson=False)
+        cube = sim.makeRamp(params, rng=42)
+        # Empirical per-read std should match readNoise; cumulative-space
+        # std of read 0 (vs the cube-mean prediction zero) is just readNoise.
+        std_read0 = float(cube[0].std())  # spread over the 32 x 32 pixels of read 0
+        self.assertAlmostEqual(std_read0, 5.0, delta=0.5)
+
+    def testCRPersists(self):
+        params = sim.RampParams(nReads=20, H=4, W=4, rate=10.0,
+                                readNoise=0.0, poisson=False)
+        crs = [sim.CR(y=2, x=3, read=10, amount=500.0)]
+        cube = sim.makeRamp(params, crs=crs, rng=0)
+        # New cumulative convention: cube[k] is the cumulative AFTER (k+1)
+        # reads, i.e. (k+1)*rate. CR injected at read 10 adds to read 10's
+        # per-read flux, so cube[10..] are bumped up by +500.
+        for k in range(10):  # reads before the hit: unperturbed ramp
+            self.assertAlmostEqual(float(cube[k, 2, 3]),
+                                   (k + 1) * 10.0, delta=0.5)
+        for k in range(10, 20):  # read 10 onwards: offset persists to the end
+            self.assertAlmostEqual(float(cube[k, 2, 3]),
+                                   (k + 1) * 10.0 + 500.0, delta=0.5)
+
+    def testAsicGlitchIsSingleRead(self):
+        params = sim.RampParams(nReads=20, H=4, W=4, rate=10.0,
+                                readNoise=0.0, poisson=False)
+        glitches = [sim.AsicGlitch(y=1, x=1, read=8, amount=2048.0)]
+        cube = sim.makeRamp(params, glitches=glitches, rng=0)
+        # cube[k] = (k+1)*rate; the glitch offsets ONE read (k=8).
+        self.assertAlmostEqual(float(cube[7, 1, 1]), 8 * 10.0, delta=0.5)
+        self.assertAlmostEqual(float(cube[8, 1, 1]),
+                               9 * 10.0 + 2048.0, delta=0.5)
+        self.assertAlmostEqual(float(cube[9, 1, 1]), 10 * 10.0, delta=0.5)  # back on the ramp
+        # In delta space, this shows as +2048 then −2048.
+        deltas = np.diff(cube[:, 1, 1])
+        # delta[7] = read[8] - read[7] ≈ rate + glitch
+        self.assertAlmostEqual(float(deltas[7]), 10.0 + 2048.0, delta=0.5)
+        # delta[8] = read[9] - read[8] ≈ rate − glitch
+        self.assertAlmostEqual(float(deltas[8]), 10.0 - 2048.0, delta=0.5)
+
+    def testDigitalGlitchAmountsArePowersOf2(self):
+        amounts = sim.digitalGlitchAmounts(
+            n=200, bits=(10, 11, 12), signed=True, rng=42,
+        )
+        self.assertEqual(amounts.shape, (200,))
+        for a in amounts:
+            self.assertIn(abs(float(a)), {1024.0, 2048.0, 4096.0})  # 2**10, 2**11, 2**12 for the bits given
+            self.assertIn(np.sign(float(a)), {-1.0, 1.0})  # never zero
+
+    def testRandomGenerators(self):
+        crs = sim.randomCRs(n=20, H=8, W=8, nReads=30, rng=0)
+        self.assertEqual(len(crs), 20)
+        for c in crs:
+            self.assertTrue(0 <= c.y < 8)
+            self.assertTrue(0 <= c.x < 8)
+            self.assertTrue(1 <= c.read < 29)  # CR reads expected in [1, 28] for nReads=30
+            self.assertTrue(50.0 <= c.amount <= 5000.0)
+
+        glitches = sim.randomAsicGlitches(n=10, H=8, W=8, nReads=30, rng=0)
+        self.assertEqual(len(glitches), 10)
+        for g in glitches:
+            self.assertTrue(0 <= g.y < 8)
+            self.assertTrue(0 <= g.x < 8)
+            self.assertTrue(1 <= g.read < 28)  # glitch reads expected in [1, 27] for nReads=30
+            self.assertIn(abs(g.amount), {1024.0, 2048.0, 4096.0, 8192.0})  # 2**10 .. 2**13
+
+
+class NonlinearityTestCase(lsst.utils.tests.TestCase):
+
+    def testLinearCaseIsIdentity(self):
+        nl = sim.Nonlinearity(alpha=0.0)
+        t = np.linspace(0, 50000, 1000, dtype=np.float32)
+        np.testing.assert_array_equal(nl.forward(t), t)  # exact equality expected when alpha == 0
+        np.testing.assert_array_equal(nl.inverse(t), t)
+
+    def testCompressedAtFullWell(self):
+        nl = sim.Nonlinearity(alpha=0.05, qMax=60000.0)
+        # forward at t=qMax is qMax * (1 − alpha) = 0.95 * qMax = 57000.
+        m = nl.forward(np.array([60000.0], dtype=np.float32))
+        self.assertAlmostEqual(float(m[0]), 57000.0, delta=1.0)
+
+    def testRoundTripIsExact(self):
+        nl = sim.Nonlinearity(alpha=0.05, qMax=60000.0)
+        t = np.linspace(0.0, 50000.0, 5000, dtype=np.float32)
+        recovered = nl.inverse(nl.forward(t))
+        # Round-trip in float32 should be tight; allow a few ADU slop.
+        np.testing.assert_allclose(recovered, t, atol=2.0)  # "exact" here means within 2 ADU
+
+    def testRoundTripWith3DCube(self):
+        nl = sim.Nonlinearity(alpha=0.05, qMax=60000.0)
+        cube = np.linspace(0.0, 50000.0, 100, dtype=np.float32).reshape(10, 5, 2)  # 100 values as a 3-D array
+        recovered = nl.inverse(nl.forward(cube))
+        np.testing.assert_allclose(recovered, cube, atol=2.0)
+
+
+class RawAndLinearRampsTestCase(lsst.utils.tests.TestCase):
+
+    def testRawIsCompressedRelativeToLinearWhenAlphaPositive(self):
+        params = sim.RampParams(nReads=40, H=4, W=4, rate=1500.0,
+                                readNoise=0.0, poisson=False)
+        nl = sim.Nonlinearity(alpha=0.05, qMax=60000.0)
+        raw, lin = sim.makeRawAndLinearRamps(params, nonlinearity=nl,
+                                             rng=0)
+        # With cum[k] = (k+1)*rate, last read is 40*1500 = 60000 (full well).
+        # raw at full well ≈ 60000 * (1 − 0.05) = 57000.
+        self.assertAlmostEqual(float(lin[-1].mean()), 60000.0, delta=200.0)
+        self.assertAlmostEqual(float(raw[-1].mean()), 57000.0, delta=300.0)
+        # Linearized rate (slope) should recover the true rate.
+        rate_est = sim.utrRate(lin)
+        self.assertAlmostEqual(float(rate_est.mean()), params.rate, delta=10.0)
+
+    def testCRInRawAndLinearMatchTruth(self):
+        params = sim.RampParams(nReads=30, H=4, W=4, rate=100.0,
+                                readNoise=0.0, poisson=False)
+        nl = sim.Nonlinearity(alpha=0.05, qMax=60000.0)
+        crs = [sim.CR(y=2, x=3, read=15, amount=2000.0)]
+        raw, lin = sim.makeRawAndLinearRamps(params, nonlinearity=nl,
+                                             crs=crs, rng=0)  # ``raw`` is not inspected in this test
+        # CR injected in true-linear space, so the linearized cube should
+        # show a clean +2000 step starting at read 15.
+        delta_lin = float(lin[15, 2, 3] - lin[14, 2, 3])
+        self.assertAlmostEqual(delta_lin, params.rate + 2000.0, delta=10.0)
+
+    def testAsicGlitchInRawIsCleanSingleRead(self):
+        params = sim.RampParams(nReads=20, H=4, W=4, rate=100.0,
+                                readNoise=0.0, poisson=False)
+        nl = sim.Nonlinearity(alpha=0.05, qMax=60000.0)
+        g = sim.AsicGlitch(y=1, x=1, read=10, amount=2048.0)
+        raw = sim.makeRawRamp(params, nonlinearity=nl, asicGlitches=[g], rng=0)
+        # In raw: only read 10 is offset; reads 9 and 11 are at the
+        # nominal compressed ramp. With cum[k] = (k+1)*rate convention.
+        nominal_9 = float(nl.forward(np.array([10 * 100.0]))[0])  # read 9 -> (9 + 1) * rate
+        nominal_10 = float(nl.forward(np.array([11 * 100.0]))[0])
+        nominal_11 = float(nl.forward(np.array([12 * 100.0]))[0])
+        self.assertAlmostEqual(float(raw[9, 1, 1]), nominal_9, delta=1.0)
+        self.assertAlmostEqual(float(raw[10, 1, 1]),
+                               nominal_10 + 2048.0, delta=1.0)  # offset added on top of the compressed value
+        self.assertAlmostEqual(float(raw[11, 1, 1]), nominal_11, delta=1.0)
+
+
+class UtrRateClosureTestCase(lsst.utils.tests.TestCase):
+
+    def testRecoversTrueRateOnCleanRamp(self):
+        """utrRate(clean linear ramp) should recover params.rate within noise."""
+        params = sim.RampParams(nReads=45, H=16, W=16, rate=87.0,
+                                readNoise=5.0, poisson=False)
+        cube = sim.makeRamp(params, rng=0)
+        rate = sim.utrRate(cube)
+        self.assertEqual(rate.shape, (16, 16))  # one rate per pixel
+        # Expected precision: LSQ slope std = readNoise * sqrt(12 / (N*(N-1)*(N+1)))
+        # ≈ 5 * sqrt(12 / (45 * 44 * 46)) ≈ 0.057 ADU/read. So 3-sigma ~ 0.17.
+        self.assertAlmostEqual(float(rate.mean()), 87.0, delta=0.5)  # generous next to the estimate above
+        self.assertLess(float(rate.std()), 0.5)
+
+    def testRecoversRateWithPoissonNoise(self):
+        params = sim.RampParams(nReads=45, H=16, W=16, rate=100.0,
+                                readNoise=5.0, poisson=True)
+        cube = sim.makeRamp(params, rng=0)
+        rate = sim.utrRate(cube)
+        # With Poisson, the variance grows along the ramp; the unweighted
+        # LSQ slope is slightly less optimal but still close.
+        self.assertAlmostEqual(float(rate.mean()), 100.0, delta=1.0)
+
+    def testReadMask1DExcludesReads(self):
+        """Excluding some reads should still give the right slope."""
+        params = sim.RampParams(nReads=30, H=4, W=4, rate=50.0,
+                                readNoise=0.0, poisson=False)
+        cube = sim.makeRamp(params, rng=0)
+        # Inject a CR at read 10 that pollutes reads 10..29.
+        cube[10:, :, :] += 5000.0
+        # All-reads fit will be wrong; masking out reads 10..29 should recover
+        # the original slope.
+        rate_all = sim.utrRate(cube)
+        mask = np.ones(30, dtype=bool)  # 1-D mask: one flag per read, True = use
+        mask[10:] = False
+        rate_masked = sim.utrRate(cube, readMask=mask)
+        self.assertNotAlmostEqual(float(rate_all.mean()), 50.0, delta=10.0)  # unmasked fit is off by > 10
+        self.assertAlmostEqual(float(rate_masked.mean()), 50.0, delta=0.5)
+
+    def testReadMask3DPerPixel(self):
+        """Per-pixel read masks should work."""
+        params = sim.RampParams(nReads=30, H=4, W=4, rate=50.0,
+                                readNoise=0.0, poisson=False)
+        cube = sim.makeRamp(params, rng=0)
+        # CR at one pixel only.
+        cube[15:, 2, 3] += 3000.0
+        # Per-pixel mask: exclude reads 15.. for that one pixel; keep all for others.
+        mask3D = np.ones((30, 4, 4), dtype=bool)  # same shape as the cube
+        mask3D[15:, 2, 3] = False
+        rate = sim.utrRate(cube, readMask=mask3D)
+        self.assertAlmostEqual(float(rate[2, 3]), 50.0, delta=0.5)  # the masked pixel
+        self.assertAlmostEqual(float(rate[0, 0]), 50.0, delta=0.5)  # an untouched pixel
+
+    def testRoundTripRateThroughNonlinearityAndInverse(self):
+        """sim(rate) -> rawRamp -> nl.inverse -> utrRate should recover rate."""
+        params = sim.RampParams(nReads=45, H=8, W=8, rate=200.0,
+                                readNoise=5.0, poisson=True)
+        nl = sim.Nonlinearity(alpha=0.05, qMax=60000.0)
+        _, linearized = sim.makeRawAndLinearRamps(params, nl, rng=0)  # raw cube discarded
+        rate = sim.utrRate(linearized)
+        # Closure: recovered rate should equal the true rate within noise.
+        self.assertAlmostEqual(float(rate.mean()), 200.0, delta=2.0)
+
+
+def _runCR(cube, **kwargs):
+    """Test helper: run the deltas-based detector on a cumulative cube.
+
+    Same shape as ``test_h4CR._runCR`` — diff once, transpose to the
+    ``(H, W, N-1)`` layout the detector expects, call, transpose
+    repaired deltas back, cumsum back. The returned ``crFlagMask`` /
+    ``glitchFlagMask`` are also re-transposed to ``(N-1, H, W)`` so the
+    existing test assertions can index them as ``mask[k, y, x]``.
+    """
+    repair = kwargs.get("repair", True)  # read, not popped: "repair" is still forwarded if given
+    deltas = np.diff(cube, axis=0)  # per-read differences, (N-1, H, W)
+    read0 = cube[0:1].copy() if repair else None  # copied because ``cube`` is overwritten below
+    deltasHWN = np.ascontiguousarray(deltas.transpose(1, 2, 0))  # (H, W, N-1), contiguous
+    result = cr.iterativeUtrDetectAndRepair(deltasHWN, **kwargs)
+    if repair:  # NOTE(review): assumes the detector also repairs when "repair" is omitted — confirm
+        deltas = np.ascontiguousarray(deltasHWN.transpose(2, 0, 1))  # presumably repaired in place — confirm
+        cube[0:1] = read0
+        np.cumsum(deltas, axis=0, out=cube[1:])  # rebuild reads 1.. inside the caller's array
+        cube[1:] += read0  # re-anchor on read 0
+    result.crFlagMask = np.ascontiguousarray(result.crFlagMask.transpose(2, 0, 1))
+    result.glitchFlagMask = np.ascontiguousarray(
+        result.glitchFlagMask.transpose(2, 0, 1)
+    )
+    return result
+
+
+class IterativeUtrDetectAndRepairTestCase(lsst.utils.tests.TestCase):
+    """Tests for cr.iterativeUtrDetectAndRepair against simulated truth."""
+
+    def _makeCube(self, params, crs=None, glitches=None, rng=0):
+        # Note: the iterative detector works on the linearized cube. We
+        # inject CRs in true space and glitches in measured space (the
+        # standard sim chain) and feed the linearized cube into detect.
+        nl = sim.Nonlinearity(alpha=0.0)  # tests focus on detection, not lin model
+        raw = sim.makeRawRamp(params, nonlinearity=nl, crs=crs,
+                              asicGlitches=glitches, rng=rng)
+        return raw  # alpha=0 → raw == linearized
+
+    def testBrightCRs(self):
+        params = sim.RampParams(nReads=30, H=16, W=16, rate=50.0,
+                                readNoise=5.0, poisson=True)
+        truth = [sim.CR(y=3, x=4, read=10, amount=500.0),
+                 sim.CR(y=8, x=8, read=20, amount=1500.0)]
+        cube = self._makeCube(params, crs=truth, rng=0)
+        good = np.ones((16, 16), dtype=bool)
+        result = _runCR(
+            cube, goodPixelMask=good, nSigma=5.0,
+        )
+        self.assertEqual(result.nGlitchPairs, 0)
+        self.assertGreaterEqual(result.nCRs, len(truth))
+        # Each truth CR should have a flag at the corresponding (delta_index, y, x).
+        for c in truth:
+            self.assertTrue(result.crFlagMask[c.read - 1, c.y, c.x],
+                            f"missed CR at {(c.read, c.y, c.x)}")
+
+    def testHighAmplitudeGlitches(self):  # one positive and one negative glitch
+        params = sim.RampParams(nReads=30, H=16, W=16, rate=50.0,
+                                readNoise=5.0, poisson=True)
+        truth = [sim.AsicGlitch(y=2, x=2, read=10, amount=+2048.0),
+                 sim.AsicGlitch(y=5, x=5, read=15, amount=-1024.0)]
+        cube = self._makeCube(params, glitches=truth, rng=0)
+        good = np.ones((16, 16), dtype=bool)
+        glitchMask = np.ones((16, 16), dtype=bool)  # opt in on every pixel
+        result = _runCR(
+            cube, goodPixelMask=good, glitchPixelMask=glitchMask,
+            nSigma=5.0,
+        )
+        # Glitch at read m → pair at delta indices (m-1, m). Both should flag.
+        for g in truth:
+            self.assertTrue(result.glitchFlagMask[g.read - 1, g.y, g.x],
+                            f"missed glitch at delta {g.read - 1}, ({g.y}, {g.x})")
+            self.assertTrue(result.glitchFlagMask[g.read, g.y, g.x],
+                            f"missed glitch at delta {g.read}, ({g.y}, {g.x})")
+        # No CRs were injected in this scenario, so none may be reported.
+        self.assertEqual(result.nCRs, 0)
+
+    def testGlitchDetectionOffByDefault(self):
+        """With glitchPixelMask=None (default), glitches are not detected.
+
+        With ``maxDropFraction=inf`` (off), the +half of the glitch
+        falls into the CR bucket — this test exercises that classifier
+        fallback path. The default-on cumulative-drop check would instead
+        catch the up/down pair itself, but that behaviour is
+        verified separately in ``testPersistenceRejectsTransientUpSpike``.
+        """
+        params = sim.RampParams(nReads=30, H=8, W=8, rate=50.0,
+                                readNoise=5.0, poisson=True)
+        truth = [sim.AsicGlitch(y=3, x=3, read=10, amount=+2048.0)]
+        cube = self._makeCube(params, glitches=truth, rng=0)
+        good = np.ones((8, 8), dtype=bool)
+        result = _runCR(
+            cube, goodPixelMask=good, nSigma=5.0,
+            maxDropFraction=float('inf'),  # switch the cumulative-drop check off
+        )
+        self.assertEqual(result.nGlitchPairs, 0,
+                         "no glitches should be detected without glitchPixelMask")
+        # The +half of the glitch (positive residual) gets classified as a CR
+        # because pair detection is disabled. The negative half is flagged
+        # but counted as neither CR nor glitch.
+        self.assertGreaterEqual(result.nCRs, 1,
+                                "positive glitch half should be misclassified as CR")
+
+    def testGlitchDetectionRestrictedToChannel(self):
+        """Pair detection runs only where glitchPixelMask is True."""
+        # Two equal-amplitude glitches: one inside the masked channel (x=5), one outside.
+        params = sim.RampParams(nReads=30, H=4, W=12, rate=50.0,
+                                readNoise=5.0, poisson=True)
+        truth = [
+            sim.AsicGlitch(y=2, x=5, read=12, amount=+2048.0),   # inside channel
+            sim.AsicGlitch(y=2, x=10, read=15, amount=+2048.0),  # outside
+        ]
+        cube = self._makeCube(params, glitches=truth, rng=0)
+        good = np.ones((4, 12), dtype=bool)
+        # Mask covers x in [4, 8) — channel for glitch detection.
+        glitchMask = np.zeros((4, 12), dtype=bool)
+        glitchMask[:, 4:8] = True
+        result = _runCR(
+            cube, goodPixelMask=good, glitchPixelMask=glitchMask,
+            nSigma=5.0,
+        )
+        # In-channel glitch (read 12): detected as a pair at deltas 11 and 12.
+        self.assertTrue(result.glitchFlagMask[11, 2, 5],
+                        "in-channel glitch first half not flagged")
+        self.assertTrue(result.glitchFlagMask[12, 2, 5],
+                        "in-channel glitch second half not flagged")
+        # Out-of-channel glitch (read 15): deltas 14 and 15 NOT tagged as a pair.
+        self.assertFalse(result.glitchFlagMask[14, 2, 10],
+                         "out-of-channel glitch wrongly tagged as pair")
+        self.assertFalse(result.glitchFlagMask[15, 2, 10],
+                         "out-of-channel glitch wrongly tagged as pair")
+
+    def testLowAmplitudeGlitchesProbeNoiseFloor(self):
+        """2**N-ADU glitches (N=3,4,5,6,8,10) at readNoise=5 → threshold ~35 ADU.
+
+        Expectation: amplitudes well above the threshold are caught;
+        amplitudes at or below it are not — that's the design limit.
+        """
+        params = sim.RampParams(nReads=30, H=24, W=8, rate=50.0,
+                                readNoise=5.0, poisson=False)
+        glitches = []
+        # Three glitches per amplitude (one row per amplitude, x = 0..2, reads
+        # 10/12/14), so every glitch is interior: not read 0, not the last read.
+        amps_by_bit = {3: 8.0, 4: 16.0, 5: 32.0, 6: 64.0, 8: 256.0, 10: 1024.0}
+        y = 0
+        for bit, amp in amps_by_bit.items():
+            for x in range(3):
+                glitches.append(sim.AsicGlitch(y=y, x=x, read=10 + 2 * x,
+                                               amount=+amp))
+            y += 1
+
+        cube = self._makeCube(params, glitches=glitches, rng=42)
+        good = np.ones((24, 8), dtype=bool)
+        glitchMask = np.ones((24, 8), dtype=bool)
+        # With poisson=False the only noise source is read noise; per-delta
+        # std ≈ sqrt(2)·readNoise = sqrt(2)·5 ≈ 7 ADU. IQR sigma ≈ 7, so
+        # threshold = 5·7 = 35 ADU. Bits with amp > 35 detect, amp ≤ 35 don't.
+        result = _runCR(
+            cube, goodPixelMask=good, glitchPixelMask=glitchMask,
+            sigmaFloorADU=0.0,  # disable floor for this experiment
+            nSigma=5.0,
+        )
+
+        detected_by_bit = {}
+        for bit, amp in amps_by_bit.items():
+            y_for_bit = list(amps_by_bit.keys()).index(bit)
+            # Each amp has 3 glitches at row y_for_bit; count first-half hits only.
+            hits = 0
+            for x in range(3):
+                read = 10 + 2 * x
+                if result.glitchFlagMask[read - 1, y_for_bit, x]:
+                    hits += 1
+            detected_by_bit[bit] = hits
+
+        # Above the noise wall: 1024 and 256 ADU must hit on all three
+        # glitches; 64 ADU may miss one (allow some randomness).
+        self.assertGreaterEqual(detected_by_bit[10], 3,
+                                f"bit-10 (1024 ADU) hits: {detected_by_bit[10]}")
+        self.assertGreaterEqual(detected_by_bit[8], 3,
+                                f"bit-8 (256 ADU) hits: {detected_by_bit[8]}")
+        self.assertGreaterEqual(detected_by_bit[6], 2,
+                                f"bit-6 (64 ADU) hits: {detected_by_bit[6]}")
+        # Below the noise wall (32, 16, 8 ADU at threshold ~35): hits should
+        # be small (mostly missed). Don't require zero — chance pickups OK.
+        self.assertLessEqual(detected_by_bit[5], 2,
+                             f"bit-5 (32 ADU) over-detected: {detected_by_bit[5]}")
+        self.assertLessEqual(detected_by_bit[4], 1,
+                             f"bit-4 (16 ADU) over-detected: {detected_by_bit[4]}")
+        self.assertLessEqual(detected_by_bit[3], 1,
+                             f"bit-3 (8 ADU) over-detected: {detected_by_bit[3]}")
+
+    def testRepairRecoversCleanRamp(self):
+        """After repair, utrRate of the cube should match the true rate.
+
+        The default policy is to detect-but-not-repair interior glitch
+        pairs, so this test opts in explicitly to ``correctGlitches=True``
+        — it's specifically exercising the cube-repair path.
+        """
+        params = sim.RampParams(nReads=40, H=8, W=8, rate=80.0,
+                                readNoise=4.0, poisson=False)
+        crs = [sim.CR(y=2, x=3, read=15, amount=1500.0)]
+        glitches = [sim.AsicGlitch(y=5, x=5, read=20, amount=+2048.0)]
+        cube = self._makeCube(params, crs=crs, glitches=glitches, rng=0)
+        good = np.ones((8, 8), dtype=bool)
+        glitchMask = np.ones((8, 8), dtype=bool)
+        result = _runCR(
+            cube, goodPixelMask=good, glitchPixelMask=glitchMask,
+            nSigma=5.0, correctGlitches=True,
+        )
+        self.assertEqual(result.nCRs, 1)
+        self.assertEqual(result.nGlitchPairs, 1)
+        # Rate recomputed from ``cube`` (expected to be repaired in place) should match params.rate.
+        rate_after = sim.utrRate(cube)
+        self.assertAlmostEqual(float(rate_after.mean()), params.rate, delta=0.5)
+        # Specifically the CR pixel (2, 3) and the glitch pixel (5, 5) should also match.
+        self.assertAlmostEqual(float(rate_after[2, 3]), params.rate, delta=1.0)
+        self.assertAlmostEqual(float(rate_after[5, 5]), params.rate, delta=1.0)
+
+    def testCorrectGlitchesTrueRepairsInteriorPairs(self):
+        """correctGlitches=True: an interior glitch pair is detected
+        and repaired out of the cube."""
+        params = sim.RampParams(nReads=30, H=4, W=4, rate=50.0,
+                                readNoise=0.0, poisson=False)
+        g = sim.AsicGlitch(y=1, x=1, read=12, amount=+2048.0)
+        cube = self._makeCube(params, glitches=[g], rng=0)
+        good = np.ones((4, 4), dtype=bool)
+        glitchMask = np.ones((4, 4), dtype=bool)
+        result = _runCR(cube, goodPixelMask=good, glitchPixelMask=glitchMask,
+                        nSigma=5.0, correctGlitches=True)
+        self.assertEqual(result.nGlitchPairs, 1)
+        self.assertTrue(result.glitchFlagMask[11, 1, 1])  # read 12 → delta 11 ...
+        self.assertTrue(result.glitchFlagMask[12, 1, 1])  # ... and delta 12
+        # Repaired: the pair deltas read back from ``cube`` are ~rate, the +/-2048 spike gone.
+        deltasAfter = np.diff(cube[:, 1, 1])
+        self.assertAlmostEqual(float(deltasAfter[11]), params.rate, delta=2.0)
+        self.assertAlmostEqual(float(deltasAfter[12]), params.rate, delta=2.0)
+        self.assertAlmostEqual(float(result.rate[1, 1]), params.rate, delta=1.0)
+
+    def testCorrectGlitchesFalseLeavesInteriorPairs(self):
+        """correctGlitches=False: an interior glitch pair is still
+        detected but left in the cube — it self-cancels in the mean rate."""
+        params = sim.RampParams(nReads=30, H=4, W=4, rate=50.0,
+                                readNoise=0.0, poisson=False)
+        g = sim.AsicGlitch(y=1, x=1, read=12, amount=+2048.0)
+        cube = self._makeCube(params, glitches=[g], rng=0)
+        good = np.ones((4, 4), dtype=bool)
+        glitchMask = np.ones((4, 4), dtype=bool)
+        deltasBefore = np.diff(cube[:, 1, 1])  # new array: a pre-call snapshot
+        result = _runCR(cube, goodPixelMask=good, glitchPixelMask=glitchMask,
+                        nSigma=5.0, correctGlitches=False)
+        # Detection still happens — the pair is in the ASIC_GLITCH mask.
+        self.assertEqual(result.nGlitchPairs, 1)
+        self.assertTrue(result.glitchFlagMask[11, 1, 1])  # read 12 → delta 11 ...
+        self.assertTrue(result.glitchFlagMask[12, 1, 1])  # ... and delta 12
+        # NOT repaired: the cube still carries the +2048/-2048 pair.
+        deltasAfter = np.diff(cube[:, 1, 1])
+        np.testing.assert_allclose(deltasAfter[11:13], deltasBefore[11:13],
+                                   atol=1.0)
+        # The symmetric pair cancels in the mean, so the rate still recovers.
+        self.assertAlmostEqual(float(result.rate[1, 1]), params.rate, delta=1.0)
+
+    def testEndGlitchAlwaysCorrected(self):
+        """An end glitch (lone spike at the last delta, no pair partner)
+        is always repaired, even with correctGlitches=False."""
+        params = sim.RampParams(nReads=30, H=4, W=4, rate=50.0,
+                                readNoise=0.0, poisson=False)
+        # Glitch on the last read (29 of 0..29) → a lone +2048 spike at the final delta (28).
+        g = sim.AsicGlitch(y=2, x=2, read=29, amount=+2048.0)
+        cube = self._makeCube(params, glitches=[g], rng=0)
+        good = np.ones((4, 4), dtype=bool)
+        glitchMask = np.ones((4, 4), dtype=bool)
+        result = _runCR(cube, goodPixelMask=good, glitchPixelMask=glitchMask,
+                        nSigma=5.0, correctGlitches=False)
+        # It is an ASIC glitch (in the mask) but not counted as a pair.
+        self.assertEqual(result.nGlitchPairs, 0)
+        self.assertTrue(result.glitchFlagMask[28, 2, 2])
+        # Repaired despite correctGlitches=False — a lone end glitch has no
+        # partner, so it cannot self-cancel and must always be corrected.
+        deltasAfter = np.diff(cube[:, 2, 2])
+        self.assertAlmostEqual(float(deltasAfter[28]), params.rate, delta=2.0)
+        self.assertAlmostEqual(float(result.rate[2, 2]), params.rate, delta=1.0)
+
+    # ---------- Independence of CR vs ASIC-glitch handling -----------------
+    #
+    # The next three tests pin down that CR detection/correction and ASIC
+    # glitch detection/correction stay independent. Each isolates one knob.
+
+    def testCRAndGlitchIndependentInDefaultMode(self):
+        """A CR and an interior glitch in the same ramp, on different
+        pixels: with the default (correctGlitches=False), CRs are
+        repaired, interior glitch pairs are detected but left in place,
+        and each pixel's rate recovers via its own mechanism."""
+        params = sim.RampParams(nReads=40, H=8, W=8, rate=80.0,
+                                readNoise=0.0, poisson=False)
+        crs = [sim.CR(y=2, x=3, read=15, amount=1500.0)]
+        glitches = [sim.AsicGlitch(y=5, x=5, read=20, amount=+2048.0)]
+        cube = self._makeCube(params, crs=crs, glitches=glitches, rng=0)
+        good = np.ones((8, 8), dtype=bool)
+        glitchMask = np.ones((8, 8), dtype=bool)
+        glitchDeltasBefore = np.diff(cube[:, 5, 5]).copy()  # pre-call snapshot
+
+        result = _runCR(
+            cube, goodPixelMask=good, glitchPixelMask=glitchMask,
+            nSigma=5.0,
+        )
+
+        # Both classifications happen, independently.
+        self.assertEqual(result.nCRs, 1)
+        self.assertEqual(result.nGlitchPairs, 1)
+        self.assertTrue(result.crFlagMask[14, 2, 3])  # CR at read 15 → delta 14
+        self.assertTrue(result.glitchFlagMask[19, 5, 5])  # glitch at read 20 → delta 19 ...
+        self.assertTrue(result.glitchFlagMask[20, 5, 5])  # ... and delta 20
+        # CR pixel: the CR delta read back from ``cube`` is repaired to ~rate.
+        crDeltasAfter = np.diff(cube[:, 2, 3])
+        self.assertAlmostEqual(float(crDeltasAfter[14]), params.rate, delta=2.0)
+        self.assertAlmostEqual(float(result.rate[2, 3]), params.rate, delta=1.0)
+        # Glitch pixel: the pair deltas are NOT repaired; cube is unchanged
+        # at the pair, and the rate recovers via +A/-A self-cancellation.
+        glitchDeltasAfter = np.diff(cube[:, 5, 5])
+        np.testing.assert_allclose(
+            glitchDeltasAfter[19:21], glitchDeltasBefore[19:21], atol=1.0,
+            err_msg="Interior glitch pair must not be repaired in default mode",
+        )
+        self.assertAlmostEqual(float(result.rate[5, 5]), params.rate, delta=1.0)
+
+    def testCorrectGlitchesFlagDoesNotAffectCRs(self):
+        """The correctGlitches flag must not entangle with CR handling.
+
+        Running an identical ramp that contains only a CR (no glitch
+        injected) under correctGlitches=False vs True must produce bit-
+        identical CR flags, the same repaired cube state at the CR
+        position (to 1e-5), and the same per-pixel rate (to 1e-4).
+        """
+        params = sim.RampParams(nReads=40, H=4, W=4, rate=80.0,
+                                readNoise=0.0, poisson=False)
+        crs = [sim.CR(y=1, x=1, read=15, amount=1500.0)]
+        good = np.ones((4, 4), dtype=bool)
+        glitchMask = np.ones((4, 4), dtype=bool)
+
+        cubeFalse = self._makeCube(params, crs=crs, rng=0)  # fresh cube per run
+        resultFalse = _runCR(
+            cubeFalse, goodPixelMask=good, glitchPixelMask=glitchMask,
+            nSigma=5.0, correctGlitches=False,
+        )
+
+        cubeTrue = self._makeCube(params, crs=crs, rng=0)  # same params and rng
+        resultTrue = _runCR(
+            cubeTrue, goodPixelMask=good, glitchPixelMask=glitchMask,
+            nSigma=5.0, correctGlitches=True,
+        )
+
+        # CR detection and flag mask: exactly identical.
+        self.assertEqual(resultFalse.nCRs, 1)
+        self.assertEqual(resultFalse.nCRs, resultTrue.nCRs)
+        np.testing.assert_array_equal(
+            resultFalse.crFlagMask, resultTrue.crFlagMask,
+            err_msg="CR flag mask must not depend on correctGlitches",
+        )
+        # Repaired cube at the CR pixel: identical to within atol=1e-5.
+        np.testing.assert_allclose(
+            cubeFalse[:, 1, 1], cubeTrue[:, 1, 1], atol=1e-5,
+            err_msg="CR repair must not depend on correctGlitches",
+        )
+        # Rate at the CR pixel: equal to within 1e-4, and matches the true rate.
+        self.assertAlmostEqual(
+            float(resultFalse.rate[1, 1]), float(resultTrue.rate[1, 1]),
+            delta=1e-4,
+        )
+        self.assertAlmostEqual(float(resultFalse.rate[1, 1]),
+                               params.rate, delta=1.0)
+
+    def testGlitchMaskDoesNotAffectInteriorCRs(self):
+        """Turning ASIC-glitch detection on (glitchPixelMask=all-True)
+        must not change how an *interior* CR is classified or repaired.
+
+        Boundary CRs (at delta 0 or N-2) would re-classify as end
+        glitches under the boundary heuristic by design; this test uses
+        an interior CR so the two glitch-mask settings are directly
+        comparable.
+        """
+        params = sim.RampParams(nReads=40, H=4, W=4, rate=80.0,
+                                readNoise=0.0, poisson=False)
+        crs = [sim.CR(y=1, x=1, read=15, amount=1500.0)]
+        good = np.ones((4, 4), dtype=bool)
+
+        cubeOff = self._makeCube(params, crs=crs, rng=0)  # fresh cube per run
+        resultOff = _runCR(
+            cubeOff, goodPixelMask=good, glitchPixelMask=None,
+            nSigma=5.0,
+        )
+
+        cubeOn = self._makeCube(params, crs=crs, rng=0)  # same params and rng
+        resultOn = _runCR(
+            cubeOn, goodPixelMask=good,
+            glitchPixelMask=np.ones((4, 4), dtype=bool),
+            nSigma=5.0,
+        )
+
+        # CR is classified as CR (and not as a glitch pair) either way.
+        self.assertEqual(resultOff.nCRs, 1)
+        self.assertEqual(resultOn.nCRs, 1)
+        self.assertEqual(resultOff.nGlitchPairs, 0)
+        self.assertEqual(resultOn.nGlitchPairs, 0)
+        np.testing.assert_array_equal(
+            resultOff.crFlagMask, resultOn.crFlagMask,
+            err_msg="CR flag mask must not depend on glitchPixelMask "
+                    "for an interior CR",
+        )
+        # Repaired cube at the CR pixel: identical to within atol=1e-5.
+        np.testing.assert_allclose(
+            cubeOff[:, 1, 1], cubeOn[:, 1, 1], atol=1e-5,
+            err_msg="CR repair must not depend on glitchPixelMask "
+                    "for an interior CR",
+        )
+
+
+class TestMemory(lsst.utils.tests.MemoryTestCase):
+    pass  # everything is inherited (presumably LSST's resource-leak check — confirm)
+
+
+def setup_module(module):
+    lsst.utils.tests.init()  # same init call the __main__ path makes
+
+
+if __name__ == "__main__":
+    lsst.utils.tests.init()  # mirrors setup_module() when run as a script
+    unittest.main()
diff --git a/tests/test_validate_processRamp.py b/tests/test_validate_processRamp.py
new file mode 100644
index 00000000..47b04c05
--- /dev/null
+++ b/tests/test_validate_processRamp.py
@@ -0,0 +1,188 @@
+"""Integration tests for ``validate.processRamp`` on an asymmetric ramp.
+
+Uses the ``reuseExposure`` path (pre-built exposure + cube, butler
+not touched) so we can drive the CR-detection portion of
+``processRamp`` with deterministic synthetic inputs and verify:
+
+- The cube returned has shape ``(H, W, N)`` (axis order preserved
+  through the in-helper diff -> CR detector -> cumsum reconstruction).
+- The exposure mask is stamped at ``(y, x)`` for CR / ASIC_GLITCH;
+  axes 0 and 1 of the cube map onto ``mask.array[y, x]``, not
+  ``mask.array[x, y]``.
+- ``crResult.crFlagMask`` and ``glitchFlagMask`` have shape
+  ``(H, W, N-1)`` with hits at the expected delta indices.
+- The pre-seeded ``intermediates`` opt-in captures ``'crCorrected'``
+  only when requested.
+
+Asymmetric shape (N=20, H=5, W=7) is used throughout so any axis
+permutation surfaces as a shape error rather than a silent transposed
+result.
+"""
+import unittest
+
+import numpy as np
+
+import lsst.afw.image as afwImage
+import lsst.geom as geom
+import lsst.utils.tests
+
+from lsst.obs.pfs.h4Linearity import validate as pfsValidate
+
+
+N_READS = 20  # N: reads per ramp (last axis of the (H, W, N) cube)
+H = 5  # rows (y)
+W = 7  # columns (x); H != W != N so an axis swap shows up as a shape mismatch
+RATE = 10.0  # cumulative[y, x, k] = (k+1) * RATE
+
+
+def _flatCube(nReads=N_READS, h=H, w=W, rate=RATE, dtype=np.float32):
+    """``cumulative[y, x, k] = (k+1) * rate`` over a flat (H, W) plane, as a
+    fresh ``dtype`` copy in the project's ``(H, W, N)`` time-axis-last layout."""
+    k = np.arange(1, nReads + 1, dtype=np.float64) * rate  # per-read level, in float64
+    return np.broadcast_to(k, (h, w, nReads)).astype(dtype, copy=True)  # real copy, not the view
+
+
+def _makeExposure(h=H, w=W):
+    """Build an ExposureF of width ``w`` and height ``h`` for mask stamping."""
+    # Extent2I is (width, height); mask.array.shape becomes (H, W).
+    return afwImage.ExposureF(geom.Extent2I(w, h))
+
+
+class ProcessRampAxisOrderTestCase(lsst.utils.tests.TestCase):
+    """Verify processRamp preserves the (H, W, N) convention and stamps
+    masks at the right ``(y, x)``."""
+
+    def testCubeShapePreservedNoDefects(self):
+        # No injected defects: cube and flag masks keep their shapes, nothing is flagged.
+        cube = _flatCube()
+        exp = _makeExposure()
+        _, cubeOut, crResult = pfsValidate.processRamp(
+            butler=None, dataId={}, cam="n3",
+            exposure=exp, cube=cube.copy(),
+            doLinearize=False, doCR=True, repairCR=True,
+        )
+        self.assertEqual(cubeOut.shape, (H, W, N_READS))
+        self.assertEqual(crResult.crFlagMask.shape, (H, W, N_READS - 1))  # one per delta
+        self.assertEqual(crResult.glitchFlagMask.shape, (H, W, N_READS - 1))
+        self.assertEqual(int(crResult.nCRs), 0)
+        self.assertEqual(int(crResult.nGlitchPairs), 0)
+
+    def testCRPixelStampedAtCorrectYX(self):
+        # Inject a CR at (y=3, x=4, read 10): cube[3, 4, 10:] += 200.
+        # The single affected delta[3, 4, 9] = RATE + 200, far above
+        # threshold; result.crFlagMask[3, 4, 9] should fire and the
+        # exposure mask should get the CR bit at (3, 4).
+        cube = _flatCube()
+        cube[3, 4, 10:] += 200.0
+        exp = _makeExposure()
+        _, cubeOut, crResult = pfsValidate.processRamp(
+            butler=None, dataId={}, cam="n3",
+            exposure=exp, cube=cube.copy(),
+            doLinearize=False, doCR=True, repairCR=True,
+        )
+        self.assertTrue(
+            bool(crResult.crFlagMask[3, 4, 9]),
+            "CR flag must land at delta index k-1 of the injected read",
+        )
+        # The exposure mask carries the CR bit at the spatial (y, x) =
+        # (3, 4) — not at (4, 3) or any transposed location.
+        crBit = exp.mask.getPlaneBitMask("CR")
+        self.assertTrue(
+            bool(exp.mask.array[3, 4] & crBit),
+            "exposure.mask.array[y, x] must have CR bit set at (y=3, x=4)",
+        )
+        # And NO CR bit at the transposed (4, 3) position — fixture
+        # shape is (5, 7) so (4, 3) is a valid index; if axes were swapped
+        # the mask would be stamped at (4, 3) instead.
+        self.assertFalse(
+            bool(exp.mask.array[4, 3] & crBit),
+            "no CR should be flagged at (y=4, x=3); axis (y, x) "
+            "would be swapped if this fires",
+        )
+
+    def testRepairRestoresFluxAtRepairedPixel(self):
+        # With repairCR=True, the cube returned has the CR contribution
+        # subtracted; ``cube[3, 4, 10]`` should be ~11*RATE again.
+        cube = _flatCube()
+        cubeOrig = cube.copy()  # flat baseline, taken before the CR is injected
+        cube[3, 4, 10:] += 200.0
+        exp = _makeExposure()
+        _, cubeOut, _ = pfsValidate.processRamp(
+            butler=None, dataId={}, cam="n3",
+            exposure=exp, cube=cube,
+            doLinearize=False, doCR=True, repairCR=True,
+        )
+        # The repaired ramp at (3, 4) matches the original flat ramp
+        # within atol=2.0 ADU (expected iterative residual is ~0.5 ADU).
+        np.testing.assert_allclose(
+            cubeOut[3, 4, :], cubeOrig[3, 4, :], atol=2.0,
+            err_msg="repaired ramp at (3, 4) must align with the flat baseline",
+        )
+
+    def testNoRepairLeavesCubeUntouched(self):
+        # With repairCR=False, the cube isn't modified — the spike at
+        # cube[3, 4, 10:] survives; only the mask gets stamped.
+        cube = _flatCube()
+        cube[3, 4, 10:] += 200.0
+        cubeIn = cube.copy()  # snapshot of the input, CR included
+        exp = _makeExposure()
+        _, cubeOut, crResult = pfsValidate.processRamp(
+            butler=None, dataId={}, cam="n3",
+            exposure=exp, cube=cube,
+            doLinearize=False, doCR=True, repairCR=False,
+        )
+        np.testing.assert_array_equal(cubeOut, cubeIn)
+        # But the flag still fires at the injected read's delta (index 9).
+        self.assertTrue(bool(crResult.crFlagMask[3, 4, 9]))
+
+
+class ProcessRampIntermediatesTestCase(lsst.utils.tests.TestCase):
+    """The pre-seeded ``intermediates`` opt-in API: only requested keys
+    are populated; unrelated keys remain absent."""
+
+    def testOptInCrCorrectedOnly(self):
+        cube = _flatCube()
+        cube[3, 4, 10:] += 200.0
+        exp = _makeExposure()
+        intermediates = {"crCorrected": None}  # pre-seed only the key to capture
+        _, cubeOut, _ = pfsValidate.processRamp(
+            butler=None, dataId={}, cam="n3",
+            exposure=exp, cube=cube.copy(),
+            doLinearize=False, doCR=True, repairCR=True,
+            intermediates=intermediates,
+        )
+        self.assertIn("crCorrected", intermediates)
+        self.assertIsNotNone(intermediates["crCorrected"])
+        self.assertEqual(intermediates["crCorrected"].shape, (H, W, N_READS))
+        # Sanity: the captured crCorrected matches cubeOut elementwise
+        # (both are expected to be the post-repair cumulative cube).
+        np.testing.assert_allclose(intermediates["crCorrected"], cubeOut)
+        # Keys that were not pre-seeded did not appear.
+        self.assertNotIn("raw", intermediates)
+        self.assertNotIn("darkSubbed", intermediates)
+        self.assertNotIn("linearized", intermediates)
+
+    def testNoneIntermediatesCaptureNothing(self):
+        cube = _flatCube()
+        exp = _makeExposure()
+        _, _, _ = pfsValidate.processRamp(
+            butler=None, dataId={}, cam="n3",
+            exposure=exp, cube=cube.copy(),
+            doLinearize=False, doCR=True, repairCR=True,
+            intermediates=None,
+        )
+        # Smoke test only: nothing to assert beyond "doesn't crash"; this
+        # exercises the ``intermediates is None`` path in processRamp.
+
+
+class TestMemory(lsst.utils.tests.MemoryTestCase):
+    pass  # everything is inherited (presumably LSST's resource-leak check — confirm)
+
+
+def setup_module(module):
+    lsst.utils.tests.init()  # same init call the __main__ path makes
+
+
+if __name__ == "__main__":
+    lsst.utils.tests.init()  # mirrors setup_module() when run as a script
+    unittest.main()