Add QA plots for source injection to analysis_tools

lsst · Jul 3, 2024 · 8c02756 · 8c02756
1 parent cdf17bb
commit 8c02756
Show file tree

Hide file tree

Showing 10 changed files with 651 additions and 0 deletions.
diff --git a/pipelines/injectedCoaddQualityCore.yaml b/pipelines/injectedCoaddQualityCore.yaml
@@ -0,0 +1,16 @@
+description: |
+  Tier1 plots and metrics to assess injected coadd quality
+tasks:
+  injectedObjectAnalysis:
+    class: lsst.analysis.tools.tasks.injectedObjectAnalysis.InjectedObjectAnalysisTask
+    config:
+      # Each `atools.<name>` key assigns the analysis tool class named on the
+      # right-hand side to the task under that name.
+      atools.completenessHist: CompletenessPurityTool
+      atools.targetInjectedCatDeltaRAScatterPlot: TargetInjectedCatDeltaRAScatterPlot
+      atools.targetInjectedCatDeltaDecScatterPlot: TargetInjectedCatDeltaDecScatterPlot
+      atools.targetInjectedCatDeltaPsfScatterPlot: TargetInjectedCatDeltaPsfScatterPlot
+      atools.injectedMatchDiffMetrics: TargetInjectedCatDeltaMetrics
+      atools.injectedMatchDiffMetrics.applyContext: CoaddContext
+      bands: ["g", "r", "i", "z", "y"]
+      # NOTE(review): the star imports below presumably bring the tool and
+      # context class names used above into scope - confirm.
+      python: |
+        from lsst.analysis.tools.atools import *
+        from lsst.analysis.tools.contexts import *
diff --git a/python/lsst/analysis/tools/actions/keyedData/__init__.py b/python/lsst/analysis/tools/actions/keyedData/__init__.py
@@ -1,3 +1,4 @@
 from .calcDistances import *
 from .keyedDataActions import *
+from .magPercentiles import *
 from .stellarLocusFit import *
diff --git a/python/lsst/analysis/tools/actions/keyedData/magPercentiles.py b/python/lsst/analysis/tools/actions/keyedData/magPercentiles.py
@@ -0,0 +1,96 @@
+# This file is part of analysis_tools.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (https://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+from __future__ import annotations
+
+__all__ = ("MagPercentileAction",)
+
+import logging
+
+import numpy as np
+
+# import pandas as pd
+from astropy import units as u
+
+# from astropy.coordinates import SkyCoord
+from lsst.pex.config import Field, ListField
+
+from ...interfaces import KeyedData, KeyedDataSchema, Scalar, Vector, VectorAction
+from ...math import fluxToMag, isPercent  # divide, fluxToMag, isPercent, log10
+
+# from typing import Optional, cast
+
+# from lsst.pex.config.configurableActions import ConfigurableActionField, ConfigurableActionStructField
+
+# from .selectors import VectorSelector
+
+_LOG = logging.getLogger(__name__)
+
+
+class MagPercentileAction(VectorAction):
+    """Calculates the magnitude at the given percentile for completeness"""
+
+    matchDistanceKey = Field[str]("Match distance Vector")
+    vectorKey = Field[str](doc="Key of vector which should be loaded")
+    fluxUnits = Field[str](doc="Units for the column.", default="nanojansky")
+    percentiles = ListField[float](
+        doc="The percentiles to find the magnitude at.", default=[16.0, 50.0, 84.0], itemCheck=isPercent
+    )
+
+    def getInputSchema(self) -> KeyedDataSchema:
+        return (
+            (self.matchDistanceKey, Vector),
+            (self.vectorKey, Vector),
+        )
+
+    def getOutputSchema(self) -> KeyedDataSchema:
+        result = []
+        for pct in self.percentiles:
+            name = self.getPercentileName(pct)
+            result.append((name, Scalar))
+        return result
+
+    def getPercentileName(self, percentile: float) -> str:
+        return f"mag_{percentile:.2f}"
+
+    def __call__(self, data: KeyedData, **kwargs) -> KeyedData:
+        matched = np.isfinite(data[self.matchDistanceKey])
+        fluxValues = data[self.vectorKey.format(**kwargs)]
+        values = fluxToMag(fluxValues, flux_unit=u.Unit(self.fluxUnits))
+        nInput, bins = np.histogram(
+            values,
+            range=(np.nanmin(values), np.nanmax(values)),
+            bins=100,
+        )
+        nOutput, _ = np.histogram(
+            values[matched],
+            range=(np.nanmin(values[matched]), np.nanmax(values[matched])),
+            bins=bins,
+        )
+        # Find bin where the fraction recovered first falls below a percentile.
+        mags: KeyedData = {}
+        for pct in self.percentiles:
+            name = self.getPercentileName(pct)
+            belowPercentile = np.where((nOutput / nInput < pct / 100))[0]
+            if len(belowPercentile) == 0:
+                mags[name] = np.nan
+            else:
+                mags[name] = np.min(bins[belowPercentile])
+        return mags
diff --git a/python/lsst/analysis/tools/actions/plot/__init__.py b/python/lsst/analysis/tools/actions/plot/__init__.py
@@ -1,6 +1,7 @@
 from .barPlots import *
 from .calculateRange import *
 from .colorColorFitPlot import *
+from .completenessPlot import *
 from .diaSkyPlot import *
 from .focalPlanePlot import *
 from .gridPlot import *

diff --git a/python/lsst/analysis/tools/actions/plot/completenessPlot.py b/python/lsst/analysis/tools/actions/plot/completenessPlot.py
@@ -0,0 +1,183 @@
+# This file is part of analysis_tools.
+#
+# Developed for the LSST Data Management System.
+# This product includes software developed by the LSST Project
+# (https://www.lsst.org).
+# See the COPYRIGHT file at the top-level directory of this distribution
+# for details of code ownership.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+
+from typing import Mapping
+
+import matplotlib.pyplot as plt
+import numpy as np
+from lsst.pex.config import Field, ListField
+from matplotlib.figure import Figure
+
+from ...interfaces import KeyedData, KeyedDataSchema, PlotAction, Scalar, ScalarType, Vector
+from .plotUtils import addPlotInfo
+
+__all__ = ("CompletenessHist",)
+
+
+class CompletenessHist(PlotAction):
+    """Makes a scatter plot of the data with a marginal
+    histogram for each axis.
+    """
+
+    magKey = Field[str](doc="Name of the magnitude column.", default="mag")
+    matchDistanceKey = Field[str](doc="Name of the match distance column.", default="matchDistance")
+    xAxisLabel = Field[str](doc="Label for the x axis.", default="Input Magnitude (mag)")
+    inputLabel = Field[str](doc="Label for the input source histogram.", default="Synthetic Inputs")
+    outputLabel = Field[str](doc="Label for the recovered source histogram.", default="Synthetic Recovered")
+    numBins = Field[int](doc="Number of bins to use for the histograms.", default=100)
+    completenessPercentiles = ListField[float](
+        doc="Record the magnitudes at these percentiles", default=[84.0, 50.0, 16.0]
+    )
+
+    def getInputSchema(self) -> KeyedDataSchema:
+        base: list[tuple[str, type[Vector] | ScalarType]] = []
+        base.append((self.magKey, Vector))
+        base.append((self.matchDistanceKey, Vector))
+        return base
+
+    def __call__(self, data: KeyedData, **kwargs) -> Mapping[str, Figure] | Figure:
+        self._validateInput(data, **kwargs)
+        return self.makePlot(data, **kwargs)
+
+    def _validateInput(self, data: KeyedData, **kwargs) -> None:
+        """NOTE currently can only check that something is not a Scalar, not
+        check that the data is consistent with Vector
+        """
+        needed = self.getFormattedInputSchema(**kwargs)
+        if remainder := {key.format(**kwargs) for key, _ in needed} - {
+            key.format(**kwargs) for key in data.keys()
+        }:
+            raise ValueError(f"Task needs keys {remainder} but they were not found in input")
+        for name, typ in needed:
+            isScalar = issubclass((colType := type(data[name.format(**kwargs)])), Scalar)
+            if isScalar and typ != Scalar:
+                raise ValueError(f"Data keyed by {name} has type {colType} but action requires type {typ}")
+
+    def makePlot(self, data, plotInfo, **kwargs):
+        """Makes a plot showing the fraction of injected sources recovered by
+        input magnitude.
+
+        Parameters
+        ----------
+        data : `KeyedData`
+            All the data
+        plotInfo : `dict`
+            A dictionary of information about the data being plotted with keys:
+            ``camera``
+                The camera used to take the data (`lsst.afw.cameraGeom.Camera`)
+            ``"cameraName"``
+                The name of camera used to take the data (`str`).
+            ``"filter"``
+                The filter used for this data (`str`).
+            ``"ccdKey"``
+                The ccd/dectector key associated with this camera (`str`).
+            ``"visit"``
+                The visit of the data; only included if the data is from a
+                single epoch dataset (`str`).
+            ``"patch"``
+                The patch that the data is from; only included if the data is
+                from a coadd dataset (`str`).
+            ``"tract"``
+                The tract that the data comes from (`str`).
+            ``"photoCalibDataset"``
+                The dataset used for the calibration, e.g. "jointcal" or "fgcm"
+                (`str`).
+            ``"skyWcsDataset"``
+                The sky Wcs dataset used (`str`).
+            ``"rerun"``
+                The rerun the data is stored in (`str`).
+
+        Returns
+        ------
+        ``fig``
+            The figure to be saved (`matplotlib.figure.Figure`).
+
+        Notes
+        -----
+        Makes a histogram showing the fraction recovered in each magnitude
+        bin with the number input and recovered overplotted.
+        """
+
+        # Make plot showing the fraction recovered in magnitude bins
+        fig, axLeft = plt.subplots(dpi=300)
+        axLeft.tick_params(axis="y", labelcolor="C0")
+        axLeft.set_xlabel(self.xAxisLabel)
+        axLeft.set_ylabel("Fraction Recovered", color="C0")
+        axRight = axLeft.twinx()
+        axRight.set_ylabel("Number of Sources")
+        matched = np.isfinite(data[self.matchDistanceKey])
+        nInput, bins, _ = axRight.hist(
+            data[self.magKey],
+            range=(np.nanmin(data[self.magKey]), np.nanmax(data[self.magKey])),
+            bins=self.numBins,
+            log=True,
+            histtype="step",
+            label=self.inputLabel,
+            color="black",
+        )
+        nOutput, _, _ = axRight.hist(
+            data[self.magKey][matched],
+            range=(np.nanmin(data[self.magKey][matched]), np.nanmax(data[self.magKey][matched])),
+            bins=bins,
+            log=True,
+            histtype="step",
+            label=self.outputLabel,
+            color="grey",
+        )
+
+        # Find bin where the fraction recovered falls below a given percentile.
+        percentileInfo = []
+        xlims = plt.gca().get_xlim()
+        for pct in self.completenessPercentiles:
+            pct /= 100
+            magArray = np.where((nOutput / nInput < pct))[0]
+            if len(magArray) == 0:
+                mag = np.nan
+            else:
+                mag = np.min(bins[magArray])
+                axLeft.plot([xlims[0], mag], [pct, pct], ls=":", color="grey")
+                axLeft.plot([mag, mag], [0, pct], ls=":", color="grey")
+                percentileInfo.append("Magnitude at {}% recovered: {:0.2f}".format(pct * 100, mag))
+        plt.xlim(xlims)
+        axLeft.set_ylim(0, 1.05)
+        axRight.legend(loc="lower left", ncol=2)
+        axLeft.axhline(1, color="grey", ls="--")
+        axLeft.bar(
+            bins[:-1],
+            nOutput / nInput,
+            width=np.diff(bins),
+            align="edge",
+            color="C0",
+            alpha=0.5,
+            zorder=10,
+        )
+
+        # Add useful information to the plot
+        fig = plt.gcf()
+        addPlotInfo(fig, plotInfo)
+        statsText = ""
+        for info in percentileInfo:
+            statsText += f"{info}\n"
+        bbox = dict(edgecolor="grey", linestyle=":", facecolor="none")
+        fig.text(0.7, 0.075, statsText[:-1], bbox=bbox, transform=fig.transFigure, fontsize=6)
+        fig.subplots_adjust(bottom=0.2)
+        return fig
diff --git a/python/lsst/analysis/tools/atools/__init__.py b/python/lsst/analysis/tools/atools/__init__.py
@@ -33,6 +33,7 @@
 from .skyFluxStatisticMetrics import *
 from .skyObject import *
 from .skySource import *
+from .sourceInjectionPlots import *
 from .sources import *
 from .stellarLocus import *
 from .wholeSkyPlotTool import *