From 691c4cfdc07f35d1ab2ff60c7c9509a9f05343d4 Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Fri, 16 Jun 2023 08:24:09 -0600 Subject: [PATCH 01/10] Update diag.history to read from parquet Update the tests and the implementation for retrieving historical data. In the process, I simplified the response type. I think, in the future, if we want to select unadjusted O - F or just the historical observations, we should build that into the URL, rather than sending all of the data back to the client and letting the client pick what to view. This cuts down on data transfer and allows us to simplify our components when they read data, since they don't have to parse a nested JavaScript object. --- src/unified_graphics/diag.py | 45 +++++++++------- src/unified_graphics/routes.py | 4 +- tests/conftest.py | 26 +++++++++- tests/test_unified_graphics.py | 94 +++++++++------------------------- 4 files changed, 77 insertions(+), 92 deletions(-) diff --git a/src/unified_graphics/diag.py b/src/unified_graphics/diag.py index d2a8e4de..f11abceb 100644 --- a/src/unified_graphics/diag.py +++ b/src/unified_graphics/diag.py @@ -2,10 +2,12 @@ from collections import namedtuple from dataclasses import dataclass from enum import Enum +from pathlib import Path from typing import Generator, List, Optional, Union from urllib.parse import urlparse import numpy as np +import pandas as pd import sqlalchemy as sa import xarray as xr import zarr # type: ignore @@ -527,24 +529,29 @@ def history( variable: Variable, loop: MinimLoop, filters: MultiDict, -): - for init_time in get_model_run_list( - diag_zarr, model, system, domain, background, frequency, variable - ): - result = summary( - diag_zarr, - model, - system, - domain, - background, - frequency, - init_time, - variable, - loop, - filters, - ) +) -> pd.DataFrame: + parquet_file = ( + Path(diag_zarr) + / ".." + / "_".join((model, background, system, domain, frequency)) + / variable.value + ) + + df = pd.read_parquet( + parquet_file, + columns=["initialization_time", "obs_minus_forecast_unadjusted"], + filters=(("loop", "=", loop.value), ("is_used", "=", 1)), + ) + + if df.empty: + return df - if not result: - continue + df = ( + df.sort_values("initialization_time") + .groupby("initialization_time") + .describe() + .droplevel(0, axis=1) # Drop a level from the columns created by the groupby + .reset_index() + ) - yield result + return df[["initialization_time", "min", "max", "mean", "count"]] diff --git a/src/unified_graphics/routes.py b/src/unified_graphics/routes.py index 52c6b072..88531e21 100644 --- a/src/unified_graphics/routes.py +++ b/src/unified_graphics/routes.py @@ -147,7 +147,9 @@ def history(model, system, domain, background, frequency, variable, loop): request.args, ) - return jsonify([d for d in data]) + return data.to_json(orient="records", date_format="iso"), { + "Content-Type": "application/json" + } @bp.route( diff --git a/tests/conftest.py b/tests/conftest.py index 685a5041..1436c46e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -191,7 +191,7 @@ def factory( # FIXME: Replace diag_dataset with this fixture -@pytest.fixture +@pytest.fixture(scope="session") def test_dataset(): def factory( *, @@ -305,6 +305,30 @@ def factory( return factory +@pytest.fixture +def diag_parquet(diag_zarr_file): + def factory( + ds: xr.Dataset, + ) -> Path: + parquet_file = ( + Path(diag_zarr_file) + / ".." 
+ / "_".join((ds.model, ds.background, ds.system, ds.domain, ds.frequency)) + / ds.name + ) + df = ds.to_dataframe() + df["loop"] = ds.loop + df["initialization_time"] = ds.initialization_time + + df.to_parquet( + parquet_file, partition_cols=["loop"], index=True, engine="pyarrow" + ) + + return parquet_file + + return factory + + @pytest.fixture def diag_zarr(diag_zarr_file, diag_dataset): def factory( diff --git a/tests/test_unified_graphics.py b/tests/test_unified_graphics.py index 9644a2a1..cc49b140 100644 --- a/tests/test_unified_graphics.py +++ b/tests/test_unified_graphics.py @@ -61,7 +61,7 @@ def test_scalar_diag(variable_name, variable_code, loop, diag_zarr, client): } -def test_scalar_history(diag_zarr, test_dataset, client): +def test_scalar_history(diag_parquet, test_dataset, client): run_list = [ { "initialization_time": "2022-05-16T04:00", @@ -83,7 +83,7 @@ def test_scalar_history(diag_zarr, test_dataset, client): for run in run_list: data = test_dataset(**run) - diag_zarr([data.name], data.initialization_time, data.loop, data=data) + diag_parquet(data) response = client.get("/diag/RTMA/WCOSS/CONUS/HRRR/REALTIME/ps/ges/") @@ -91,46 +91,22 @@ def test_scalar_history(diag_zarr, test_dataset, client): assert response.json == [ { "initialization_time": "2022-05-16T04:00", - "obs_minus_forecast_adjusted": { - "min": 5.0, - "max": 10.0, - "mean": 7.5, - }, - "observation": { - "min": 10.0, - "max": 20.0, - "mean": 15.0, - }, - "obs_minus_forecast_unadjusted": { - "min": 5.0, - "max": 10.0, - "mean": 7.5, - }, - "obs_count": 2, + "min": 5.0, + "max": 10.0, + "mean": 7.5, + "count": 2, }, { "initialization_time": "2022-05-16T07:00", - "obs_minus_forecast_adjusted": { - "min": -8.0, - "max": 0.0, - "mean": -4.0, - }, - "observation": { - "min": 1.0, - "max": 3.0, - "mean": 2.0, - }, - "obs_minus_forecast_unadjusted": { - "min": -8.0, - "max": 0.0, - "mean": -4.0, - }, - "obs_count": 3, + "min": -8.0, + "max": 0.0, + "mean": -4.0, + "count": 3, }, ] -def test_scalar_history_unused(diag_zarr, test_dataset, client): +def test_scalar_history_unused(diag_parquet, test_dataset, client): run_list = [ { "initialization_time": "2022-05-16T04:00", @@ -150,7 +126,7 @@ def test_scalar_history_unused(diag_zarr, test_dataset, client): for run in run_list: data = test_dataset(**run) - diag_zarr([data.name], data.initialization_time, data.loop, data=data) + diag_parquet(data) response = client.get("/diag/RTMA/WCOSS/CONUS/HRRR/REALTIME/ps/ges/") @@ -158,46 +134,22 @@ def test_scalar_history_unused(diag_zarr, test_dataset, client): assert response.json == [ { "initialization_time": "2022-05-16T04:00", - "obs_minus_forecast_adjusted": { - "min": 5.0, - "max": 5.0, - "mean": 5.0, - }, - "observation": { - "min": 10.0, - "max": 10.0, - "mean": 10.0, - }, - "obs_minus_forecast_unadjusted": { - "min": 5.0, - "max": 5.0, - "mean": 5.0, - }, - "obs_count": 1, + "min": 5.0, + "max": 5.0, + "mean": 5.0, + "count": 1, }, { "initialization_time": "2022-05-16T07:00", - "obs_minus_forecast_adjusted": { - "min": -8.0, - "max": -8.0, - "mean": -8.0, - }, - "observation": { - "min": 2.0, - "max": 2.0, - "mean": 2.0, - }, - "obs_minus_forecast_unadjusted": { - "min": -8.0, - "max": -8.0, - "mean": -8.0, - }, - "obs_count": 1, + "min": -8.0, + "max": -8.0, + "mean": -8.0, + "count": 1, }, ] -def test_scalar_history_empty(diag_zarr, test_dataset, client): +def test_scalar_history_empty(diag_parquet, test_dataset, client): run_list = [ { "initialization_time": "2022-05-16T04:00", @@ -211,7 +163,7 @@ def 
test_scalar_history_empty(diag_zarr, test_dataset, client): for run in run_list: data = test_dataset(**run) - diag_zarr([data.name], data.initialization_time, data.loop, data=data) + diag_parquet(data) response = client.get("/diag/RTMA/WCOSS/CONUS/HRRR/REALTIME/ps/ges/") @@ -220,7 +172,7 @@ def test_scalar_history_empty(diag_zarr, test_dataset, client): @pytest.mark.xfail -def test_vectory_history(): +def test_vector_history(): assert 0, "Not implemented" From 48076054343450019c4dc2edf0c5b31d2d49543b Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Fri, 16 Jun 2023 08:38:53 -0600 Subject: [PATCH 02/10] Remove some unused code from the history implementation Removed some of the dataclasses and functions that had been part of the history API which are no longer used, now that we have the Parquet implementation. --- src/unified_graphics/diag.py | 71 +----------------------------------- 1 file changed, 1 insertion(+), 70 deletions(-) diff --git a/src/unified_graphics/diag.py b/src/unified_graphics/diag.py index f11abceb..d9e30aea 100644 --- a/src/unified_graphics/diag.py +++ b/src/unified_graphics/diag.py @@ -3,7 +3,7 @@ from dataclasses import dataclass from enum import Enum from pathlib import Path -from typing import Generator, List, Optional, Union +from typing import Generator, List, Union from urllib.parse import urlparse import numpy as np @@ -77,44 +77,6 @@ def to_geojson(self): } -@dataclass -class SummaryStatistics: - min: float - max: float - mean: float - - @classmethod - def from_data_array(cls, array: xr.DataArray) -> "SummaryStatistics": - return cls( - min=float(array.min()), - max=float(array.max()), - mean=float(array.mean()), - ) - - -@dataclass -class DiagSummary: - initialization_time: str - obs_minus_forecast_adjusted: SummaryStatistics - obs_minus_forecast_unadjusted: SummaryStatistics - observation: SummaryStatistics - obs_count: int - - @classmethod - def from_dataset(cls, dataset: xr.Dataset) -> "DiagSummary": - return cls( - initialization_time=dataset.attrs["initialization_time"], - obs_minus_forecast_adjusted=SummaryStatistics.from_data_array( - dataset["obs_minus_forecast_adjusted"] - ), - obs_minus_forecast_unadjusted=SummaryStatistics.from_data_array( - dataset["obs_minus_forecast_unadjusted"] - ), - observation=SummaryStatistics.from_data_array(dataset["observation"]), - obs_count=len(dataset["nobs"]), - ) - - ModelMetadata = namedtuple( "ModelMetadata", ( @@ -488,37 +450,6 @@ def get_model_run_list( return group.group_keys() -def summary( - diag_zarr: str, - model: str, - system: str, - domain: str, - background: str, - frequency: str, - initialization_time: str, - variable: Variable, - loop: MinimLoop, - filters: MultiDict, -) -> Optional[DiagSummary]: - store = get_store(diag_zarr) - path = "/".join( - [ - model, - system, - domain, - background, - frequency, - variable.value, - initialization_time, - loop.value, - ] - ) - - ds = xr.open_zarr(store, group=path, consolidated=False) - ds = apply_filters(ds, filters) - return DiagSummary.from_dataset(ds) if len(ds["nobs"]) > 0 else None - - def history( diag_zarr: str, model: str, From 0b984f2935065c576c3db8a0f76dc3f7031c32af Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Tue, 20 Jun 2023 11:02:26 -0600 Subject: [PATCH 03/10] Test with a file:// prototocol The diag.history function fails when there's a file:// prefix in the diag file path because apparently Pandas gets a little confused and tries to pass it to urllib instead of just opening the file. 
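For context, a minimal sketch of the situation described above: `pd.read_parquet` is
pointed at a directory of Hive-partitioned files, so a `file://` URL has to be reduced
to a plain filesystem path before the call. The resolver below uses `urlparse` (a later
patch in this series simply strips the prefix instead), and the root path and dataset
name are hypothetical; the per-variable layout mirrors what this series writes.

    from pathlib import Path
    from urllib.parse import urlparse

    import pandas as pd


    def to_local_path(uri_or_path: str) -> Path:
        """Resolve a file:// URI (or a plain path) to a local filesystem Path."""
        parsed = urlparse(uri_or_path)
        return Path(parsed.path) if parsed.scheme == "file" else Path(uri_or_path)


    # Hypothetical root; layout: <root>/<model>_<background>_<system>_<domain>_<frequency>/<variable>
    root = to_local_path("file:///data/diag-parquet")
    df = pd.read_parquet(
        root / "RTMA_HRRR_WCOSS_CONUS_REALTIME" / "ps",
        columns=["initialization_time", "obs_minus_forecast_unadjusted"],
        filters=[("loop", "=", "ges"), ("is_used", "=", True)],
    )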
--- src/unified_graphics/diag.py | 2 ++ tests/conftest.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/unified_graphics/diag.py b/src/unified_graphics/diag.py index d9e30aea..6ad16126 100644 --- a/src/unified_graphics/diag.py +++ b/src/unified_graphics/diag.py @@ -461,6 +461,8 @@ def history( loop: MinimLoop, filters: MultiDict, ) -> pd.DataFrame: + # FIXME: This fails when diag_zarr is a file:// URL. Pandas ends up trying to use + # urlopen to read the file, but it's a directory parquet_file = ( Path(diag_zarr) / ".." diff --git a/tests/conftest.py b/tests/conftest.py index 1436c46e..0b06513e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -120,7 +120,7 @@ def session(engine): @pytest.fixture def diag_zarr_file(tmp_path): - return str(tmp_path / "test_diag.zarr") + return f"file://{tmp_path / 'test_diag.zarr'}" @pytest.fixture From 6e295ed4c35985f30c431b767375a00b985f4e94 Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Wed, 4 Oct 2023 15:02:31 -0600 Subject: [PATCH 04/10] Revert "Remove the time series for now" This reverts commit 716a400ca63ac876f6c7dc4c3565d8c46cf134ad. --- .../templates/layouts/diag.html | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/unified_graphics/templates/layouts/diag.html b/src/unified_graphics/templates/layouts/diag.html index f1d25c76..8c80c1a2 100644 --- a/src/unified_graphics/templates/layouts/diag.html +++ b/src/unified_graphics/templates/layouts/diag.html @@ -22,11 +22,20 @@

 {% if minim_loop == "ges" %}Guess{% else %}Analysis
 [hunk body garbled during extraction; the HTML tags were stripped. Recoverable text
 only: the scalar branch keeps its "Observation − Forecast" chart, the removed lines
 held the "Observation Count" and "Observation − Forecast" charts, and the added
 lines restore those two charts alongside the time-series chart of
 "Observation − Forecast" against "Initialization Time"]
{%- endif %} From b480a0d2f7726269016e224cdd10edf515b79d30 Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Wed, 4 Oct 2023 15:13:02 -0600 Subject: [PATCH 05/10] Use env variable to find Parquet files in routes Instead of deriving the Parquet file path from the Zarr path, the route reads in the FLASK_DIAG_PARQUET variable from the environment and passes that explicitly to diag.history. diag.history uses that to find the Parquet file for this model, and strips out `file://` if it's present because Pandas is a bit silly about that. --- src/unified_graphics/diag.py | 10 +++++----- src/unified_graphics/routes.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/unified_graphics/diag.py b/src/unified_graphics/diag.py index 6ad16126..dd0e01c9 100644 --- a/src/unified_graphics/diag.py +++ b/src/unified_graphics/diag.py @@ -451,7 +451,7 @@ def get_model_run_list( def history( - diag_zarr: str, + parquet_path: str, model: str, system: str, domain: str, @@ -462,10 +462,10 @@ def history( filters: MultiDict, ) -> pd.DataFrame: # FIXME: This fails when diag_zarr is a file:// URL. Pandas ends up trying to use - # urlopen to read the file, but it's a directory + # urlopen to read the file, but it's a directory. For now, we strip file://, but + # this is a hack. parquet_file = ( - Path(diag_zarr) - / ".." + Path(parquet_path.replace("file://", "")) / "_".join((model, background, system, domain, frequency)) / variable.value ) @@ -473,7 +473,7 @@ def history( df = pd.read_parquet( parquet_file, columns=["initialization_time", "obs_minus_forecast_unadjusted"], - filters=(("loop", "=", loop.value), ("is_used", "=", 1)), + filters=(("loop", "=", loop.value), ("is_used", "=", True)), ) if df.empty: diff --git a/src/unified_graphics/routes.py b/src/unified_graphics/routes.py index 88531e21..6311d7c8 100644 --- a/src/unified_graphics/routes.py +++ b/src/unified_graphics/routes.py @@ -136,7 +136,7 @@ def serviceworker(): @bp.route("/diag////////") def history(model, system, domain, background, frequency, variable, loop): data = diag.history( - current_app.config["DIAG_ZARR"], + current_app.config["DIAG_PARQUET"], model, system, domain, From 13bdd7fc154a75bc3ae0ff271d3b8a3a36689200 Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Wed, 4 Oct 2023 15:18:02 -0600 Subject: [PATCH 06/10] Update property access for time series charts The charts really need to be refactored (in part) to eliminate these hard-coded property accesses, but for now we can just update the properties that the time series component expects so that we can include test out the time series data. 
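To make the flattened shape concrete, here is a self-contained sketch (numbers made up)
of the summary that `diag.history` builds and that the time series chart consumes. Each
record is flat, so the component can read `d.min`, `d.max`, and `d.mean` directly
instead of digging into a nested object.

    import pandas as pd

    # Made-up O - F values for two model runs.
    df = pd.DataFrame(
        {
            "initialization_time": ["2022-05-16T04:00"] * 2 + ["2022-05-16T07:00"] * 3,
            "obs_minus_forecast_unadjusted": [5.0, 10.0, -4.0, -8.0, 0.0],
        }
    )

    summary = (
        df.sort_values("initialization_time")
        .groupby("initialization_time")
        .describe()
        .droplevel(0, axis=1)  # drop the variable name level from the columns
        .reset_index()
    )[["initialization_time", "min", "max", "mean", "count"]]

    # [{"initialization_time":"2022-05-16T04:00","min":5.0,"max":10.0,"mean":7.5,"count":2.0}, ...]
    print(summary.to_json(orient="records"))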
--- .../static/js/component/ChartTimeSeries.js | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/unified_graphics/static/js/component/ChartTimeSeries.js b/src/unified_graphics/static/js/component/ChartTimeSeries.js index d85da1dd..65a46f00 100644 --- a/src/unified_graphics/static/js/component/ChartTimeSeries.js +++ b/src/unified_graphics/static/js/component/ChartTimeSeries.js @@ -177,10 +177,7 @@ export default class ChartTimeSeries extends ChartElement { } get yScale() { - const domain = [ - min(this.#data, (d) => d.obs_minus_forecast_adjusted.min), - max(this.#data, (d) => d.obs_minus_forecast_adjusted.max), - ]; + const domain = [min(this.#data, (d) => d.min), max(this.#data, (d) => d.max)]; const { top, bottom } = this.margin; const height = this.height - top - bottom; @@ -196,12 +193,12 @@ export default class ChartTimeSeries extends ChartElement { const { xScale, yScale } = this; const rangeArea = area() .x((d) => xScale(d.initialization_time)) - .y0((d) => yScale(d.obs_minus_forecast_adjusted.min)) - .y1((d) => yScale(d.obs_minus_forecast_adjusted.max)) + .y0((d) => yScale(d.min)) + .y1((d) => yScale(d.max)) .curve(curveBumpX); const meanLine = line() .x((d) => xScale(d.initialization_time)) - .y((d) => yScale(d.obs_minus_forecast_adjusted.mean)) + .y((d) => yScale(d.mean)) .curve(curveBumpX); this.#svg.attr("viewBox", `0 0 ${this.width} ${this.height}`); From 79aa304e98d08477effa208aa4119705aaf6e7d3 Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Wed, 11 Oct 2023 10:19:08 -0600 Subject: [PATCH 07/10] Add parquet config to the test app --- tests/conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 0b06513e..923b24b3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -124,11 +124,12 @@ def diag_zarr_file(tmp_path): @pytest.fixture -def app(diag_zarr_file, test_db): +def app(tmp_path, diag_zarr_file, test_db): _app = create_app( { "SQLALCHEMY_DATABASE_URI": test_db, "DIAG_ZARR": diag_zarr_file, + "DIAG_PARQUET": f"file://{tmp_path}", } ) From 92378511d9d459990d0f2548202e8cd9cabe9b1c Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Wed, 11 Oct 2023 10:19:48 -0600 Subject: [PATCH 08/10] Use tmp_path to store parquet fixtures Instead of deriving the path to save parquet fixtures for tests, we use tmp_path, basically the same way we would for production. --- tests/conftest.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 923b24b3..980abd7d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -307,13 +307,12 @@ def factory( @pytest.fixture -def diag_parquet(diag_zarr_file): +def diag_parquet(tmp_path): def factory( ds: xr.Dataset, ) -> Path: parquet_file = ( - Path(diag_zarr_file) - / ".." + tmp_path / "_".join((ds.model, ds.background, ds.system, ds.domain, ds.frequency)) / ds.name ) From 6cf5524b2aed51523e661e36b77c7eab179ee8cb Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Wed, 11 Oct 2023 10:20:36 -0600 Subject: [PATCH 09/10] Strip file protocol from diag zarr config It seems like pathlib.Path is having an issue with the `file://` protocol on our environment variables, although I don't think it used to. It's easy enough to strip this, although this may require more robust path/uri handling in our tests. 
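As a quick illustration of the `pathlib` behaviour described above (hypothetical path,
POSIX assumed): `Path` treats the scheme as just another path component, so the prefix
has to come off before the value is usable as a filesystem path.

    from pathlib import Path

    uri = "file:///tmp/test_diag.zarr"

    print(Path(uri))                         # file:/tmp/test_diag.zarr, not a usable path
    print(Path(uri.replace("file://", "")))  # /tmp/test_diag.zarr, works with .touch(), .exists()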
--- tests/test_unified_graphics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_unified_graphics.py b/tests/test_unified_graphics.py index cc49b140..810b7608 100644 --- a/tests/test_unified_graphics.py +++ b/tests/test_unified_graphics.py @@ -608,7 +608,7 @@ def test_diag_not_found(variable, client): ["t", "q", "ps", "uv"], ) def test_diag_read_error(variable, app, client): - Path(app.config["DIAG_ZARR"]).touch() + Path(app.config["DIAG_ZARR"].replace("file://", "")).touch() response = client.get( f"/diag/RTMA/WCOSS/CONUS/HRRR/REALTIME/{variable}/2022-05-05T14:00/ges/" From 5d93be04ef394c6fda5a6709a9defb7cc5646c94 Mon Sep 17 00:00:00 2001 From: "W. Evan Sheehan" Date: Wed, 11 Oct 2023 10:38:00 -0600 Subject: [PATCH 10/10] Fix data type for is_used in test fixtures When we process diag files in our pipeline, we convert the integers in the diag files for is_used into booleans, so should treat them the same way in our test fixtures. I think this is a sign that our test fixtures are poorly set up, since they can get out of sync with reality. The result of this problem was that either the application worked, or our tests passed. Prior to this change, we needed to compare is_used to a boolean for Parquet files generated with our ETL code, but to an integer for test files generated with our fixtures. --- tests/conftest.py | 4 ++-- tests/test_unified_graphics.py | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 980abd7d..348b9777 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -206,7 +206,7 @@ def factory( loop: str = "ges", longitude: list[float] = [90, 91], latitude: list[float] = [22, 23], - is_used: list[int] = [1, 0], + is_used: list[bool] = [True, False], observation: list[float] = [1, 0], forecast_unadjusted: list[float] = [0, 1], forecast_adjusted: Optional[list[float]] = None, @@ -233,7 +233,7 @@ def factory( coords=dict( longitude=(["nobs"], np.array(longitude, dtype=np.float64)), latitude=(["nobs"], np.array(latitude, dtype=np.float64)), - is_used=(["nobs"], np.array(is_used, dtype=np.int8)), + is_used=(["nobs"], np.array(is_used)), **kwargs, ), attrs={ diff --git a/tests/test_unified_graphics.py b/tests/test_unified_graphics.py index 810b7608..37cdca7d 100644 --- a/tests/test_unified_graphics.py +++ b/tests/test_unified_graphics.py @@ -67,7 +67,7 @@ def test_scalar_history(diag_parquet, test_dataset, client): "initialization_time": "2022-05-16T04:00", "observation": [10, 20], "forecast_unadjusted": [5, 10], - "is_used": [1, 1], + "is_used": [True, True], # O - F [5, 10] }, { @@ -76,7 +76,7 @@ def test_scalar_history(diag_parquet, test_dataset, client): "forecast_unadjusted": [5, 10, 3], "longitude": [0, 0, 0], "latitude": [0, 0, 0], - "is_used": [1, 1, 1], + "is_used": [True, True, True], # O - F [-4, -8, 0] }, ] @@ -112,14 +112,14 @@ def test_scalar_history_unused(diag_parquet, test_dataset, client): "initialization_time": "2022-05-16T04:00", "observation": [10, 20], "forecast_unadjusted": [5, 10], - "is_used": [1, 0], + "is_used": [True, False], # O - F [5, 10] }, { "initialization_time": "2022-05-16T07:00", "observation": [1, 2], "forecast_unadjusted": [5, 10], - "is_used": [0, 1], + "is_used": [False, True], # O - F [-4, -8] }, ] @@ -153,11 +153,11 @@ def test_scalar_history_empty(diag_parquet, test_dataset, client): run_list = [ { "initialization_time": "2022-05-16T04:00", - "is_used": [0, 0], + "is_used": [False, False], }, { "initialization_time": 
"2022-05-16T07:00", - "is_used": [0, 0], + "is_used": [False, False], }, ] @@ -324,7 +324,7 @@ def test_range_filter_scalar(diag_zarr, client): coords=dict( longitude=(["nobs"], [90, -160]), latitude=(["nobs"], [22, 25]), - is_used=(["nobs"], [0, 1]), + is_used=(["nobs"], [False, True]), ), attrs={ "name": variable, @@ -446,7 +446,7 @@ def test_range_filter_vector(diag_zarr, client): component=["u", "v"], longitude=(["nobs"], [90, -160]), latitude=(["nobs"], [22, 25]), - is_used=(["nobs"], [1, 0]), + is_used=(["nobs"], [True, False]), ), attrs={ "name": variable,