Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Fix FileNotFound error for Parquet files in S3 #430

Merged
merged 5 commits into from
Oct 31, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add a test for diag.history with local files
We need to exercise this function against different storage locations
because there is a bug when using an `s3://` URI. The first step is to add a
test for local files with a `file://` URI so that we don't regress.
esheehan-gsl committed Oct 30, 2023
commit d306a2e4615306fc70f49c6d48d311bcbd229600
60 changes: 60 additions & 0 deletions tests/test_diag.py
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@
from functools import partial

import numpy as np
import pandas as pd
import pytest
import xarray as xr
from botocore.session import Session
@@ -290,3 +291,62 @@ def test_lower_bounds(self, result, expected):

def test_upper_bounds(self, result, expected):
assert (result[0][2] == expected[0][2]).all()


def test_history(tmp_path, test_dataset, diag_parquet):
    """Check ``diag.history`` against data addressed through a ``file://`` URI.

    Two runs are built with the ``test_dataset`` fixture and handed to the
    ``diag_parquet`` fixture (presumably writing them beneath ``tmp_path`` --
    confirm against the fixture definition). ``diag.history`` is then called
    with ``file://{tmp_path}/`` and its result must equal a frame holding one
    row per initialization time with the min, max, mean and count that match
    the observation-minus-forecast values noted next to each run.
    """
    # Attributes shared by every run written for this test.
    common_attrs = {
        "model": "RTMA",
        "system": "WCOSS",
        "domain": "CONUS",
        "background": "RRFS",
        "frequency": "REALTIME",
        "variable": "ps",
        "loop": "ges",
    }

    # O - F [5, 10]
    early_run = {
        "initialization_time": "2022-05-16T04:00",
        "observation": [10, 20],
        "forecast_unadjusted": [5, 10],
        "is_used": [True, True],
    }
    # O - F [-4, -8, 0]
    late_run = {
        "initialization_time": "2022-05-16T07:00",
        "observation": [1, 2, 3],
        "forecast_unadjusted": [5, 10, 3],
        "longitude": [0, 0, 0],
        "latitude": [0, 0, 0],
        "is_used": [True, True, True],
    }

    for run_attrs in (early_run, late_run):
        dataset = test_dataset(**common_attrs, **run_attrs)
        diag_parquet(dataset)

    local_uri = f"file://{tmp_path}/"
    result = diag.history(
        local_uri,
        "RTMA",
        "WCOSS",
        "CONUS",
        "RRFS",
        "REALTIME",
        diag.Variable.PRESSURE,
        diag.MinimLoop.GUESS,
        MultiDict(),
    )

    expected = pd.DataFrame(
        {
            "initialization_time": ["2022-05-16T04:00", "2022-05-16T07:00"],
            "min": [5.0, -8.0],
            "max": [10.0, 0.0],
            "mean": [7.5, -4.0],
            "count": [2.0, 3.0],
        }
    )
    pd.testing.assert_frame_equal(result, expected)