-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for more EAMxx variables (#880)
Co-authored-by: Tom Vo <[email protected]>
- Loading branch information
1 parent
70ecf94
commit d01fd7a
Showing
19 changed files
with
659 additions
and
71 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
85 changes: 85 additions & 0 deletions
85
auxiliary_tools/cdat_regression_testing/892-bottleneck/debug_ref_u.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
""" | ||
This script is used to debug the bottleneck issue in the reference u variable. | ||
""" | ||
|
||
# %% | ||
import timeit | ||
|
||
import xarray as xr | ||
|
||
# Perlmutter | ||
# ---------- | ||
# filepaths = [ | ||
# "/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/time-series/ERA5/ua_197901_201912.nc" | ||
# ] | ||
|
||
# LCRC | ||
# ----- | ||
filepaths = [ | ||
"/lcrc/group/e3sm/diagnostics/observations/Atm/time-series/ERA5/ua_197901_201912.nc" | ||
] | ||
time_slice = slice("1996-01-15", "1997-01-15", None) | ||
|
||
# %% | ||
# Test case 1 - OPEN_MFDATASET() + "ua" dataset (76 GB) + subsetting + `.load()` | ||
# Result: .load() hangs when using `open_mfdataset` | ||
# ------------------------------------------------------------------------------ | ||
ds_ua_omfd = xr.open_mfdataset( | ||
filepaths, | ||
decode_times=True, | ||
use_cftime=True, | ||
coords="minimal", | ||
compat="override", | ||
) | ||
ds_ua_omfd_sub = ds_ua_omfd.sel(time=time_slice) | ||
|
||
# %% | ||
start_time = timeit.default_timer() | ||
ds_ua_omfd_sub.load() | ||
elapsed = timeit.default_timer() - start_time | ||
print(f"Time taken to load ds_xc_sub: {elapsed} seconds") | ||
|
||
# %% | ||
# Test case 2 - OPEN_DATASET() + "ua" dataset (76 GB) + subsetting + `.load()` | ||
# Result: load() works fine when using `open_dataset` | ||
# ------------------------------------------------------------------------------ | ||
ds_ua_od = xc.open_dataset( | ||
filepaths[0], | ||
add_bounds=["X", "Y", "T"], | ||
decode_times=True, | ||
use_cftime=True, | ||
# coords="minimal", | ||
# compat="override", | ||
) | ||
ds_ua_od_sub = ds_ua_od.sel(time=time_slice) | ||
|
||
# %% | ||
start_time = timeit.default_timer() | ||
ds_ua_od_sub.load() | ||
elapsed = timeit.default_timer() - start_time | ||
print(f"Time taken to load ds_xc_sub: {elapsed} seconds") | ||
|
||
# %% | ||
# Test case 3 - OPEN_MFDATASET() + "pr" dataset (2 GB) + subsetting + `.load()` | ||
# Result: ds.load() works fine with pr variable, but not with ua variable | ||
# Notes: pr is 3D variable (time, lat, lon), ua is a 4D variable (time, lat, lon, plev). | ||
# ------------------------------------------------------------------------------ | ||
filepaths_pr = [ | ||
"/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/time-series/ERA5/pr_197901_201912.nc" | ||
] | ||
ds_pr = xc.open_mfdataset( | ||
filepaths_pr, | ||
add_bounds=["X", "Y", "T"], | ||
decode_times=True, | ||
use_cftime=True, | ||
coords="minimal", | ||
compat="override", | ||
) | ||
|
||
# %% | ||
# pr dataset is ~2 GB without subsetting. There is no need to subset. | ||
start_time = timeit.default_timer() | ||
ds_pr.load() | ||
elapsed = timeit.default_timer() - start_time | ||
print(f"Time taken to load ds_xc_sub_0: {elapsed} seconds") | ||
# %% |
13 changes: 13 additions & 0 deletions
13
auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.cfg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
[#] | ||
sets = ["lat_lon"] | ||
case_id = "ERA5" | ||
variables = ["U"] | ||
ref_name = "ERA5" | ||
reference_name = "ERA5 Reanalysis" | ||
seasons = ["ANN", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "DJF", "MAM", "JJA", "SON"] | ||
plevs = [850.0] | ||
test_colormap = "PiYG_r" | ||
reference_colormap = "PiYG_r" | ||
contour_levels = [-20, -15, -10, -8, -5, -3, -1, 1, 3, 5, 8, 10, 15, 20] | ||
diff_levels = [-8, -6, -5, -4, -3, -2, -1, 1, 2, 3, 4, 5, 6, 8] | ||
regrid_method = "bilinear" |
39 changes: 39 additions & 0 deletions
39
auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
"""Driver script: run the lat_lon ERA5 ``U`` diagnostic via a custom cfg file."""
import os
import sys

from e3sm_diags.parameter.core_parameter import CoreParameter
from e3sm_diags.run import runner

param = CoreParameter()

# Input data locations (NERSC Perlmutter paths).
param.reference_data_path = (
    "/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/time-series"
)
param.test_data_path = "/global/cfs/cdirs/e3sm/chengzhu/eamxx/post/data/rgr"
param.test_name = "eamxx_decadal"
param.seasons = ["ANN"]
# param.save_netcdf = True

# Reference data is time-series input; the year range must match the
# years encoded in the reference filenames.
param.ref_timeseries_input = True
param.ref_start_yr = "1996"
param.ref_end_yr = "1996"

results_prefix = "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/892-bottleneck"
param.results_dir = os.path.join(results_prefix, "eamxx_decadal_1996_1107_edv3")

# Point the runner at the companion .cfg via the CLI argument mechanism.
diags_cfg = "auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.cfg"
sys.argv.extend(["--diags", diags_cfg])

runner.sets_to_run = [
    "lat_lon",
    "zonal_mean_xy",
    "zonal_mean_2d",
    "zonal_mean_2d_stratosphere",
    "polar",
    "cosp_histogram",
    "meridional_mean_2d",
    "annual_cycle_zonal_mean",
]

runner.run_diags([param])
20 changes: 20 additions & 0 deletions
20
auxiliary_tools/cdat_regression_testing/892-bottleneck/xr_mvce_e3sm_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# %%
"""MVCE: subset and load the ERA5 ``ua`` time series with ``open_mfdataset``."""
import timeit

import xarray as xr

# LCRC path to the 76 GB ERA5 zonal-wind time-series file.
filepaths = [
    "/lcrc/group/e3sm/diagnostics/observations/Atm/time-series/ERA5/ua_197901_201912.nc"
]

ds = xr.open_mfdataset(filepaths)

ds_sub = ds.sel(time=slice("1996-01-15", "1997-01-15", None))

# %%
start_time = timeit.default_timer()
ds_sub.ua.load()
elapsed = timeit.default_timer() - start_time
# FIX: the label previously said "ds_xc_sub" (copy-paste from another script);
# the dataset actually being timed here is `ds_sub`.
print(f"Time taken to load ds_sub: {elapsed} seconds")

# %%
48 changes: 48 additions & 0 deletions
48
auxiliary_tools/cdat_regression_testing/892-bottleneck/xr_mvce_gh.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# %%
"""MVCE for the xarray/dask load bottleneck: synthesize an ERA5-shaped 4-D
variable, write it to netCDF, reopen it, and time ``.load()``."""
import timeit

import dask.array as da
import numpy as np
import pandas as pd
import xarray as xr

# %%
# Define the dimensions (ERA5-like: 12 time steps, 37 levels, 0.25-deg grid).
time = 12
plev = 37
lat = 721
lon = 1440

# Create the data array lazily with dask (a single ~1.5 GB float32 chunk).
data = da.random.random(size=(time, plev, lat, lon), chunks=(12, 37, 721, 1440)).astype(
    np.float32
)

# Create the coordinates.
times = pd.date_range("2000-01-01", periods=time)
plevs = np.linspace(100000, 10, plev)
lats = np.linspace(-90, 90, lat)
lons = np.linspace(0, 360, lon, endpoint=False)

# Create the dataset and write out to a file.
ds = xr.Dataset(
    {"data": (["time", "plev", "lat", "lon"], data)},
    coords={"time": times, "plev": plevs, "lat": lats, "lon": lons},
)
# %%
ds.to_netcdf("dask_bottleneck.nc")

# %%
# Open the dataset.
ds_open = xr.open_mfdataset("dask_bottleneck.nc")

# %%
# Load the dataset into memory.
# FIX: time `ds_open.load()` (the file-backed dataset) rather than `ds.load()`.
# Timing `ds` only re-computes the in-memory dask array and never exercises
# the netCDF read path this MVCE is meant to measure.
start_time = timeit.default_timer()
ds_open.load()
end_time = timeit.default_timer()

print(f"Time taken to load the dataset: {end_time - start_time} seconds")

# %%
Oops, something went wrong.