Skip to content

Commit

Permalink
fix inconsistencies in netcdf output
Browse files Browse the repository at this point in the history
  • Loading branch information
jmccreight committed Dec 15, 2023
1 parent 1d82587 commit 9765eaf
Show file tree
Hide file tree
Showing 13 changed files with 395 additions and 208 deletions.
89 changes: 68 additions & 21 deletions autotest/test_netcdf_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def control(domain):
control.edit_n_time_steps(n_time_steps)
control.options["budget_type"] = "error"
del control.options["netcdf_output_var_names"]
del control.options["netcdf_output_dir"]
return control


Expand All @@ -40,6 +41,15 @@ def control(domain):
# optional variables to budgets

check_vars = {
"PRMSSolarGeometry": [
"soltab_horad_potsw",
"soltab_potsw",
],
"PRMSAtmosphere": [
"tminf",
"potet",
"swrad",
],
"PRMSCanopy": [
"hru_intcpstor",
"hru_intcpstor_change",
Expand All @@ -65,7 +75,24 @@ def control(domain):
def test_process_budgets(domain, control, params, tmp_path, budget_sum_param):
tmp_dir = pl.Path(tmp_path)
# print(tmp_dir)
model_procs = [pywatershed.PRMSCanopy, pywatershed.PRMSChannel]
model_procs = [
pywatershed.PRMSSolarGeometry,
pywatershed.PRMSAtmosphere,
pywatershed.PRMSCanopy,
pywatershed.PRMSChannel,
]

# set up input_dir with copies of the prms inputs and outputs
domain_output_dir = domain["prms_output_dir"]
input_dir = tmp_path / "input"
input_dir.mkdir()
control.options["input_dir"] = input_dir

# Could limit this to just the variables in model_procs
for ff in domain_output_dir.resolve().glob("*.nc"):
shutil.copy(ff, input_dir / ff.name)
for ff in domain_output_dir.parent.resolve().glob("*.nc"):
shutil.copy(ff, input_dir / ff.name)

# Deal with parameter around what budget sum vars to write and check
if budget_sum_param == "some":
Expand All @@ -80,9 +107,7 @@ def test_process_budgets(domain, control, params, tmp_path, budget_sum_param):
else:
raise ValueError("upexpected value")

# dont need any PRMS inputs for the model specified, so this is sufficient
input_dir = domain["prms_output_dir"]
control.options["input_dir"] = input_dir
control.options["netcdf_output_dir"] = tmp_dir

# TODO: Eliminate potet and other variables from being used
model = Model(
Expand All @@ -91,27 +116,26 @@ def test_process_budgets(domain, control, params, tmp_path, budget_sum_param):
parameters=params,
)

# we are going to harvest the data from memory and store here
check_dict = {proc: {} for proc in check_vars.keys()}

# test outputting specific vars by only using check_vars
output_vars = [
item for sublist in list(check_vars.values()) for item in sublist
]
output_vars = None

with pytest.warns(UserWarning):
with pytest.raises(ValueError):
model.initialize_netcdf(
tmp_dir,
pl.Path("foo"),
budget_args=budget_args,
output_vars=output_vars,
)

with pytest.raises(RuntimeError):
model.initialize_netcdf(
tmp_dir,
budget_args=budget_args,
output_vars=output_vars,
)
model.initialize_netcdf(
output_dir=tmp_dir, # should allow a matching argument to control
budget_args=budget_args,
output_vars=output_vars,
)

for tt in range(n_time_steps):
model.advance()
Expand All @@ -122,11 +146,24 @@ def test_process_budgets(domain, control, params, tmp_path, budget_sum_param):
for vv in pp_vars:
if tt == 0:
# use the output data to figure out the shape
check_dict[pp][vv] = np.zeros(
(n_time_steps, model.processes[pp][vv].shape[0])
)
if isinstance(
model.processes[pp][vv], pywatershed.TimeseriesArray
):
spatial_len = model.processes[pp][vv].data.shape[1]
else:
spatial_len = model.processes[pp][vv].shape[0]

check_dict[pp][vv][tt, :] = model.processes[pp][vv]
check_dict[pp][vv] = np.zeros((n_time_steps, spatial_len))

if isinstance(
model.processes[pp][vv], pywatershed.TimeseriesArray
):
check_dict[pp][vv][tt, :] = model.processes[pp][vv].current
else:
check_dict[pp][vv][tt, :] = model.processes[pp][vv]

if pp in ["PRMSSolarGeometry", "PRMSAtmosphere"]:
continue

for bb in check_budget_sum_vars:
if tt == 0:
Expand All @@ -148,7 +185,15 @@ def test_process_budgets(domain, control, params, tmp_path, budget_sum_param):
for pp, pp_vars in check_vars.items():
for vv in pp_vars:
nc_data = xr.open_dataset(tmp_dir / f"{vv}.nc")[vv]
assert np.allclose(check_dict[pp][vv], nc_data)
if vv in pywatershed.PRMSSolarGeometry.get_variables():
assert np.allclose(
check_dict[pp][vv], nc_data[0:n_time_steps, :]
)
else:
assert np.allclose(check_dict[pp][vv], nc_data)

if pp in ["PRMSSolarGeometry", "PRMSAtmosphere"]:
continue

for bb in check_budget_sum_vars:
nc_data = xr.open_dataset(tmp_dir / f"{pp}_budget.nc")[bb]
Expand Down Expand Up @@ -194,14 +239,12 @@ def test_separate_together_var_list(
]

# setup input_dir with symlinked prms inputs and outputs
test_output_dir = tmp_dir / "test_results"
domain_output_dir = domain["prms_output_dir"]
input_dir = tmp_path / "input"
input_dir.mkdir()
control.options["input_dir"] = input_dir
control.options["netcdf_output_var_names"] = output_vars
control.options["netcdf_output_separate_files"] = separate
del control.options["netcdf_output_dir"]

# Could limit this to just the variables in model_procs
for ff in domain_output_dir.resolve().glob("*.nc"):
Expand All @@ -218,6 +261,7 @@ def test_separate_together_var_list(
# passing no output_dir arg and none in opts throws an error
model.initialize_netcdf()

test_output_dir = tmp_dir / "test_results"
control.options["netcdf_output_dir"] = test_output_dir
model = Model(
model_procs,
Expand Down Expand Up @@ -257,7 +301,10 @@ def test_separate_together_var_list(
assert nc_file.exists()

ds = xr.open_dataset(nc_file, decode_timedelta=False)
proc_vars = set(proc.get_variables())
if output_vars is None:
proc_vars = set(proc.get_variables())
else:
proc_vars = set(check_vars[proc_key])
nc_vars = set(ds.data_vars)
assert proc_vars == nc_vars
for vv in proc.variables:
Expand Down
5 changes: 3 additions & 2 deletions autotest/test_nhm_self_drive.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,15 @@ def test_drive_indiv_process(domain, tmp_path):
control.options["calc_method"] = "numba"
control.options["input_dir"] = domain["prms_run_dir"]
del control.options["netcdf_output_var_names"]
del control.options["netcdf_output_dir"]

nhm = pws.Model(
nhm_processes,
control=control,
parameters=params,
)
with pytest.warns(UserWarning):
nhm.initialize_netcdf(output_dir=nhm_output_dir)

nhm.initialize_netcdf(output_dir=nhm_output_dir)

nhm.run(finalize=True)
del nhm, params, control
Expand Down
7 changes: 5 additions & 2 deletions autotest/test_prms_atmosphere.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@

@pytest.fixture(scope="function")
def control(domain):
return Control.load_prms(domain["control_file"], warn_unused_options=False)
ctl = Control.load_prms(domain["control_file"], warn_unused_options=False)
del ctl.options["netcdf_output_dir"]
return ctl


@pytest.fixture(scope="function")
Expand Down Expand Up @@ -57,9 +59,10 @@ def test_compare_prms(domain, control, discretization, parameters, tmp_path):
discretization=discretization,
parameters=parameters,
**input_variables,
netcdf_output_dir=tmp_path,
)

atm.initialize_netcdf(output_dir=tmp_path)

if do_compare_in_memory:
answers = {}
for var in comparison_var_names:
Expand Down
5 changes: 4 additions & 1 deletion autotest/test_prms_canopy.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@

@pytest.fixture(scope="function")
def control(domain):
return Control.load_prms(domain["control_file"], warn_unused_options=False)
ctl = Control.load_prms(domain["control_file"], warn_unused_options=False)
del ctl.options["netcdf_output_dir"]
del ctl.options["netcdf_output_var_names"]
return ctl


@pytest.fixture(scope="function")
Expand Down
5 changes: 4 additions & 1 deletion autotest/test_prms_channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@

@pytest.fixture(scope="function")
def control(domain):
return Control.load_prms(domain["control_file"], warn_unused_options=False)
ctl = Control.load_prms(domain["control_file"], warn_unused_options=False)
del ctl.options["netcdf_output_dir"]
del ctl.options["netcdf_output_var_names"]
return ctl


@pytest.fixture(scope="function")
Expand Down
7 changes: 5 additions & 2 deletions autotest/test_prms_solar_geom.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@

@pytest.fixture(scope="function")
def control(domain):
return Control.load_prms(domain["control_file"], warn_unused_options=False)
ctl = Control.load_prms(domain["control_file"], warn_unused_options=False)
del ctl.options["netcdf_output_dir"]
return ctl


@pytest.fixture(scope="function")
Expand Down Expand Up @@ -57,9 +59,10 @@ def test_compare_prms(
discretization=discretization,
parameters=parameters,
from_prms_file=from_prms_file,
netcdf_output_dir=tmp_path,
)

solar_geom.initialize_netcdf(output_dir=tmp_path)

if do_compare_in_memory:
answers = {}
for var in PRMSSolarGeometry.get_variables():
Expand Down
2 changes: 1 addition & 1 deletion doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Pywatershed enhances PRMS with a new software design that is object-oriented and
flexible, allowing users to easily run "sub-models", replace process representations, and
incorporate new data. The Python language is accessible to a wide audience of
potential contributors which will help foster community development and experimentation.
A large number of advanced libraries are available for Python can be applied to
A large number of advanced libraries available for Python can be applied to
hydrologic modeling, including libraries for parallelism, data access and manipulation,
and machine learning.

Expand Down
Loading

0 comments on commit 9765eaf

Please sign in to comment.