So many fixes

collijk committed Jul 9, 2024
1 parent ae7b477 commit cef92b3
Showing 7 changed files with 84 additions and 81 deletions.
2 changes: 1 addition & 1 deletion src/climate_downscale/data.py
@@ -37,7 +37,7 @@ def extracted_era5(self) -> Path:
     def extracted_era5_path(
         self, dataset: str, variable: str, year: int | str, month: str
     ) -> Path:
-        return self.extracted_era5 / f"{dataset}_{variable}_{year}_{month}.nc"
+        return self.extracted_era5 / f"reanalysis-era5-{dataset}_{variable}_{year}_{month}.nc"

     @property
     def extracted_cmip6(self) -> Path:
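
Extracted ERA5 files now carry the full CDS product prefix. A minimal sketch of what the renamed helper produces (the root path and argument values are hypothetical, not taken from this repo's config):

    from pathlib import Path

    extracted_era5 = Path("/data/era5/extracted")  # hypothetical root

    def extracted_era5_path(dataset: str, variable: str, year: int | str, month: str) -> Path:
        # The filename now embeds the CDS product family, e.g. "reanalysis-era5-land".
        return extracted_era5 / f"reanalysis-era5-{dataset}_{variable}_{year}_{month}.nc"

    print(extracted_era5_path("land", "2m_temperature", 2020, "01"))
    # /data/era5/extracted/reanalysis-era5-land_2m_temperature_2020_01.nc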
3 changes: 0 additions & 3 deletions src/climate_downscale/generate/derived_daily.py
@@ -13,19 +13,16 @@
         source_variables=["mean_temperature", "relative_humidity"],
         transform_funcs=[utils.heat_index],
         encoding_scale=0.01,
-        encoding_offset=273.15,
     ),
     "humidex": utils.Transform(
         source_variables=["mean_temperature", "relative_humidity"],
         transform_funcs=[utils.humidex],
         encoding_scale=0.01,
-        encoding_offset=273.15,
     ),
     "effective_temperature": utils.Transform(
         source_variables=["mean_temperature", "relative_humidity", "wind_speed"],
         transform_funcs=[utils.effective_temperature],
         encoding_scale=0.01,
-        encoding_offset=273.15,
     ),
 }

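
All three derived measures drop the 273.15 encoding offset while keeping the 0.01 scale. For orientation, under the CF packing convention a stored integer decodes as stored * scale_factor + add_offset; a sketch with xarray (the int16 dtype and sample values are assumptions, not read from this repo):

    import numpy as np
    import xarray as xr

    heat_index = xr.DataArray(np.array([295.12, 301.47]), name="heat_index")

    # With the old offset: stored = round((value - 273.15) / 0.01) -> 2197, 2832.
    heat_index.encoding = {"dtype": "int16", "scale_factor": 0.01, "add_offset": 273.15}

    # Without it, as of this commit: stored = round(value / 0.01) -> 29512, 30147,
    # which still fits int16 (max 32767) for Kelvin-range values.
    heat_index.encoding = {"dtype": "int16", "scale_factor": 0.01}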
7 changes: 2 additions & 5 deletions src/climate_downscale/generate/historical_daily.py
@@ -35,19 +35,16 @@
         source_variables=["2m_temperature"],
         transform_funcs=[utils.daily_mean],
         encoding_scale=0.01,
-        encoding_offset=273.15,
     ),
     "max_temperature": utils.Transform(
         source_variables=["2m_temperature"],
         transform_funcs=[utils.daily_max],
         encoding_scale=0.01,
-        encoding_offset=273.15,
     ),
     "min_temperature": utils.Transform(
         source_variables=["2m_temperature"],
         transform_funcs=[utils.daily_min],
         encoding_scale=0.01,
-        encoding_offset=273.15,
     ),
     "wind_speed": utils.Transform(
         source_variables=["10m_u_component_of_wind", "10m_v_component_of_wind"],
@@ -98,8 +95,8 @@ def load_variable(
         ds = load_and_shift_longitude(path)
         # There are some slight numerical differences in the lat/long for some of
         # the land datasets. They are gridded consistently, so just tweak the
-        # coordinates so things align.
-        ds = ds.assign_coords(latitude=utils.TARGET_LAT, longitude=utils.TARGET_LON)
+        # coordinates so things align.
+        ds = ds.assign_coords(latitude=utils.TARGET_LAT[::-1], longitude=utils.TARGET_LON)
     else:
         ds = load_and_shift_longitude(path)
     conversion = CONVERT_MAP[variable]
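
The second hunk snaps the land grid onto a reversed canonical latitude axis rather than the ascending one. A sketch of the idea with synthetic grids (the real axes live in climate_downscale.generate.utils; these stand-ins only mimic their shape):

    import numpy as np
    import xarray as xr

    TARGET_LAT = np.round(np.arange(-90.0, 90.01, 0.1), 1)  # canonical, ascending

    rng = np.random.default_rng(0)
    noisy_lat = TARGET_LAT[::-1] + rng.uniform(-1e-7, 1e-7, TARGET_LAT.size)

    ds = xr.Dataset(coords={"latitude": noisy_lat})
    # The land files are gridded consistently but store latitude descending
    # with tiny float noise, hence snapping to the *reversed* canonical axis.
    ds = ds.assign_coords(latitude=TARGET_LAT[::-1])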
2 changes: 1 addition & 1 deletion src/climate_downscale/generate/historical_reference.py
@@ -63,7 +63,7 @@ def generate_historical_reference_task(

 @click.command() # type: ignore[arg-type]
 @clio.with_output_directory(DEFAULT_ROOT)
-@clio.with_target_variable(variable_names=list(TRANSFORM_MAP))
+@clio.with_target_variable(allow_all=True, variable_names=list(TRANSFORM_MAP))
 @clio.with_queue()
 def generate_historical_reference(
     output_dir: str,
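
clio is this repo's own CLI helper module, so the semantics of allow_all=True are not visible in this diff; a guess at the behavior, sketched with plain click (names and signature are assumptions):

    import click

    def with_target_variable(variable_names: list[str], *, allow_all: bool = False):
        # Presumably allow_all=True adds an "all" sentinel so one invocation
        # can fan a task out over every known variable.
        choices = [*variable_names, "all"] if allow_all else list(variable_names)
        return click.option(
            "--target-variable",
            type=click.Choice(choices),
            required=True,
        )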
6 changes: 0 additions & 6 deletions src/climate_downscale/generate/scenario_annual.py
@@ -18,19 +18,16 @@
         source_variables=["mean_temperature"],
         transform_funcs=[utils.annual_mean],
         encoding_scale=0.01,
-        encoding_offset=273.15,
     ),
     "mean_high_temperature": utils.Transform(
         source_variables=["max_temperature"],
         transform_funcs=[utils.annual_mean],
         encoding_scale=0.01,
-        encoding_offset=273.15,
     ),
     "mean_low_temperature": utils.Transform(
         source_variables=["min_temperature"],
         transform_funcs=[utils.annual_mean],
         encoding_scale=0.01,
-        encoding_offset=273.15,
     ),
     **{
         f"days_over_{temp}C": utils.Transform(
@@ -43,7 +40,6 @@
         source_variables=["heat_index"],
         transform_funcs=[utils.annual_mean],
         encoding_scale=0.01,
-        encoding_offset=273.15,
     ),
     **{
         f"days_over_{temp}C_heat_index": utils.Transform(
@@ -59,7 +55,6 @@
         source_variables=["humidex"],
         transform_funcs=[utils.annual_mean],
         encoding_scale=0.01,
-        encoding_offset=273.15,
     ),
     **{
         f"days_over_{temp}C_humidex": utils.Transform(
@@ -75,7 +70,6 @@
         source_variables=["effective_temperature"],
         transform_funcs=[utils.annual_mean],
         encoding_scale=0.01,
-        encoding_offset=273.15,
     ),
     **{
         f"days_over_{temp}C_effective_temperature": utils.Transform(
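
The days_over_{temp}C families visible around these hunks are generated by dict unpacking over a threshold list. A minimal sketch of the pattern (thresholds and transform bodies are placeholders, not this module's actual definitions):

    TEMPS = [30, 35, 40]  # placeholder thresholds

    TRANSFORM_MAP = {
        "mean_temperature": lambda ds: ds.mean("date"),
        **{
            # Bind temp at definition time (t=temp) to avoid the classic
            # late-binding pitfall with lambdas in comprehensions.
            f"days_over_{temp}C": (lambda ds, t=temp: (ds > t).sum("date"))
            for temp in TEMPS
        },
    }
    # keys: mean_temperature, days_over_30C, days_over_35C, days_over_40C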
139 changes: 74 additions & 65 deletions src/climate_downscale/generate/scenario_daily.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 import itertools
 from pathlib import Path

@@ -33,7 +34,6 @@
             source_variables=["tas"],
             transform_funcs=[utils.identity],
             encoding_scale=0.01,
-            encoding_offset=273.15,
         ),
         "additive",
     ),
@@ -42,7 +42,6 @@
             source_variables=["tasmax"],
             transform_funcs=[utils.identity],
             encoding_scale=0.01,
-            encoding_offset=273.15,
         ),
         "additive",
     ),
@@ -51,7 +50,6 @@
             source_variables=["tasmin"],
             transform_funcs=[utils.identity],
             encoding_scale=0.01,
-            encoding_offset=273.15,
         ),
         "additive",
     ),
@@ -86,29 +84,16 @@ def get_source_paths(
     cd_data: ClimateDownscaleData,
     source_variables: list[str],
     cmip6_experiment: str,
-) -> list[list[Path]]:
-    models_by_var = {}
-    for source_variable in source_variables:
-        model_vars = {
-            p.stem.split(f"{cmip6_experiment}_")[1]
-            for p in cd_data.extracted_cmip6.glob(
-                f"{source_variable}_{cmip6_experiment}*.nc"
-            )
-        }
-        models_by_var[source_variable] = model_vars
-
-    shared_models = set.intersection(*models_by_var.values())
-    for var, models in models_by_var.items():
-        extra_models = models.difference(shared_models)
-        if extra_models:
-            print(var, extra_models)
-    source_paths = [
-        [
-            cd_data.extracted_cmip6 / f"{source_variable}_{cmip6_experiment}_{model}.nc"
-            for source_variable in source_variables
-        ]
-        for model in sorted(shared_models)
-    ]
+) -> dict[str, list[list[Path]]]:
+    inclusion_meta = cd_data.load_scenario_inclusion_metadata()[source_variables]
+    inclusion_meta = inclusion_meta[inclusion_meta.all(axis=1)]
+    source_paths = defaultdict(list)
+    for source, variant in inclusion_meta.index.tolist():
+        source_paths[source].append(
+            [cd_data.extracted_cmip6_path(v, cmip6_experiment, source, variant)
+             for v in source_variables]
+        )

     return source_paths
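
get_source_paths now keys models off the inclusion table written by scenario_inclusion.py instead of globbing the extraction directory. A toy run of the grouping logic (model names, variant label, and filename pattern are illustrative only):

    from collections import defaultdict

    import pandas as pd

    # Shape this rewrite assumes: a boolean frame indexed by (source, variant)
    # with one column per variable.
    inclusion_meta = pd.DataFrame(
        {"tas": [True, True], "tasmax": [True, False]},
        index=pd.MultiIndex.from_tuples(
            [("CanESM5", "r1i1p1f1"), ("MIROC6", "r1i1p1f1")],
            names=["source", "variant"],
        ),
    )
    inclusion_meta = inclusion_meta[inclusion_meta.all(axis=1)]  # complete rows only

    source_paths = defaultdict(list)
    for source, variant in inclusion_meta.index.tolist():
        source_paths[source].append(
            [f"{v}_ssp245_{source}_{variant}.nc" for v in ["tas", "tasmax"]]
        )
    # {"CanESM5": [["tas_ssp245_CanESM5_r1i1p1f1.nc", "tasmax_ssp245_CanESM5_r1i1p1f1.nc"]]}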


@@ -187,52 +172,76 @@ def generate_scenario_daily_main( # noqa: PLR0912
         year="reference",
     )

-    anomalies: dict[str, xr.Dataset] = {}
-    for i, sps in enumerate(source_paths):
-        pid = f"{i+1}/{len(source_paths)} {sps[0].stem}"
-        print(f"{pid}: Loading reference")
-        try:
-            scenario_reference = transform(
-                *[load_variable(sp, "reference") for sp in sps]
-            )
-            print(f"{pid}: Loading target")
-            target = transform(*[load_variable(sp, year) for sp in sps])
-        except KeyError:
-            print(f"{pid}: Bad formatting, skipping...")
-            continue
-        print(f"{pid}: computing anomaly")
-        s_anomaly = compute_anomaly(scenario_reference, target, anomaly_type)
-        key = f"{len(s_anomaly.latitude)}_{len(s_anomaly.longitude)}"
-
-        if key in anomalies:
-            old = anomalies[key]
-            for coord in ["latitude", "longitude"]:
-                old_c = old[coord].to_numpy()
-                new_c = s_anomaly[coord].to_numpy()
-                tol = 1e-5
-                if np.abs(old_c - new_c).max() < tol:
-                    s_anomaly = s_anomaly.assign({coord: old_c})
-                else:
-                    msg = f"{coord} does not match despite having the same subdivision"
-                    raise ValueError(msg)
-            anomalies[key] = old + s_anomaly
-        else:
-            anomalies[key] = s_anomaly
+    anomalies: dict[str, dict[str, tuple[int, xr.Dataset]]] = {}
+    for i, (source, variant_paths) in enumerate(source_paths.items()):
+        sid = f"Source {i+1}/{len(source_paths)}: {source}"
+
+        source_anomalies: dict[str, tuple[int, xr.Dataset]] = {}
+        for j, vps in enumerate(variant_paths):
+            vid = f"{sid}, Variant {j+1}/{len(variant_paths)}: {vps[0].stem.split('_')[-1]}"
+            try:
+                print(f"{vid}: Loading reference")
+                sref = transform(*[load_variable(vp, "reference") for vp in vps])
+                print(f"{vid}: Loading target")
+                target = transform(*[load_variable(vp, year) for vp in vps])
+            except KeyError:
+                print(f"{vid}: Bad formatting, skipping...")
+                continue
+
+            print(f"{vid}: computing anomaly")
+            v_anomaly = compute_anomaly(sref, target, anomaly_type)
+
+            key = f"{len(v_anomaly.latitude)}_{len(v_anomaly.longitude)}"
+
+            if key in source_anomalies:
+                old_count, old_anomaly = source_anomalies[key]
+
+                for coord in ["latitude", "longitude"]:
+                    old_c = old_anomaly[coord].to_numpy()
+                    new_c = v_anomaly[coord].to_numpy()
+                    tol = 1e-5
+
+                    if np.abs(old_c - new_c).max() < tol:
+                        v_anomaly = v_anomaly.assign({coord: old_c})
+                    else:
+                        msg = f"{coord} does not match despite having the same subdivision"
+                        raise ValueError(msg)
+                source_anomalies[key] = old_count + 1, old_anomaly + v_anomaly
+            else:
+                source_anomalies[key] = 1, v_anomaly
+        if source_anomalies:
+            anomalies[source] = source_anomalies

-    anomaly = xr.Dataset()
-    for i, (k, v) in enumerate(anomalies.items()):
-        print(f"Downscaling {i+1}/{len(anomalies)}: {k}")
-        if anomaly.nbytes:
-            anomaly += utils.interpolate_to_target_latlon(v, method="linear")
-        else:
-            anomaly = utils.interpolate_to_target_latlon(v, method="linear")
-    anomaly /= len(source_paths)
+    ensemble_anomaly = xr.Dataset()
+    for i, (source, source_anomalies) in enumerate(anomalies.items()):
+        sid = f"Source {i+1}/{len(source_paths)}: {source}"
+        print(f"Downscaling {i+1}/{len(anomalies)}: {source}")
+
+        source_ensemble_anomaly = xr.Dataset()
+        total_count = 0
+        for j, (res, (count, v_anomaly)) in enumerate(source_anomalies.items()):
+            res_id = f"{sid}, Resolution {j} / {len(source_anomalies)}: {res}"
+            print(f"Downscaling {res_id}")
+
+            if source_ensemble_anomaly.nbytes:
+                source_ensemble_anomaly += utils.interpolate_to_target_latlon(v_anomaly, method="linear")
+            else:
+                source_ensemble_anomaly = utils.interpolate_to_target_latlon(v_anomaly, method="linear")
+            total_count += count
+        source_ensemble_anomaly /= total_count
+
+        if ensemble_anomaly.nbytes:
+            ensemble_anomaly += source_ensemble_anomaly
+        else:
+            ensemble_anomaly = source_ensemble_anomaly
+
+    ensemble_anomaly /= len(anomalies)

     print("Computing scenario data")
     if anomaly_type == "additive":
-        scenario_data = historical_reference + anomaly.groupby("date.month")
+        scenario_data = historical_reference + ensemble_anomaly.groupby("date.month")
     else:
-        scenario_data = historical_reference * anomaly.groupby("date.month")
+        scenario_data = historical_reference * ensemble_anomaly.groupby("date.month")
     scenario_data = scenario_data.drop_vars("month")
     print("Saving")
     cd_data.save_daily_results(
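
The rewritten loop averages in two stages: variant anomalies are pooled into one mean per source (grouped by grid shape so same-resolution grids are summed before the expensive interpolation), and the per-source means are then averaged with equal weight, so a model with many variants no longer dominates the ensemble. compute_anomaly itself is not shown in this diff; a hedged sketch of the delta-change convention it presumably implements:

    import xarray as xr

    def compute_anomaly(
        reference: xr.Dataset, target: xr.Dataset, anomaly_type: str
    ) -> xr.Dataset:
        # Sketch only: additive deltas suit temperature-like fields;
        # multiplicative ratios suit strictly positive fields such as precipitation.
        if anomaly_type == "additive":
            return target - reference
        return target / reference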
6 changes: 6 additions & 0 deletions src/climate_downscale/generate/scenario_inclusion.py
@@ -55,6 +55,12 @@ def generate_scenario_inclusion_main(
     )
     inclusion_df = pd.concat([year_range, valid_scenarios], axis=1).reset_index()
     inclusion_df["include"] = inclusion_df.valid_scenarios == 5 # noqa: PLR2004
+    inclusion_df = (
+        inclusion_df.loc[inclusion_df.include]
+        .set_index(['source', 'variant', 'variable']).include
+        .unstack()
+        .fillna(False)
+    )

     cd_data.save_scenario_metadata(meta_df)
     cd_data.save_scenario_inclusion_metadata(inclusion_df)
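
What the new reshape produces, on toy data (model and variant names are placeholders):

    import pandas as pd

    inclusion_df = pd.DataFrame({
        "source": ["CanESM5", "CanESM5", "MIROC6"],
        "variant": ["r1i1p1f1", "r1i1p1f1", "r1i1p1f1"],
        "variable": ["tas", "tasmax", "tas"],
        "include": [True, True, True],
    })
    wide = (
        inclusion_df.loc[inclusion_df.include]
        .set_index(["source", "variant", "variable"]).include
        .unstack()
        .fillna(False)
    )
    # variable            tas  tasmax
    # source  variant
    # CanESM5 r1i1p1f1   True    True
    # MIROC6  r1i1p1f1   True   False

This wide boolean layout is what the rewritten get_source_paths in scenario_daily.py consumes via load_scenario_inclusion_metadata().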
