Commit 1ee4c4a: Merge in upstream

collijk committed May 13, 2024
2 parents: 2f3d168 + e6e252a
Showing 13 changed files with 518 additions and 310 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -78,5 +78,5 @@ venv.bak/
# IDEs
.idea/

# Jupyter Notebook
# Jupyter scratch notebooks
notebooks/
5 changes: 0 additions & 5 deletions .pre-commit-config.yaml
@@ -25,11 +25,6 @@ repos:
# Add --fix, in case you want it to autofix when this hook runs
entry: poetry run ruff check --force-exclude --fix
require_serial: true
- id: ruff
name: ruff
# Add --fix, in case you want it to autofix when this hook runs
entry: poetry run ruff check --force-exclude
require_serial: true
language: system
types: [ python ]
- id: mypy
8 changes: 4 additions & 4 deletions README.md
@@ -21,13 +21,13 @@ Pipelines to downscale ERA5 and CMIP6 data.

Instructions using conda:

1. Clone this repository.

Over ssh:
```sh
git clone git@github.com:ihmeuw/climate-downscale.git
```

Over https:
```sh
git clone https://github.com/ihmeuw/climate-downscale.git
@@ -49,8 +49,8 @@ Instructions using conda:

### Pre-commit

Pre-commit hooks run all the auto-formatting (`ruff format`), linters
(e.g. `ruff` and `mypy`), and other quality checks to make sure the changeset is in
good shape before a commit/push happens.

You can install the hooks with (runs for each commit):
11 changes: 10 additions & 1 deletion pyproject.toml
@@ -68,13 +68,19 @@ cdtask = "climate_downscale.cli:cdtask"

[tool.ruff]
target-version = "py310" # The lowest supported version
exclude = [
"temperature",
]

[tool.ruff.lint]
# By default, enable all the lint rules.
# Add to the ignore list below if you don't want some rules.
# If you need some ignores for certain modules, see tool.ruff.lint.per-file-ignores below.
# For individual ignore cases, prefer inline `# noqa`s within the code.
select = ["ALL"]
exclude = [
"temperature",
]
ignore = [
"COM812", # flake8 missing trailing comma, formatter handles
"ISC001", # Implicit string concatenation
@@ -103,7 +109,7 @@ classmethod-decorators = [
]

[tool.ruff.lint.flake8-tidy-imports]
ban-relative-imports = true
ban-relative-imports = "all"

[tool.pytest.ini_options]
addopts = """\
@@ -125,6 +131,9 @@ exclude_lines = [
# Avoid changing this!
strict = true # See all the enabled flags `mypy --help | grep -A 10 'Strict mode'`
disallow_any_unimported = true
exclude = [
"temperature",
]

# If you need to ignore something for some specific module,
# add overrides for them. Avoid changing the global config!
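For readers unfamiliar with the flake8-tidy-imports setting changed above, here is a minimal sketch (not part of this commit; module names below are hypothetical) of what `ban-relative-imports = "all"` enforces:

```python
# Sketch only -- not from the commit. Shows what ruff's
# [tool.ruff.lint.flake8-tidy-imports] ban-relative-imports = "all" enforces
# (rule TID252). Module names below are hypothetical.

# Flagged only once "all" is set (same-package relative import):
# from .helpers import load_chunk
# Flagged under the default "parents" setting as well (climbs a package level):
# from ..shared import constants

# Preferred style with "all": absolute imports rooted at the package name.
from climate_downscale.cli import cdtask  # entry point declared in pyproject.toml above
```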
4 changes: 2 additions & 2 deletions temperature/README.md
@@ -4,5 +4,5 @@ This repo contains the fundamental code used to downscale ERA5 daily average tem

1. Moving the intermediate processing steps to their own worker scripts
2. Converting all outputs to a compressed file format. Ideally, using compression options found in the xarray library when outputting .nc files.
3. Creating an additional script that addresses problematic outliers in the prediction step. Ideally, this is done within the regional_predictions.py script or directly after it.
4. Include diagnostic scripts used to visualize prediction data.
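Item 2 in the list above refers to xarray's per-variable compression options when writing .nc files. A minimal sketch of that idea (illustrative only; the array, coordinates, and file name are placeholders, not taken from this repo):

```python
# Minimal sketch of xarray's per-variable netCDF compression (illustrative;
# data, coordinates, and file name are placeholders). Requires the netCDF4
# or h5netcdf backend to be installed.
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"predictions": (("lat", "lon"), np.random.rand(180, 360).astype("float32"))},
    coords={"lat": np.arange(-89.5, 90.5, 1.0), "lon": np.arange(-179.5, 180.5, 1.0)},
)

# The encoding dict is passed through to the backend: zlib + complevel gives
# lossless deflate compression; dtype/scale_factor packing (as in the script
# below) is an additional, lossy option.
ds.to_netcdf(
    "predictions_compressed.nc",
    encoding={"predictions": {"zlib": True, "complevel": 4, "dtype": "float32"}},
)
```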
54 changes: 28 additions & 26 deletions temperature/daily_preds_nc.py
@@ -22,23 +22,23 @@
final_data = pd.DataFrame()

for file in os.listdir(folder):
if file.endswith('.feather'):
if file.endswith(".feather"):
print(file)
# load the data
print("Loading data")
data = pd.read_feather(folder + file)
# drop index column
print("Dropping unnecessary columns")
# data = data.drop(columns='index')
data = data[['latitude_st', 'longitude_st', 'day_of_year', 'predictions']]
data = data[["latitude_st", "longitude_st", "day_of_year", "predictions"]]
# filter to get day
print("Filtering to get day: ", day)
data_day = data[data['day_of_year'] == day]
data_day = data[data["day_of_year"] == day]
# round lat/lon for storage
print("Rounding lat/lon")
data_day['latitude_st'] = data_day['latitude_st'].round(10)
data_day['longitude_st'] = data_day['longitude_st'].round(10)
data_day['predictions'] = data_day['predictions'].round(3)
data_day["latitude_st"] = data_day["latitude_st"].round(10)
data_day["longitude_st"] = data_day["longitude_st"].round(10)
data_day["predictions"] = data_day["predictions"].round(3)
print("Appending data")
# Append the data for the current day to the larger DataFrame
final_data = pd.concat([final_data, data_day])
@@ -53,26 +53,26 @@
date = date + timedelta(days=day_of_year)

# Format the datetime object into a string
date_string = date.strftime('%Y_%m_%d')
date_string = date.strftime("%Y_%m_%d")
print("Here is the date string: ", date_string)

# replace old time columns with date
final_data.rename(columns={'latitude_st': 'lat', 'longitude_st': 'lon'}, inplace=True)
final_data = final_data[['lat', 'lon', 'predictions']]
final_data['date'] = date_string
final_data.rename(columns={"latitude_st": "lat", "longitude_st": "lon"}, inplace=True)
final_data = final_data[["lat", "lon", "predictions"]]
final_data["date"] = date_string

filename = 'predictions_' + date_string
filename = "predictions_" + date_string

print('resetting index')
final_data.set_index(['date', 'lat', 'lon'], inplace=True)
print("resetting index")
final_data.set_index(["date", "lat", "lon"], inplace=True)

# convert the dataframe to an xarray Dataset
print('converting to xarray Dataset')
print("converting to xarray Dataset")
final_data = xr.Dataset.from_dataframe(final_data)

# Calculate the scale factor and add offset
min_value = final_data['predictions'].min(dim=('date', 'lat', 'lon')).item()
max_value = final_data['predictions'].max(dim=('date', 'lat', 'lon')).item()
min_value = final_data["predictions"].min(dim=("date", "lat", "lon")).item()
max_value = final_data["predictions"].max(dim=("date", "lat", "lon")).item()
scale_factor = (max_value - min_value) / (2**16 - 1)
add_offset = min_value

@@ -82,13 +82,15 @@
print("saving file to: ", output_path + filename)

# save the Dataset to a netCDF file
final_data.to_netcdf(output_path + filename + '.nc',
encoding={
'predictions': {
'dtype': 'float32',
'scale_factor': scale_factor,
'add_offset': add_offset,
'_FillValue': -9999,
'zlib': True
}
})
final_data.to_netcdf(
output_path + filename + ".nc",
encoding={
"predictions": {
"dtype": "float32",
"scale_factor": scale_factor,
"add_offset": add_offset,
"_FillValue": -9999,
"zlib": True,
}
},
)
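The `scale_factor`/`add_offset` computed in this script follow the usual CF-style packing convention. A small worked example of the arithmetic (values are made up, not taken from the data):

```python
# Worked example of the packing arithmetic set up above (numbers are made up,
# not taken from the data). Note that the encoding in this script keeps
# dtype float32; packing to 16 bits would normally pair the scale factor with
# an integer dtype such as int16 or uint16.
min_value, max_value = -40.0, 45.0                     # hypothetical prediction range

scale_factor = (max_value - min_value) / (2**16 - 1)   # ~0.0013 units per integer step
add_offset = min_value

def pack(value: float) -> int:
    """Map a physical value onto the 0..65535 integer range."""
    return round((value - add_offset) / scale_factor)

def unpack(stored: int) -> float:
    """Invert the packing: stored * scale_factor + add_offset."""
    return stored * scale_factor + add_offset

print(pack(20.0))          # ~46261
print(unpack(pack(20.0)))  # ~20.0, accurate to within scale_factor / 2
```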
95 changes: 59 additions & 36 deletions temperature/grid_setup/grid_setup_with_elevation_v3.py
@@ -23,7 +23,7 @@
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from joblib import load
@@ -45,25 +45,35 @@
import subprocess

import pyproj
pyproj.datadir.set_data_dir('/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/share/proj')
os.environ['PROJ_LIB'] = '/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/share/proj'

pyproj.datadir.set_data_dir(
"/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/share/proj"
)
os.environ["PROJ_LIB"] = (
"/ihme/homes/nhashmeh/miniconda3/envs/earth-analytics-python/share/proj"
)

# Parse arguments
parser = argparse.ArgumentParser()
parser.add_argument('file', type=str, help='filename')
parser.add_argument("file", type=str, help="filename")

args = parser.parse_args()

chunks_path = "/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/chunks_by_latlon_v3/"
chunks_path = (
"/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/chunks_by_latlon_v3/"
)

chunk_file = args.file

grid_df = pd.read_feather(chunks_path + chunk_file)

grid_df.drop(columns=['latitude_right', 'longitude_right'], inplace=True) # this should have been dropped earlier...
grid_df.drop(
columns=["latitude_right", "longitude_right"], inplace=True
) # this should have been dropped earlier...

grid_df["geometry"] = grid_df["geometry"].apply(wkt.loads)
grid_gdf = gpd.GeoDataFrame(grid_df, geometry="geometry")

grid_df['geometry'] = grid_df['geometry'].apply(wkt.loads)
grid_gdf = gpd.GeoDataFrame(grid_df, geometry='geometry')

# Function to adjust longitude
def adjust_longitude(geometry):
@@ -72,15 +82,15 @@ def adjust_longitude(geometry):
else:
return geometry


gdf_merge = grid_gdf
gdf_merge['elevation'] = np.nan
gdf_merge["elevation"] = np.nan

merge_counter = 0
success_counter = 0

for file in os.listdir('/mnt/share/erf/SRTM_GL1_srtm/'):

file_ext = file.split('.')[-1]
for file in os.listdir("/mnt/share/erf/SRTM_GL1_srtm/"):
file_ext = file.split(".")[-1]

if file_ext != "tif":
continue
@@ -93,59 +103,72 @@ def adjust_longitude(geometry):
if file_ext != "tif":
continue

if NS == 'S':
NS_num= float(NS_num) * -1
if NS == "S":
NS_num = float(NS_num) * -1

if EW == 'W':
if EW == "W":
EW_num = 360 - float(EW_num)

if (((EW_num < math.floor(gdf_merge['longitude_left'].min())) | (EW_num > math.ceil(gdf_merge['longitude_left'].max()))) | ((NS_num < math.floor(gdf_merge['latitude_left'].min())) | (NS_num > math.ceil(gdf_merge['latitude_left'].max())))):
if (
(EW_num < math.floor(gdf_merge["longitude_left"].min()))
| (EW_num > math.ceil(gdf_merge["longitude_left"].max()))
) | (
(NS_num < math.floor(gdf_merge["latitude_left"].min()))
| (NS_num > math.ceil(gdf_merge["latitude_left"].max()))
):
merge_counter += 1
continue

# rescaling 30 meters to 1 km
downscale_factor = 30/1000
downscale_factor = 30 / 1000

# Load the raster
with reo.open("/mnt/share/erf/SRTM_GL1_srtm/" + file) as src:
# resample data to target shape
data = src.read(
out_shape=(
src.count,
int(src.height * downscale_factor), # multiply height by downscale factor
int(src.width * downscale_factor) # multiply width by downscale factor
int(
src.height * downscale_factor
), # multiply height by downscale factor
int(src.width * downscale_factor), # multiply width by downscale factor
),
resampling=Resampling.bilinear # resampling method
resampling=Resampling.bilinear, # resampling method
)

# scale image transform
transform = src.transform * src.transform.scale(
(src.width / data.shape[-1]),
(src.height / data.shape[-2])
(src.width / data.shape[-1]), (src.height / data.shape[-2])
)

# convert the resampled raster to a geodataframe
results = (
{'properties': {'raster_val': v}, 'geometry': s}
for i, (s, v)
in enumerate(
reo.features.shapes(data[0], transform=transform))) # assumes a single band image
{"properties": {"raster_val": v}, "geometry": s}
for i, (s, v) in enumerate(
reo.features.shapes(data[0], transform=transform)
)
) # assumes a single band image

geodf = gpd.GeoDataFrame.from_features(list(results))

# Apply the function to the geometry column
geodf['geometry'] = geodf['geometry'].apply(adjust_longitude)
geodf["geometry"] = geodf["geometry"].apply(adjust_longitude)

prev_len = len(gdf_merge[~gdf_merge['elevation'].isna()]) # used for debugging
prev_len = len(gdf_merge[~gdf_merge["elevation"].isna()]) # used for debugging

gdf_merge = gpd.sjoin(gdf_merge, geodf, how='left', predicate='intersects')
gdf_merge = gpd.sjoin(gdf_merge, geodf, how="left", predicate="intersects")
# test_merge.rename(columns = {'raster_val' : 'elevation'}, inplace=True)
gdf_merge['elevation'] = gdf_merge['elevation'].combine_first(gdf_merge['raster_val'])
gdf_merge.drop(columns = 'index_right', inplace=True)
gdf_merge.drop(columns = 'raster_val', inplace=True)

gdf_merge["elevation"] = gdf_merge["elevation"].combine_first(
gdf_merge["raster_val"]
)
gdf_merge.drop(columns="index_right", inplace=True)
gdf_merge.drop(columns="raster_val", inplace=True)

if len(gdf_merge[~gdf_merge['elevation'].isna()]) > prev_len:
if len(gdf_merge[~gdf_merge["elevation"].isna()]) > prev_len:
success_counter += 1

gdf_merge.to_feather("/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/chunks_with_elevation_v3/" + chunk_file, index=False)
gdf_merge.to_feather(
"/mnt/share/erf/ERA5/merged_era5_with_stations/grid_setup/chunks_with_elevation_v3/"
+ chunk_file,
index=False,
)
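A short illustration (not from the commit) of the resampling arithmetic used above: an SRTM GL1 1-arc-second tile is typically 3601 x 3601 pixels at roughly 30 m spacing, so multiplying the raster shape by 30/1000 targets a ~1 km output grid, and the affine transform is stretched by the inverse ratio so the resampled raster still covers the same tile footprint.

```python
# Illustration of the downscale_factor arithmetic above (tile size assumed to
# be the typical 3601 x 3601 SRTM GL1 1-arc-second tile; nothing is read from disk).
downscale_factor = 30 / 1000          # ~30 m source pixels -> ~1 km target pixels

src_height, src_width = 3601, 3601
out_height = int(src_height * downscale_factor)   # 108 rows
out_width = int(src_width * downscale_factor)     # 108 cols

# The transform scale factors are the inverse ratio, so each output pixel
# spans ~33 source pixels and the tile keeps its 1-degree footprint.
scale_x = src_width / out_width       # ~33.3 source columns per output column
scale_y = src_height / out_height

print(out_height, out_width, round(scale_x, 1), round(scale_y, 1))
```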
(Diffs for the remaining 6 changed files did not load in this view.)

